finish gui.tex

ppwwyyxx · Jan 3, 2014 · 4bada4c · 4bada4c
1 parent 17b9ad2
commit 4bada4c
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 73 deletions.
diff --git a/doc/Final-Report-Complete/dataset.tex b/doc/Final-Report-Complete/dataset.tex
@@ -1,14 +1,21 @@
 \section{Dataset}
-	The dataset provided by teacher comprised of 102 speaker, in which 60 are
-	females and the rest are males, with three different speaking style: Spontaneous,
-	Reading and Whisper. A statistic is as follows:
+\label{sec:data}
+  In the field of speech/speaker recognition, there are some research-oriented
+  corpora, but most of them are expensive. \cite{database} gives a detailed list of the
+  popular speech corpora for speech/speaker recognition. In this system,
+  we mainly use the speech corpus provided by our teacher Xu.
+
+	The dataset provided comprises 102 speakers, of whom 60 are
+	female and the rest are male.
+    The dataset contains three different speaking styles: Spontaneous,
+	Reading and Whisper. Some simple statistics are as follows:
 	\begin{table}[!ht]
 		\centering
 		\begin{tabular}{|c|c|c|c|}
-			\hline
-			& Spontaneous & Reading & Whisper \\\hline
+			\shline
+			& Spontaneous & Reading & Whisper \\\shline
 			Average Duration & 202s & 205s & 221s \\\hline
 			Female Average Duration & 205s & 202s & 217s \\\hline
-			Male Average Duration & 200s & 203s & 223s \\\hline
+			Male Average Duration & 200s & 203s & 223s \\\shline
 		\end{tabular}
 	\end{table}
diff --git a/doc/Final-Report-Complete/gui.tex b/doc/Final-Report-Complete/gui.tex
@@ -39,9 +39,9 @@ \section{GUI}
       \includegraphics[width=0.8\textwidth]{img/recognition.png}
     \end{figure}
 
-    A enrolled user present or record a piece of utterance,
-    the system tells who the person is and show user's avatar.
-    Recognition of multiple pre-recorded files can be done as well.
+    A user who is present can record an utterance, or provide a wav file,
+    then the system will tell who the person is and show his/her avatar.
+    Recognition of multiple pre-recorded files can be done as well; the results will be printed on the command line.
 
   \item \textbf{Conversation Recognition Mode} \\
     \begin{figure}[H]
@@ -54,26 +54,17 @@ \section{GUI}
     together near the microphone. Same recording procedure as above.
     The system will continuously collect voice data, and determine
     who is speaking right now. The current speaker's avatar will show up
-    in screen; otherwise the name will be shown. The conversation
-    audio can be downloaded and saved.
-    There are some ways to visualize the speaker-distribution in the
-    conversation.
-    \begin{itemize}
-      \item \textbf{Conversation log}
-        A detailed log, including start time, stop time,
-        current speaker of each period is generated.
-      \item \textbf{Conversation flow graph}
-        \begin{figure}[H]
-          \centering
-          \includegraphics[width=0.8\textwidth]{img/conversationgraph.png}
-        \end{figure}
+    on screen; otherwise the name will be shown.
 
-        A timeline of the conversation will be shown by a number of
-        talking-clouds joining together, with start time, stop time
-        and users' avatars labeled. Different users are presented
-        with different colors.The timeline will flow to the left dynamically
-        just as time elapses. The visualization of the conversation is done
-        in this way. This functionality is still under development.
-    \end{itemize}
+    We can show a \textbf{Conversation flow graph} to visualize the recognition.
+    A timeline of the conversation will be shown by a number of
+    talking-clouds joining together, with start time, stop time
+    and users' avatars labeled. The avatar of the talking person will also be larger than the others.
+    Different users are displayed with different colors in the timeline,
+    and the timeline flows to the left dynamically just as time elapses.
+    \begin{figure}[H]
+      \centering
+      \includegraphics[width=0.8\textwidth]{img/gui-graph.png}
+    \end{figure}
 
 \end{itemize}
diff --git a/doc/Final-Report-Complete/img/gui-graph.png b/doc/Final-Report-Complete/img/gui-graph.png
diff --git a/doc/Final-Report-Complete/implementation.tex b/doc/Final-Report-Complete/implementation.tex
@@ -1,9 +1,9 @@
 %File: implementation.tex
-%Date: Fri Jan 03 21:07:55 2014 +0800
+%Date: Fri Jan 03 21:09:50 2014 +0800
 %Author: Yuxin Wu <ppwwyyxxc@gmail.com>
 
 \section{Implementation}
-The whole system is written mainly in python, together with code in C++ and matlab.
+The whole system is written mainly in Python, together with some code in C++ and MATLAB.
 The system strongly relies on the support of the NumPy~\cite{numpy} and SciPy~\cite{scipy} libraries.
 
 \begin{enumerate}

diff --git a/doc/Final-Report-Complete/report.tex b/doc/Final-Report-Complete/report.tex
@@ -1,6 +1,6 @@
 %
 % $File: report.tex
-% $Date: Fri Jan 03 18:37:42 2014 +0800
+% $Date: Fri Jan 03 21:16:42 2014 +0800
 %
 
 \documentclass{article}
@@ -67,6 +67,12 @@
 \maketitle
 \tableofcontents
 
+% \shline: a thick (1pt) \hline; saves \arrayrulewidth, draws the rule, then restores it
+\newlength\savewidth
+\newcommand\shline{\noalign{\global\savewidth\arrayrulewidth\global\arrayrulewidth 1pt}
+                   \hline
+                   \noalign{\global\arrayrulewidth\savewidth}}
+
 \input{intro}
 \input{algorithm}
 \input{implementation}

diff --git a/doc/Final-Report-Complete/result.tex b/doc/Final-Report-Complete/result.tex
@@ -1,7 +1,6 @@
 \section{Performance}
 \label{sec:result}
-We have tested our approaches under various parameters, based on a corpus provided by teacher Xu.
-For detailed description of the corpus, please see former report.
+We have tested our approaches under various parameters, based on a corpus described in \secref{data}.
 
 All the tests in this section have been conducted several times
 (depending on computation cost, varying from 10 to 30)
@@ -148,7 +147,7 @@ \subsection{Accuracy Curve on Different Number of Speakers}
 
 \begin{figure}[H]
   \centering
-  \includegraphics[width=0.9\textwidth]{img/performance.pdf}
+  \includegraphics[width=0.8\textwidth]{img/performance.pdf}
 \end{figure}
 
 We also conducted experiments on different styles of corpus.
@@ -166,16 +165,16 @@ \subsection{Accuracy Curve on Different Number of Speakers}
 
 \begin{figure}[H]
   \centering
-  \includegraphics[width=0.9\textwidth]{img/reading.pdf}
+  \includegraphics[width=0.8\textwidth]{img/reading.pdf}
 \end{figure}
 \begin{figure}[H]
   \centering
-  \includegraphics[width=0.9\textwidth]{img/spont.pdf}
+  \includegraphics[width=0.8\textwidth]{img/spont.pdf}
 \end{figure}
 
 \begin{figure}[H]
   \centering
-  \includegraphics[width=0.9\textwidth]{img/whisper.pdf}
+  \includegraphics[width=0.8\textwidth]{img/whisper.pdf}
 \end{figure}
 
 \subsection{CRBM Performance Test}

diff --git a/src/exp/gen_result.py b/src/exp/gen_result.py