Added API to port internal speech probability in NS.

Identical with CL652007 that's already been accepted for commit. TBR=andrew@webrtc.org BUG=None TEST=None Review URL: https://webrtc-codereview.appspot.com/670009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2511 4adac7df-926f-26a2-2b94-8c16560cd09d
GetStream · Jul 12, 2012 · 08329f4 · 08329f4
1 parent 6182db1
commit 08329f4
Show file tree

Hide file tree

Showing 9 changed files with 102 additions and 8 deletions.
diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
diff --git a/src/modules/audio_processing/include/audio_processing.h b/src/modules/audio_processing/include/audio_processing.h
@@ -545,6 +545,11 @@ class NoiseSuppression {
   virtual int set_level(Level level) = 0;
   virtual Level level() const = 0;
 
+  // Returns the internally computed prior speech probability of the current
+  // frame, averaged over output channels. This is not supported in fixed
+  // point, for which |kUnsupportedFunctionError| is returned.
+  virtual float speech_probability() const = 0;
+
  protected:
   virtual ~NoiseSuppression() {};
 };

diff --git a/src/modules/audio_processing/noise_suppression_impl.cc b/src/modules/audio_processing/noise_suppression_impl.cc
@@ -110,6 +110,20 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
   return level_;
 }
 
+float NoiseSuppressionImpl::speech_probability() const {
+#if defined(WEBRTC_NS_FLOAT)
+  // Mean over all handles; 0 when there are none (avoids 0 / 0 = NaN).
+  float probability_sum = 0.0f;
+  for (int i = 0; i < num_handles(); i++) {
+    Handle* my_handle = static_cast<Handle*>(handle(i));
+    probability_sum += WebRtcNs_prior_speech_probability(my_handle);
+  }
+  return num_handles() > 0 ? probability_sum / num_handles() : 0.0f;
+#elif defined(WEBRTC_NS_FIXED)
+  return apm_->kUnsupportedFunctionError;  // Not available in fixed point.
+#endif
+}
+
 void* NoiseSuppressionImpl::CreateHandle() const {
   Handle* handle = NULL;
 #if defined(WEBRTC_NS_FLOAT)

diff --git a/src/modules/audio_processing/noise_suppression_impl.h b/src/modules/audio_processing/noise_suppression_impl.h
@@ -28,6 +28,7 @@ class NoiseSuppressionImpl : public NoiseSuppression,
 
   // NoiseSuppression implementation.
   virtual bool is_enabled() const;
+  float speech_probability() const;
 
  private:
   // NoiseSuppression implementation.

diff --git a/src/modules/audio_processing/ns/include/noise_suppression.h b/src/modules/audio_processing/ns/include/noise_suppression.h
@@ -20,14 +20,14 @@ extern "C" {
 #endif
 
 /*
- * This function creates an instance to the noise reduction structure
+ * This function creates an instance to the noise suppression structure
  *
  * Input:
- *      - NS_inst       : Pointer to noise reduction instance that should be
+ *      - NS_inst       : Pointer to noise suppression instance that should be
  *                        created
  *
  * Output:
- *      - NS_inst       : Pointer to created noise reduction instance
+ *      - NS_inst       : Pointer to created noise suppression instance
  *
  * Return value         :  0 - Ok
  *                        -1 - Error
@@ -36,7 +36,7 @@ int WebRtcNs_Create(NsHandle** NS_inst);
 
 
 /*
- * This function frees the dynamic memory of a specified Noise Reduction
+ * This function frees the dynamic memory of a specified noise suppression
  * instance.
  *
  * Input:
@@ -49,7 +49,8 @@ int WebRtcNs_Free(NsHandle* NS_inst);
 
 
 /*
- * This function initializes a NS instance
+ * This function initializes an NS instance and has to be called before any
+ * other processing is done.
  *
  * Input:
  *      - NS_inst       : Instance that should be initialized
@@ -67,11 +68,11 @@ int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
  * This changes the aggressiveness of the noise suppression method.
  *
  * Input:
- *      - NS_inst       : Instance that should be initialized
+ *      - NS_inst       : Noise suppression instance.
  *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
  *
  * Output:
- *      - NS_inst       : Initialized instance
+ *      - NS_inst       : Updated instance.
  *
  * Return value         :  0 - Ok
  *                        -1 - Error
@@ -84,7 +85,7 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
  * input and output signals should always be 10ms (80 or 160 samples).
  *
  * Input
- *      - NS_inst       : NS Instance. Needs to be initiated before call.
+ *      - NS_inst       : Noise suppression instance.
  *      - spframe       : Pointer to speech frame buffer for L band
  *      - spframe_H     : Pointer to speech frame buffer for H band
  *      - fs            : sampling frequency
@@ -103,6 +104,18 @@ int WebRtcNs_Process(NsHandle* NS_inst,
                      short* outframe,
                      short* outframe_H);
 
+/* Returns the internally used prior speech probability of the current frame.
+ * A separate per-frequency-bin speech probability also exists; it should not
+ * be confused with this frame-level value.
+ *
+ * Input
+ *      - handle        : Noise suppression instance.
+ *
+ * Return value         : Prior speech probability in interval [0.0, 1.0].
+ *                        -1 - NULL pointer or uninitialized instance.
+ */
+float WebRtcNs_prior_speech_probability(NsHandle* handle);
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/src/modules/audio_processing/ns/noise_suppression.c b/src/modules/audio_processing/ns/noise_suppression.c
@@ -46,3 +46,14 @@ int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
   return WebRtcNs_ProcessCore(
       (NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
 }
+
+/* Reports the prior speech probability stored in the NS instance.
+ * Yields -1 when |handle| is NULL or the instance is not initialized. */
+float WebRtcNs_prior_speech_probability(NsHandle* handle) {
+  const NSinst_t* instance = (const NSinst_t*) handle;
+
+  if (instance == NULL || instance->initFlag == 0) {
+    return -1;
+  }
+  return instance->priorSpeechProb;
+}
diff --git a/src/modules/audio_processing/test/process_test.cc b/src/modules/audio_processing/test/process_test.cc
@@ -115,6 +115,7 @@ void usage() {
   printf("  --ns_moderate\n");
   printf("  --ns_high\n");
   printf("  --ns_very_high\n");
+  printf("  --ns_prob_file FILE\n");
   printf("\n  -vad     Voice activity detection\n");
   printf("  --vad_out_file FILE\n");
   printf("\n Level metrics (enabled by default)\n");
@@ -149,6 +150,7 @@ void void_main(int argc, char* argv[]) {
   const char* near_filename = NULL;
   const char* out_filename = NULL;
   const char* vad_out_filename = NULL;
+  const char* ns_prob_filename = NULL;
   const char* aecm_echo_path_in_filename = NULL;
   const char* aecm_echo_path_out_filename = NULL;
 
@@ -336,6 +338,11 @@ void void_main(int argc, char* argv[]) {
       ASSERT_EQ(apm->kNoError,
           apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));
 
+    } else if (strcmp(argv[i], "--ns_prob_file") == 0) {
+      i++;
+      ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file";
+      ns_prob_filename = argv[i];
+
     } else if (strcmp(argv[i], "-vad") == 0) {
       ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
 
@@ -390,6 +397,7 @@ void void_main(int argc, char* argv[]) {
   const char delay_filename[] = "apm_delay.dat";
   const char drift_filename[] = "apm_drift.dat";
   const char vad_file_default[] = "vad_out.dat";
+  const char ns_prob_file_default[] = "ns_prob.dat";
 
   if (!simulating) {
     far_filename = far_file_default;
@@ -404,6 +412,10 @@ void void_main(int argc, char* argv[]) {
     vad_out_filename = vad_file_default;
   }
 
+  if (!ns_prob_filename) {
+    ns_prob_filename = ns_prob_file_default;
+  }
+
   FILE* pb_file = NULL;
   FILE* far_file = NULL;
   FILE* near_file = NULL;
@@ -412,6 +424,7 @@ void void_main(int argc, char* argv[]) {
   FILE* delay_file = NULL;
   FILE* drift_file = NULL;
   FILE* vad_out_file = NULL;
+  FILE* ns_prob_file = NULL;
   FILE* aecm_echo_path_in_file = NULL;
   FILE* aecm_echo_path_out_file = NULL;
 
@@ -466,6 +479,12 @@ void void_main(int argc, char* argv[]) {
                                       << vad_out_file;
   }
 
+  if (apm->noise_suppression()->is_enabled()) {
+    ns_prob_file = fopen(ns_prob_filename, "wb");
+    ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file "
+                                      << ns_prob_file;
+  }
+
   if (aecm_echo_path_in_filename != NULL) {
     aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
     ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
@@ -504,6 +523,7 @@ void void_main(int argc, char* argv[]) {
   int drift_samples = 0;
   int capture_level = 127;
   int8_t stream_has_voice = 0;
+  float ns_speech_prob = 0.0f;
 
   TickTime t0 = TickTime::Now();
   TickTime t1 = t0;
@@ -643,6 +663,14 @@ void void_main(int argc, char* argv[]) {
                                vad_out_file));
         }
 
+        if (ns_prob_file != NULL) {
+          ns_speech_prob = apm->noise_suppression()->speech_probability();
+          ASSERT_EQ(1u, fwrite(&ns_speech_prob,
+                               sizeof(ns_speech_prob),
+                               1,
+                               ns_prob_file));
+        }
+
         if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
           ASSERT_EQ(msg.level(), capture_level);
         }
@@ -842,6 +870,14 @@ void void_main(int argc, char* argv[]) {
                                vad_out_file));
         }
 
+        if (ns_prob_file != NULL) {
+          ns_speech_prob = apm->noise_suppression()->speech_probability();
+          ASSERT_EQ(1u, fwrite(&ns_speech_prob,
+                               sizeof(ns_speech_prob),
+                               1,
+                               ns_prob_file));
+        }
+
         if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
           ASSERT_EQ(capture_level_in, capture_level);
         }

diff --git a/src/modules/audio_processing/test/unit_test.cc b/src/modules/audio_processing/test/unit_test.cc
@@ -1278,6 +1278,7 @@ TEST_F(ApmTest, Process) {
     int analog_level = 127;
     int analog_level_average = 0;
     int max_output_average = 0;
+    float ns_speech_prob_average = 0.0f;
 
     while (1) {
       if (!ReadFrame(far_file_, revframe_)) break;
@@ -1314,6 +1315,8 @@ TEST_F(ApmTest, Process) {
         EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
       }
 
+      ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
+
       size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
       size_t write_count = fwrite(frame_->data_,
                                   sizeof(int16_t),
@@ -1327,6 +1330,7 @@ TEST_F(ApmTest, Process) {
     }
     max_output_average /= frame_count;
     analog_level_average /= frame_count;
+    ns_speech_prob_average /= frame_count;
 
 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
     EchoCancellation::Metrics echo_metrics;
@@ -1368,6 +1372,9 @@ TEST_F(ApmTest, Process) {
       EXPECT_EQ(reference_delay.std(), std);
 
       EXPECT_EQ(test->rms_level(), rms_level);
+
+      EXPECT_FLOAT_EQ(test->ns_speech_probability_average(),
+                      ns_speech_prob_average);
 #endif
     } else {
       test->set_has_echo_count(has_echo_count);
@@ -1395,6 +1402,10 @@ TEST_F(ApmTest, Process) {
       message_delay->set_std(std);
 
       test->set_rms_level(rms_level);
+
+      EXPECT_LE(0.0f, ns_speech_prob_average);
+      EXPECT_GE(1.0f, ns_speech_prob_average);
+      test->set_ns_speech_probability_average(ns_speech_prob_average);
 #endif
     }
 

diff --git a/src/modules/audio_processing/test/unittest.proto b/src/modules/audio_processing/test/unittest.proto
@@ -44,6 +44,9 @@ message Test {
   optional DelayMetrics delay_metrics = 12;
 
   optional int32 rms_level = 13;
+
+  optional float ns_speech_probability_average = 14;
+
 }
 
 message OutputData {