Skip to content

Commit

Permalink
Added API to port internal speech probability in NS.
Browse files Browse the repository at this point in the history
Identical with CL652007 that's already been accepted for commit.

TBR=andrew@webrtc.org
BUG=None
TEST=None
Review URL: https://webrtc-codereview.appspot.com/670009

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2511 4adac7df-926f-26a2-2b94-8c16560cd09d
  • Loading branch information
bjornv@webrtc.org committed Jul 12, 2012
1 parent 6182db1 commit 08329f4
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 8 deletions.
Binary file modified data/audio_processing/output_data_float.pb
Binary file not shown.
5 changes: 5 additions & 0 deletions src/modules/audio_processing/include/audio_processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,11 @@ class NoiseSuppression {
virtual int set_level(Level level) = 0;
virtual Level level() const = 0;

// Returns the internally computed prior speech probability of the current
// frame, averaged over output channels. This is not supported in fixed point,
// for which |kUnsupportedFunctionError| is returned.
virtual float speech_probability() const = 0;

protected:
virtual ~NoiseSuppression() {};
};
Expand Down
14 changes: 14 additions & 0 deletions src/modules/audio_processing/noise_suppression_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,20 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
return level_;
}

// Returns the prior speech probability reported by the noise suppression
// core, averaged over all handles owned by this component.
//
// Floating point build (WEBRTC_NS_FLOAT): the arithmetic mean of
// WebRtcNs_prior_speech_probability() over every handle. When there are no
// handles to average over, 0.0f is returned instead of performing a 0/0
// division (which would yield NaN).
// NOTE(review): a handle that is NULL or uninitialized contributes -1 to the
// sum, as returned by WebRtcNs_prior_speech_probability() - confirm whether
// callers can reach this state.
//
// Fixed point build (WEBRTC_NS_FIXED): not supported;
// |kUnsupportedFunctionError| is returned (converted to float).
float NoiseSuppressionImpl::speech_probability() const {
#if defined(WEBRTC_NS_FLOAT)
  const int handle_count = num_handles();
  if (handle_count <= 0) {
    // Nothing to average; avoid dividing by zero.
    return 0.0f;
  }

  float probability_sum = 0.0f;
  for (int i = 0; i < handle_count; ++i) {
    Handle* my_handle = static_cast<Handle*>(handle(i));
    probability_sum += WebRtcNs_prior_speech_probability(my_handle);
  }
  return probability_sum / handle_count;
#elif defined(WEBRTC_NS_FIXED)
  // Currently not available for the fixed point implementation.
  return apm_->kUnsupportedFunctionError;
#endif
}

void* NoiseSuppressionImpl::CreateHandle() const {
Handle* handle = NULL;
#if defined(WEBRTC_NS_FLOAT)
Expand Down
1 change: 1 addition & 0 deletions src/modules/audio_processing/noise_suppression_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class NoiseSuppressionImpl : public NoiseSuppression,

// NoiseSuppression implementation.
virtual bool is_enabled() const;
float speech_probability() const;

private:
// NoiseSuppression implementation.
Expand Down
29 changes: 21 additions & 8 deletions src/modules/audio_processing/ns/include/noise_suppression.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ extern "C" {
#endif

/*
* This function creates an instance to the noise reduction structure
* This function creates an instance to the noise suppression structure
*
* Input:
* - NS_inst : Pointer to noise reduction instance that should be
* - NS_inst : Pointer to noise suppression instance that should be
* created
*
* Output:
* - NS_inst : Pointer to created noise reduction instance
* - NS_inst : Pointer to created noise suppression instance
*
* Return value : 0 - Ok
* -1 - Error
Expand All @@ -36,7 +36,7 @@ int WebRtcNs_Create(NsHandle** NS_inst);


/*
* This function frees the dynamic memory of a specified Noise Reduction
* This function frees the dynamic memory of a specified noise suppression
* instance.
*
* Input:
Expand All @@ -49,7 +49,8 @@ int WebRtcNs_Free(NsHandle* NS_inst);


/*
* This function initializes a NS instance
* This function initializes a NS instance and has to be called before any other
* processing is made.
*
* Input:
* - NS_inst : Instance that should be initialized
Expand All @@ -67,11 +68,11 @@ int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Instance that should be initialized
* - NS_inst : Noise suppression instance.
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Initialized instance
* - NS_inst : Updated instance.
*
* Return value : 0 - Ok
* -1 - Error
Expand All @@ -84,7 +85,7 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : NS Instance. Needs to be initiated before call.
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for L band
* - spframe_H : Pointer to speech frame buffer for H band
* - fs : sampling frequency
Expand All @@ -103,6 +104,18 @@ int WebRtcNs_Process(NsHandle* NS_inst,
short* outframe,
short* outframe_H);

/* Returns the internally used prior speech probability of the current frame.
* There is a frequency bin based one as well, with which this should not be
* confused.
*
* Input
* - handle : Noise suppression instance.
*
* Return value : Prior speech probability in interval [0.0, 1.0].
* -1 - NULL pointer or uninitialized instance.
*/
float WebRtcNs_prior_speech_probability(NsHandle* handle);

#ifdef __cplusplus
}
#endif
Expand Down
11 changes: 11 additions & 0 deletions src/modules/audio_processing/ns/noise_suppression.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,14 @@ int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
}

/* Reports the prior speech probability stored in the given noise suppression
 * instance. Returns -1 when |handle| is NULL or the instance has not been
 * initialized (initFlag == 0). */
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
  NSinst_t* inst = (NSinst_t*) handle;

  /* The NULL test short-circuits, so |inst| is only dereferenced when valid. */
  if (inst == NULL || inst->initFlag == 0) {
    return -1.0f;
  }

  return inst->priorSpeechProb;
}
36 changes: 36 additions & 0 deletions src/modules/audio_processing/test/process_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ void usage() {
printf(" --ns_moderate\n");
printf(" --ns_high\n");
printf(" --ns_very_high\n");
printf(" --ns_prob_file FILE\n");
printf("\n -vad Voice activity detection\n");
printf(" --vad_out_file FILE\n");
printf("\n Level metrics (enabled by default)\n");
Expand Down Expand Up @@ -149,6 +150,7 @@ void void_main(int argc, char* argv[]) {
const char* near_filename = NULL;
const char* out_filename = NULL;
const char* vad_out_filename = NULL;
const char* ns_prob_filename = NULL;
const char* aecm_echo_path_in_filename = NULL;
const char* aecm_echo_path_out_filename = NULL;

Expand Down Expand Up @@ -336,6 +338,11 @@ void void_main(int argc, char* argv[]) {
ASSERT_EQ(apm->kNoError,
apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));

} else if (strcmp(argv[i], "--ns_prob_file") == 0) {
i++;
ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file";
ns_prob_filename = argv[i];

} else if (strcmp(argv[i], "-vad") == 0) {
ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));

Expand Down Expand Up @@ -390,6 +397,7 @@ void void_main(int argc, char* argv[]) {
const char delay_filename[] = "apm_delay.dat";
const char drift_filename[] = "apm_drift.dat";
const char vad_file_default[] = "vad_out.dat";
const char ns_prob_file_default[] = "ns_prob.dat";

if (!simulating) {
far_filename = far_file_default;
Expand All @@ -404,6 +412,10 @@ void void_main(int argc, char* argv[]) {
vad_out_filename = vad_file_default;
}

if (!ns_prob_filename) {
ns_prob_filename = ns_prob_file_default;
}

FILE* pb_file = NULL;
FILE* far_file = NULL;
FILE* near_file = NULL;
Expand All @@ -412,6 +424,7 @@ void void_main(int argc, char* argv[]) {
FILE* delay_file = NULL;
FILE* drift_file = NULL;
FILE* vad_out_file = NULL;
FILE* ns_prob_file = NULL;
FILE* aecm_echo_path_in_file = NULL;
FILE* aecm_echo_path_out_file = NULL;

Expand Down Expand Up @@ -466,6 +479,12 @@ void void_main(int argc, char* argv[]) {
<< vad_out_file;
}

if (apm->noise_suppression()->is_enabled()) {
  // Open the per-frame NS speech probability output file only when noise
  // suppression is enabled.
  ns_prob_file = fopen(ns_prob_filename, "wb");
  // Report the file *name* on failure; the FILE* itself is NULL at this point
  // and carries no useful information for the user.
  ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file "
                                    << ns_prob_filename;
}

if (aecm_echo_path_in_filename != NULL) {
aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
Expand Down Expand Up @@ -504,6 +523,7 @@ void void_main(int argc, char* argv[]) {
int drift_samples = 0;
int capture_level = 127;
int8_t stream_has_voice = 0;
float ns_speech_prob = 0.0f;

TickTime t0 = TickTime::Now();
TickTime t1 = t0;
Expand Down Expand Up @@ -643,6 +663,14 @@ void void_main(int argc, char* argv[]) {
vad_out_file));
}

if (ns_prob_file != NULL) {
ns_speech_prob = apm->noise_suppression()->speech_probability();
ASSERT_EQ(1u, fwrite(&ns_speech_prob,
sizeof(ns_speech_prob),
1,
ns_prob_file));
}

if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
ASSERT_EQ(msg.level(), capture_level);
}
Expand Down Expand Up @@ -842,6 +870,14 @@ void void_main(int argc, char* argv[]) {
vad_out_file));
}

if (ns_prob_file != NULL) {
ns_speech_prob = apm->noise_suppression()->speech_probability();
ASSERT_EQ(1u, fwrite(&ns_speech_prob,
sizeof(ns_speech_prob),
1,
ns_prob_file));
}

if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
ASSERT_EQ(capture_level_in, capture_level);
}
Expand Down
11 changes: 11 additions & 0 deletions src/modules/audio_processing/test/unit_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,7 @@ TEST_F(ApmTest, Process) {
int analog_level = 127;
int analog_level_average = 0;
int max_output_average = 0;
float ns_speech_prob_average = 0.0f;

while (1) {
if (!ReadFrame(far_file_, revframe_)) break;
Expand Down Expand Up @@ -1314,6 +1315,8 @@ TEST_F(ApmTest, Process) {
EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
}

ns_speech_prob_average += apm_->noise_suppression()->speech_probability();

size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
size_t write_count = fwrite(frame_->data_,
sizeof(int16_t),
Expand All @@ -1327,6 +1330,7 @@ TEST_F(ApmTest, Process) {
}
max_output_average /= frame_count;
analog_level_average /= frame_count;
ns_speech_prob_average /= frame_count;

#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
EchoCancellation::Metrics echo_metrics;
Expand Down Expand Up @@ -1368,6 +1372,9 @@ TEST_F(ApmTest, Process) {
EXPECT_EQ(reference_delay.std(), std);

EXPECT_EQ(test->rms_level(), rms_level);

EXPECT_FLOAT_EQ(test->ns_speech_probability_average(),
ns_speech_prob_average);
#endif
} else {
test->set_has_echo_count(has_echo_count);
Expand Down Expand Up @@ -1395,6 +1402,10 @@ TEST_F(ApmTest, Process) {
message_delay->set_std(std);

test->set_rms_level(rms_level);

EXPECT_LE(0.0f, ns_speech_prob_average);
EXPECT_GE(1.0f, ns_speech_prob_average);
test->set_ns_speech_probability_average(ns_speech_prob_average);
#endif
}

Expand Down
3 changes: 3 additions & 0 deletions src/modules/audio_processing/test/unittest.proto
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ message Test {
optional DelayMetrics delay_metrics = 12;

optional int32 rms_level = 13;

optional float ns_speech_probability_average = 14;

}

message OutputData {
Expand Down

0 comments on commit 08329f4

Please sign in to comment.