Skip to content

Commit

Permalink
Drop the 16kHz sample rate restriction on AECM and zero out higher bands
Browse files Browse the repository at this point in the history
The restriction has been removed completely, and AECM now supports any
number of higher bands. This is achieved by always zeroing out the higher
bands rather than applying a constant gain equal to the average over half of
the lower band (as is done for the AEC). Implementing that gain would be
non-trivial, and we don't want to spend too much time on AECM because we want
to get rid of it in the long term anyway.

R=peah@webrtc.org, solenberg@webrtc.org, tina.legrand@webrtc.org

Review URL: https://codereview.webrtc.org/1774553002 .

Cr-Commit-Position: refs/heads/master@{#11931}
  • Loading branch information
Alex Luebs committed Mar 9, 2016
1 parent 3ecb5c8 commit f687d53
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 75 deletions.
Binary file modified data/audio_processing/output_data_fixed.pb
Binary file not shown.
14 changes: 1 addition & 13 deletions webrtc/modules/audio_processing/audio_processing_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ const size_t AudioProcessing::kNumNativeSampleRates =
arraysize(AudioProcessing::kNativeSampleRatesHz);
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;

AudioProcessing* AudioProcessing::Create() {
Config config;
Expand Down Expand Up @@ -369,7 +368,7 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {

formats_.api_format = config;

// We process at the closest native rate >= min(input rate, output rate)...
// We process at the closest native rate >= min(input rate, output rate).
const int min_proc_rate =
std::min(formats_.api_format.input_stream().sample_rate_hz(),
formats_.api_format.output_stream().sample_rate_hz());
Expand All @@ -380,11 +379,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
break;
}
}
// ...with one exception.
if (public_submodules_->echo_control_mobile->is_enabled() &&
min_proc_rate > kMaxAECMSampleRateHz) {
fwd_proc_rate = kMaxAECMSampleRateHz;
}

capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);

Expand Down Expand Up @@ -620,12 +614,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
return kBadSampleRateError;
}

if (public_submodules_->echo_control_mobile->is_enabled() &&
frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
return kUnsupportedComponentError;
}

ProcessingConfig processing_config;
{
// Acquire lock for the access of api_format.
Expand Down
12 changes: 9 additions & 3 deletions webrtc/modules/audio_processing/echo_control_mobile_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {

handle_index++;
}
for (size_t band = 1u; band < audio->num_bands(); ++band) {
memset(audio->split_bands(i)[band],
0,
audio->num_frames_per_band() *
sizeof(audio->split_bands(i)[band][0]));
}
}

return AudioProcessing::kNoError;
Expand Down Expand Up @@ -313,8 +319,8 @@ int EchoControlMobileImpl::Initialize() {
}
}

if (apm_->proc_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
if (apm_->proc_split_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower split sample rates";
return AudioProcessing::kBadSampleRateError;
}

Expand Down Expand Up @@ -370,7 +376,7 @@ int EchoControlMobileImpl::InitializeHandle(void* handle) const {
rtc::CritScope cs_capture(crit_capture_);
assert(handle != NULL);
Handle* my_handle = static_cast<Handle*>(handle);
if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
if (WebRtcAecm_Init(my_handle, apm_->proc_split_sample_rate_hz()) != 0) {
return GetHandleError(my_handle);
}
if (external_echo_path_ != NULL) {
Expand Down
1 change: 0 additions & 1 deletion webrtc/modules/audio_processing/include/audio_processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,6 @@ class AudioProcessing {
static const int kNativeSampleRatesHz[];
static const size_t kNumNativeSampleRates;
static const int kMaxNativeSampleRateHz;
static const int kMaxAECMSampleRateHz;

static const int kChunkSizeMs = 10;
};
Expand Down
86 changes: 33 additions & 53 deletions webrtc/modules/audio_processing/test/audio_processing_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,7 @@ bool write_ref_data = false;
const google::protobuf::int32 kChannels[] = {1, 2};
const int kSampleRates[] = {8000, 16000, 32000, 48000};

#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
// AECM doesn't support super-wb.
const int kProcessSampleRates[] = {8000, 16000};
#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
const int kProcessSampleRates[] = {8000, 16000, 32000, 48000};
#endif

enum StreamDirection { kForward = 0, kReverse };

Expand Down Expand Up @@ -435,11 +430,7 @@ void ApmTest::SetUp() {
frame_ = new AudioFrame();
revframe_ = new AudioFrame();

#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
Init(16000, 16000, 16000, 2, 2, 2, false);
#else
Init(32000, 32000, 32000, 2, 2, 2, false);
#endif
}

void ApmTest::TearDown() {
Expand Down Expand Up @@ -1039,18 +1030,6 @@ TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) {
}

TEST_F(ApmTest, EchoControlMobile) {
// AECM won't use super-wideband.
SetFrameSampleRate(frame_, 32000);
EXPECT_NOERR(apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kBadSampleRateError,
apm_->echo_control_mobile()->Enable(true));
SetFrameSampleRate(frame_, 16000);
EXPECT_NOERR(apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError,
apm_->echo_control_mobile()->Enable(true));
SetFrameSampleRate(frame_, 32000);
EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_));

// Turn AECM on (and AEC off)
Init(16000, 16000, 16000, 2, 2, 2, false);
EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
Expand Down Expand Up @@ -1974,6 +1953,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
num_input_channels);

int analog_level = 127;
size_t num_bad_chunks = 0;
while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
ReadFrame(near_file_, frame_, float_cb_.get())) {
frame_->vad_activity_ = AudioFrame::kVadUnknown;
Expand Down Expand Up @@ -2012,18 +1992,13 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
float snr = ComputeSNR(output_int16.channels()[j],
output_cb.channels()[j],
samples_per_channel, &variance);
#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
// There are a few chunks in the fixed-point profile that give low SNR.
// Listening confirmed the difference is acceptable.
const float kVarianceThreshold = 150;
const float kSNRThreshold = 10;
#else

const float kVarianceThreshold = 20;
const float kSNRThreshold = 20;
#endif

// Skip frames with low energy.
if (sqrt(variance) > kVarianceThreshold) {
EXPECT_LT(kSNRThreshold, snr);
if (sqrt(variance) > kVarianceThreshold && snr < kSNRThreshold) {
++num_bad_chunks;
}
}

Expand All @@ -2039,6 +2014,16 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
// Reset in case of downmixing.
frame_->num_channels_ = static_cast<size_t>(test->num_input_channels());
}

#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
const size_t kMaxNumBadChunks = 0;
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
// There are a few chunks in the fixed-point profile that give low SNR.
// Listening confirmed the difference is acceptable.
const size_t kMaxNumBadChunks = 60;
#endif
EXPECT_LE(num_bad_chunks, kMaxNumBadChunks);

rewind(far_file_);
rewind(near_file_);
}
Expand Down Expand Up @@ -2560,11 +2545,6 @@ TEST_P(AudioProcessingTest, Formats) {
} else {
ref_rate = 8000;
}
#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
if (file_direction == kForward) {
ref_rate = std::min(ref_rate, 16000);
}
#endif
FILE* out_file = fopen(
OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
reverse_output_rate_, cf[i].num_input,
Expand Down Expand Up @@ -2716,22 +2696,22 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
CommonFormats,
AudioProcessingTest,
testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 20, 0),
std::tr1::make_tuple(48000, 48000, 32000, 48000, 20, 30),
std::tr1::make_tuple(48000, 48000, 16000, 48000, 20, 20),
std::tr1::make_tuple(48000, 44100, 48000, 44100, 15, 20),
std::tr1::make_tuple(48000, 44100, 32000, 44100, 15, 15),
std::tr1::make_tuple(48000, 44100, 16000, 44100, 15, 15),
testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30),
std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20),
std::tr1::make_tuple(48000, 44100, 48000, 44100, 25, 20),
std::tr1::make_tuple(48000, 44100, 32000, 44100, 25, 15),
std::tr1::make_tuple(48000, 44100, 16000, 44100, 25, 15),
std::tr1::make_tuple(48000, 32000, 48000, 32000, 20, 35),
std::tr1::make_tuple(48000, 32000, 32000, 32000, 20, 0),
std::tr1::make_tuple(48000, 32000, 16000, 32000, 20, 20),
std::tr1::make_tuple(48000, 16000, 48000, 16000, 20, 20),
std::tr1::make_tuple(48000, 16000, 32000, 16000, 20, 20),
std::tr1::make_tuple(48000, 16000, 16000, 16000, 20, 0),

std::tr1::make_tuple(44100, 48000, 48000, 48000, 20, 0),
std::tr1::make_tuple(44100, 48000, 32000, 48000, 20, 30),
std::tr1::make_tuple(44100, 48000, 16000, 48000, 20, 20),
std::tr1::make_tuple(44100, 48000, 48000, 48000, 15, 0),
std::tr1::make_tuple(44100, 48000, 32000, 48000, 15, 30),
std::tr1::make_tuple(44100, 48000, 16000, 48000, 15, 20),
std::tr1::make_tuple(44100, 44100, 48000, 44100, 15, 20),
std::tr1::make_tuple(44100, 44100, 32000, 44100, 15, 15),
std::tr1::make_tuple(44100, 44100, 16000, 44100, 15, 15),
Expand All @@ -2742,15 +2722,15 @@ INSTANTIATE_TEST_CASE_P(
std::tr1::make_tuple(44100, 16000, 32000, 16000, 20, 20),
std::tr1::make_tuple(44100, 16000, 16000, 16000, 20, 0),

std::tr1::make_tuple(32000, 48000, 48000, 48000, 20, 0),
std::tr1::make_tuple(32000, 48000, 32000, 48000, 20, 30),
std::tr1::make_tuple(32000, 48000, 16000, 48000, 20, 20),
std::tr1::make_tuple(32000, 44100, 48000, 44100, 15, 20),
std::tr1::make_tuple(32000, 44100, 32000, 44100, 15, 15),
std::tr1::make_tuple(32000, 44100, 16000, 44100, 15, 15),
std::tr1::make_tuple(32000, 32000, 48000, 32000, 20, 35),
std::tr1::make_tuple(32000, 32000, 32000, 32000, 20, 0),
std::tr1::make_tuple(32000, 32000, 16000, 32000, 20, 20),
std::tr1::make_tuple(32000, 48000, 48000, 48000, 35, 0),
std::tr1::make_tuple(32000, 48000, 32000, 48000, 65, 30),
std::tr1::make_tuple(32000, 48000, 16000, 48000, 40, 20),
std::tr1::make_tuple(32000, 44100, 48000, 44100, 20, 20),
std::tr1::make_tuple(32000, 44100, 32000, 44100, 20, 15),
std::tr1::make_tuple(32000, 44100, 16000, 44100, 20, 15),
std::tr1::make_tuple(32000, 32000, 48000, 32000, 35, 35),
std::tr1::make_tuple(32000, 32000, 32000, 32000, 0, 0),
std::tr1::make_tuple(32000, 32000, 16000, 32000, 40, 20),
std::tr1::make_tuple(32000, 16000, 48000, 16000, 20, 20),
std::tr1::make_tuple(32000, 16000, 32000, 16000, 20, 20),
std::tr1::make_tuple(32000, 16000, 16000, 16000, 20, 0),
Expand Down
5 changes: 0 additions & 5 deletions webrtc/voice_engine/transmit_mixer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1146,11 +1146,6 @@ void TransmitMixer::GenerateAudioFrame(const int16_t* audio,
break;
}
}
if (audioproc_->echo_control_mobile()->is_enabled()) {
// AECM only supports 8 and 16 kHz.
_audioFrame.sample_rate_hz_ = std::min(
_audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
}
_audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
&resampler_, &_audioFrame);
Expand Down

0 comments on commit f687d53

Please sign in to comment.