diff options
author | minyue <minyue@chromium.org> | 2015-11-08 11:08:33 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-11-08 19:09:14 +0000 |
commit | 6a05781d7334b28a75b3321301a8927b5cd7dc36 (patch) | |
tree | 88d37f49f648d21c54722d4d49e251a2d61aa67b | |
parent | 78fd1c0964bcb02f305a9c823c8c2e2156bc2870 (diff) | |
download | chromium_src-6a05781d7334b28a75b3321301a8927b5cd7dc36.zip chromium_src-6a05781d7334b28a75b3321301a8927b5cd7dc36.tar.gz chromium_src-6a05781d7334b28a75b3321301a8927b5cd7dc36.tar.bz2 |
Add detection for repeated audio in capturing.
This CL adds a lightweight detector for repeated audio, which has been observed in some recent recordings. The issue might have been resolved by a recent fix; see
https://chromium.googlesource.com/chromium/src/+/8d9071da52c70d300bfc0cdc0448c564b39764f4
It is still good to add a detector and UMA stats to verify the fix and fire alarms for occurrences of audio repetition due to other potential causes.
The repetition detector was originally planned for the WebRTC Audio Processing Module, but per discussion it is better placed in the content renderer. The algorithm has been reviewed; see
https://codereview.webrtc.org/1287663002/
BUG=520425
TEST=build with custom Chromium that produces repeated audio.
Review URL: https://codereview.chromium.org/1357013006
Cr-Commit-Position: refs/heads/master@{#358555}
-rw-r--r-- | content/content_renderer.gypi | 2 | ||||
-rw-r--r-- | content/content_tests.gypi | 1 | ||||
-rw-r--r-- | content/renderer/media/audio_repetition_detector.cc | 177 | ||||
-rw-r--r-- | content/renderer/media/audio_repetition_detector.h | 147 | ||||
-rw-r--r-- | content/renderer/media/audio_repetition_detector_unittest.cc | 353 | ||||
-rw-r--r-- | content/renderer/media/media_stream_audio_processor.cc | 43 | ||||
-rw-r--r-- | content/renderer/media/media_stream_audio_processor.h | 5 | ||||
-rw-r--r-- | tools/metrics/histograms/histograms.xml | 9 |
8 files changed, 737 insertions, 0 deletions
diff --git a/content/content_renderer.gypi b/content/content_renderer.gypi index 500e0b3..36a7b0f 100644 --- a/content/content_renderer.gypi +++ b/content/content_renderer.gypi @@ -623,6 +623,8 @@ # WebRTC-specific sources. Put WebRTC plugin-related stuff in the # Plugin+WebRTC section below. 'private_renderer_webrtc_sources': [ + 'renderer/media/audio_repetition_detector.cc', + 'renderer/media/audio_repetition_detector.h', 'renderer/media/media_recorder_handler.cc', 'renderer/media/media_recorder_handler.h', 'renderer/media/media_stream.cc', diff --git a/content/content_tests.gypi b/content/content_tests.gypi index 02b112b..728c41c 100644 --- a/content/content_tests.gypi +++ b/content/content_tests.gypi @@ -748,6 +748,7 @@ 'browser/renderer_host/p2p/socket_host_test_utils.h', 'browser/renderer_host/p2p/socket_host_udp_unittest.cc', 'browser/renderer_host/p2p/socket_host_unittest.cc', + 'renderer/media/audio_repetition_detector_unittest.cc', 'renderer/media/media_recorder_handler_unittest.cc', 'renderer/media/media_stream_audio_processor_unittest.cc', 'renderer/media/media_stream_constraints_util_unittest.cc', diff --git a/content/renderer/media/audio_repetition_detector.cc b/content/renderer/media/audio_repetition_detector.cc new file mode 100644 index 0000000..17b7499 --- /dev/null +++ b/content/renderer/media/audio_repetition_detector.cc @@ -0,0 +1,177 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "content/renderer/media/audio_repetition_detector.h" + +#include "base/logging.h" +#include "base/macros.h" + +namespace { + +const float EPSILON = 4.0f / 32768.0f; + +} // namespace + +namespace content { + +AudioRepetitionDetector::AudioRepetitionDetector( + int min_length_ms, size_t max_frames, + const std::vector<int>& look_back_times, + const RepetitionCallback& repetition_callback) + : max_look_back_ms_(0), + min_length_ms_(min_length_ms), + sample_rate_(0), + buffer_size_frames_(0), + buffer_end_index_(0), + max_frames_(max_frames), + repetition_callback_(repetition_callback) { + DCHECK(main_thread_checker_.CalledOnValidThread()); + processing_thread_checker_.DetachFromThread(); + + // Avoid duplications in |look_back_times| if any. + std::vector<int> temp(look_back_times); + std::sort(temp.begin(), temp.end()); + temp.erase(std::unique(temp.begin(), temp.end()), temp.end()); + + max_look_back_ms_ = temp.back(); + for (int look_back : temp) + states_.push_back(new State(look_back)); +} + +AudioRepetitionDetector::~AudioRepetitionDetector() { + DCHECK(main_thread_checker_.CalledOnValidThread()); +} + +void AudioRepetitionDetector::Detect(const float* data, size_t num_frames, + size_t num_channels, int sample_rate) { + DCHECK(processing_thread_checker_.CalledOnValidThread()); + DCHECK(!states_.empty()); + + if (num_channels != num_channels_ || sample_rate != sample_rate_) + Reset(num_channels, sample_rate); + + // The maximum number of frames |audio_buffer_| can take in is |max_frames_|. + // Therefore, input data with larger frames needs be divided into chunks. 
+ const size_t chunk_size = max_frames_ * num_channels; + while (num_frames > max_frames_) { + Detect(data, max_frames_, num_channels, sample_rate); + data += chunk_size; + num_frames -= max_frames_; + } + + if (num_frames == 0) + return; + + AddFramesToBuffer(data, num_frames); + + for (size_t idx = num_frames; idx > 0; --idx, data += num_channels) { + for (State* state : states_) { + // Look back position depends on the sample rate. It is rounded down to + // the closest integer. + const size_t look_back_frames = + state->look_back_ms() * sample_rate_ / 1000; + // Equal(data, offset) checks if |data| equals the audio frame located + // |offset| frames from the end of buffer. Now a full frame has been + // inserted to the buffer, and thus |offset| should compensate for it. + if (Equal(data, look_back_frames + idx)) { + if (!state->reported()) { + state->Increment(IsZero(data, num_channels)); + if (HasValidReport(state)) { + repetition_callback_.Run(state->look_back_ms()); + state->set_reported(true); + } + } + } else { + state->Reset(); + } + } + } +} + +AudioRepetitionDetector::State::State(int look_back_ms) + : look_back_ms_(look_back_ms) { + Reset(); +} + +void AudioRepetitionDetector::State::Increment(bool zero) { + if (zero) { + if (count_frames_ == 0) { + // If a repetition starts with zeros, we enter the all zero mode until + // a non zero is found later. The point is that the beginning zeros should + // be counted in the length of the repetition as long as the repetition + // does not comprise only zeros. 
+ all_zero_ = true; + } + } else { + all_zero_ = false; + } + ++count_frames_; +} + +void AudioRepetitionDetector::State::Reset() { + count_frames_ = 0; + all_zero_ = true; + reported_ = false; +} + +void AudioRepetitionDetector::Reset(size_t num_channels, int sample_rate) { + DCHECK(processing_thread_checker_.CalledOnValidThread()); + num_channels_ = num_channels; + sample_rate_ = sample_rate; + + // |(xxx + 999) / 1000| is an arithmetic way to round up |xxx / 1000|. + buffer_size_frames_ = + (max_look_back_ms_ * sample_rate_ + 999) / 1000 + max_frames_; + + audio_buffer_.resize(buffer_size_frames_ * num_channels_); + for (State* state : states_) + state->Reset(); +} + +void AudioRepetitionDetector::AddFramesToBuffer(const float* data, + size_t num_frames) { + DCHECK(processing_thread_checker_.CalledOnValidThread()); + DCHECK_LE(num_frames, buffer_size_frames_); + const size_t margin = buffer_size_frames_ - buffer_end_index_; + const auto it = audio_buffer_.begin() + buffer_end_index_ * num_channels_; + if (num_frames <= margin) { + std::copy(data, data + num_frames * num_channels_, it); + buffer_end_index_ += num_frames; + } else { + std::copy(data, data + margin * num_channels_, it); + std::copy(data + margin * num_channels_, data + num_frames * num_channels_, + audio_buffer_.begin()); + buffer_end_index_ = num_frames - margin; + } +} + +bool AudioRepetitionDetector::Equal(const float* frame, + int look_back_frames) const { + DCHECK(processing_thread_checker_.CalledOnValidThread()); + const size_t look_back_index = + (buffer_end_index_ + buffer_size_frames_ - look_back_frames) % + buffer_size_frames_ ; + auto it = audio_buffer_.begin() + look_back_index * num_channels_; + for (size_t channel = 0; channel < num_channels_; ++channel, ++frame, ++it) { + if (*frame != *it) + return false; + } + return true; +} + +bool AudioRepetitionDetector::IsZero(const float* frame, + size_t num_channels) const { + for (size_t channel = 0; channel < num_channels; ++channel, 
++frame) { + if (*frame < -EPSILON || *frame > EPSILON) + return false; + } + return true; +} + +bool AudioRepetitionDetector::HasValidReport(const State* state) const { + return (!state->all_zero() && state->count_frames() >= + static_cast<size_t>(min_length_ms_ * sample_rate_ / 1000)); +} + +} // namespace content diff --git a/content/renderer/media/audio_repetition_detector.h b/content/renderer/media/audio_repetition_detector.h new file mode 100644 index 0000000..6c4b00a --- /dev/null +++ b/content/renderer/media/audio_repetition_detector.h @@ -0,0 +1,147 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_RENDERER_MEDIA_AUDIO_REPETITION_DETECTOR_H_ +#define CONTENT_RENDERER_MEDIA_AUDIO_REPETITION_DETECTOR_H_ + +#include <vector> + +#include "base/callback.h" +#include "base/macros.h" +#include "base/memory/scoped_vector.h" +#include "base/threading/thread_checker.h" +#include "content/common/content_export.h" + +namespace content { + +// AudioRepetitionDetector detects bit-exact audio repetitions of registered +// patterns. A repetition pattern is defined by a look back time. The detector +// buffers the audio signal and checks equality of each input sample against the +// samples at the look back positions of all registered patterns, and counts the +// duration of any consecutive equality. +// All methods should be called from the same thread. However, we allow the +// construction and destruction be made from a separate thread. + +class CONTENT_EXPORT AudioRepetitionDetector { + public: + // Callback that defines the action upon a repetition is detected. One int + // parameter to the callback is the look back time (in milliseconds) of the + // detected repetition. + typedef base::Callback<void(int)> RepetitionCallback; + + // |min_length_ms| is the minimum duration (in milliseconds) of repetitions + // that count. 
+ // |max_frames| is the maximum number of audio frames that will be provided to + // |Detect()| each time. Input longer than |max_frames| won't cause any + // problem, and will only affect computational efficiency. + // |look_back_times| is a vector of look back times (in milliseconds) for the + // detector to keep track. + AudioRepetitionDetector(int min_length_ms, size_t max_frames, + const std::vector<int>& look_back_times, + const RepetitionCallback& repetition_callback); + + virtual ~AudioRepetitionDetector(); + + // Detect repetition in |data|. |sample_rate| is measured in Hz. + void Detect(const float* data, size_t num_frames, size_t num_channels, + int sample_rate); + + private: + friend class AudioRepetitionDetectorForTest; + + // A state is used by the detector to keep track of a consecutive repetition, + // whether the samples in a repetition are all zeros, and whether a repetition + // has been reported. + class State { + public: + explicit State(int look_back_ms); + + int look_back_ms() const { return look_back_ms_; }; + size_t count_frames() const { return count_frames_; } + bool all_zero() const { return all_zero_; } + bool reported() const { return reported_; } + void set_reported(bool reported) { reported_ = reported; } + + // Increase |count_frames_| by 1, and |zero| indidates whether the added + // audio frame is zero. + void Increment(bool zero); + + void Reset(); + + private: + // Look back time of the repetition pattern this state keeps track of. + const int look_back_ms_; + + // Counter of frames in a consecutive repetition. + size_t count_frames_; + + // Whether a repetition contains only zeros. + bool all_zero_; + + // |reported_| tells whether a repetition has been reported. This is to make + // sure that a repetition with a long duration will be reported as early as + // being detected but no more than one time. 
+ bool reported_; + + DISALLOW_COPY_AND_ASSIGN(State); + }; + + // Reset |audio_buffer_| when number of channels or sample rate (Hz) changes. + void Reset(size_t num_channels, int sample_rate); + + // Add frames (interleaved if stereo) to |audio_buffer_|. + void AddFramesToBuffer(const float* data, size_t num_frames); + + // Determine if an audio frame (samples interleaved if stereo) is identical to + // |audio_buffer_| at a look back position. + bool Equal(const float* frame, int look_back_samples) const; + + // Determine if an audio frame (samples interleaved if stereo) is zero. + bool IsZero(const float* frame, size_t num_channels) const; + + // Check whether the state contains a valid repetition report. + bool HasValidReport(const State* state) const; + + // Used to DCHECK that we are called on the correct thread. Ctor/dtor + // should be called on one thread. The rest can be called on another. + base::ThreadChecker main_thread_checker_; + base::ThreadChecker processing_thread_checker_; + + ScopedVector<State> states_; + + // Ring buffer to store input audio. + std::vector<float> audio_buffer_; + + // Maximum look back time of all registered repetitions. This defines the size + // of |audio_buffer_| + int max_look_back_ms_; + + // The shortest length for repetitions. + const int min_length_ms_; + + // Number of audio channels in buffer. + size_t num_channels_; + + // Sample rate in Hz. + int sample_rate_; + + // Number of frames in |audio_buffer|. + size_t buffer_size_frames_; + + // The index of the last frame in |audio_buffer|. + size_t buffer_end_index_; + + // The maximum frames |audio_buffer_| can take in each time. + const size_t max_frames_; + + // Action when a repetition is found. |look_back_ms| provides the look back + // time of the detected repetition. 
+ RepetitionCallback repetition_callback_; + + DISALLOW_COPY_AND_ASSIGN(AudioRepetitionDetector); +}; + +} // namespace content + +#endif // CONTENT_RENDERER_MEDIA_AUDIO_REPETITION_DETECTOR_H_ diff --git a/content/renderer/media/audio_repetition_detector_unittest.cc b/content/renderer/media/audio_repetition_detector_unittest.cc new file mode 100644 index 0000000..cdb49b2 --- /dev/null +++ b/content/renderer/media/audio_repetition_detector_unittest.cc @@ -0,0 +1,353 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <map> + +#include "base/bind.h" +#include "base/macros.h" +#include "base/rand_util.h" +#include "content/renderer/media/audio_repetition_detector.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace content { + +namespace { +const int kDefaultMinLengthMs = 1; +const size_t kDefaultMaxFrames = 480; // 10 ms * 48 kHz + +// Sample rate used in many tests. We choose a special sample rate in order to +// make the test signal obvious. +const int kSampleRateHz = 1000; + +} + +class AudioRepetitionDetectorForTest : public AudioRepetitionDetector { + public: + AudioRepetitionDetectorForTest(int min_length_ms, size_t max_frames, + const int* look_back_times, + size_t num_look_back) + : AudioRepetitionDetector( + min_length_ms, max_frames, + std::vector<int>(look_back_times, look_back_times + num_look_back), + base::Bind(&AudioRepetitionDetectorForTest::OnRepetitionDetected, + base::Unretained(this))) { + } + + int GetCount(int look_back_ms) const { + auto it = counters_.find(look_back_ms); + return it == counters_.end() ? 
0 : it->second; + } + + void ResetCounters() { + counters_.clear(); + } + + private: + void OnRepetitionDetected(int look_back_ms) { + auto it = counters_.find(look_back_ms); + if (it == counters_.end()) { + counters_.insert(std::pair<int, size_t>(look_back_ms, 1)); + return; + } + it->second++; + } + + std::map<int, size_t> counters_; +}; + +class AudioRepetitionDetectorTest : public ::testing::Test { + public: + AudioRepetitionDetectorTest() + : detector_(nullptr) { + } + + protected: + struct ExpectedCount { + int look_back_ms; + int count; + }; + + // Verify if the counts on the repetition patterns match expectation after + // injecting a signal. No reset on the counters + void Verify(const ExpectedCount* expected_counts, size_t num_patterns, + const float* tester, size_t num_frames, + int sample_rate_hz, size_t channels = 1) { + detector_->Detect(tester, num_frames, channels, sample_rate_hz); + for (size_t idx = 0; idx < num_patterns; ++idx) { + const int look_back_ms = expected_counts[idx].look_back_ms; + EXPECT_EQ(expected_counts[idx].count, detector_->GetCount(look_back_ms)) + << "Repetition with look back " + << look_back_ms + << " ms counted wrong."; + } + } + + void VerifyStereo(const ExpectedCount* expected_counts, size_t num_patterns, + const float* tester, size_t num_frames, + int sample_rate_hz) { + static const size_t kNumChannels = 2; + + // Get memory to store interleaved stereo. 
+ scoped_ptr<float[]> tester_stereo( + new float[num_frames * kNumChannels]); + + for (size_t idx = 0; idx < num_frames; ++idx, ++tester) { + for (size_t channel = 0; channel < kNumChannels; ++channel) + tester_stereo[idx * kNumChannels + channel] = *tester; + } + + Verify(expected_counts, num_patterns, tester_stereo.get(), + num_frames, sample_rate_hz, kNumChannels); + } + + void SetDetector(int min_length_ms, size_t max_frames, + const int* look_back_times, size_t num_look_back) { + detector_.reset(new AudioRepetitionDetectorForTest(min_length_ms, + max_frames, + look_back_times, + num_look_back)); + } + + void ResetCounters() { + detector_->ResetCounters(); + } + + private: + scoped_ptr<AudioRepetitionDetectorForTest> detector_; +}; + +TEST_F(AudioRepetitionDetectorTest, Basic) { + // Check that one look back time will registered only once. + const int kLookbackTimes[] = {3, 3, 3, 3}; + + const float kTestSignal[] = {1, 2, 3, 1, 2, 3}; + const ExpectedCount kExpectedCounts_1[] = { + {3, 1} + }; + const ExpectedCount kExpectedCounts_2[] = { + {3, 1} + }; + + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + Verify(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + + VerifyStereo(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + VerifyStereo(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, StereoOutOfSync) { + const int kLookbackTimes[] = {3}; + const float kTestSignal[] = { + 1, 1, + 2, 2, + 3, 3, + 1, 1, + 2, 2, + 3, 1}; + const ExpectedCount kExpectedCounts[] = { + {3, 0} + }; + + // By default, any repetition longer than 1 ms (1 sample at 1000 Hz) will be + // counted as 
repetition. This test needs to make it longer. + SetDetector(3, kDefaultMaxFrames, kLookbackTimes, arraysize(kLookbackTimes)); + Verify(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal) / 2, kSampleRateHz, 2); +} + +TEST_F(AudioRepetitionDetectorTest, IncompletePattern) { + const int kLookbackTimes[] = {3}; + const float kTestSignal[] = {1, 2, 1, 2, 3, 1, 2, 3}; + const ExpectedCount kExpectedCounts[] = { + {3, 1}, + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, PatternLongerThanFrame) { + // To make the test signal most obvious, we choose a special sample rate. + const int kSampleRateHz = 1000; + + const int kLookbackTimes[] = {6}; + const float kTestSignal_1[] = {1, 2, 3, 4, 5}; + const float kTestSignal_2[] = {6, 1, 2, 3, 4, 5, 6}; + const ExpectedCount kExpectedCounts_1[] = { + {6, 0}, + }; + const ExpectedCount kExpectedCounts_2[] = { + {6, 1}, + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal_1, + arraysize(kTestSignal_1), kSampleRateHz); + Verify(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal_2, + arraysize(kTestSignal_2), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal_1, + arraysize(kTestSignal_1), kSampleRateHz); + VerifyStereo(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal_2, + arraysize(kTestSignal_2), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, TwoPatterns) { + const int kLookbackTimes[] = {3, 4}; + const float kTestSignal[] = {1, 2, 3, 1, 2, 3, 4, 1, 2, 
3, 4}; + const ExpectedCount kExpectedCounts[] = { + // 1,2,3 belongs to both patterns. + {3, 1}, + {4, 1} + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, MaxFramesShorterThanInput) { + // To make the test signal most obvious, we choose a special sample rate. + const int kSampleRateHz = 1000; + + const int kLookbackTimes[] = {3, 4}; + const float kTestSignal[] = {1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4}; + const ExpectedCount kExpectedCounts[] = { + // 1,2,3 belongs to both patterns. + {3, 1}, + {4, 1} + }; + + // length of kTestSignal is 11 but I set maximum frames to be 2. The detection + // should still work. + SetDetector(kDefaultMinLengthMs, 2, kLookbackTimes,arraysize(kLookbackTimes)); + Verify(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, NestedPatterns) { + const int kLookbackTimes[] = {6, 3}; + const float kTestSignal[] = {1, 2, 3, 1, 2, 3}; + const ExpectedCount kExpectedCounts_1[] = { + {3, 1}, + {6, 0} + }; + const ExpectedCount kExpectedCounts_2[] = { + {3, 1}, + {6, 1} + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + Verify(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal, + 
arraysize(kTestSignal), kSampleRateHz); + VerifyStereo(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, NotFullLengthPattern) { + const int kLookbackTimes[] = {4}; + const float kTestSignal[] = {1, 2, 3, -1, 1, 2, 3, -2}; + const ExpectedCount kExpectedCounts[] = { + {4, 1}, + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts, arraysize(kExpectedCounts), kTestSignal, + arraysize(kTestSignal), kSampleRateHz); +} + +TEST_F(AudioRepetitionDetectorTest, ZerosCountOrNot) { + const int kLookbackTimes[] = {3}; + const float kTestSignal_1[] = {0, 0, 0, 0, 0, 0}; + const float kTestSignal_2[] = {0, 1, 2, 0, 1, 2}; + const ExpectedCount kExpectedCounts_1[] = { + // Full zeros won't count. + {3, 0}, + }; + const ExpectedCount kExpectedCounts_2[] = { + // Partial zero will count. 
+ {3, 1}, + }; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal_1, + arraysize(kTestSignal_1), kSampleRateHz); + Verify(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal_2, + arraysize(kTestSignal_2), kSampleRateHz); + ResetCounters(); + VerifyStereo(kExpectedCounts_1, arraysize(kExpectedCounts_1), kTestSignal_1, + arraysize(kTestSignal_1), kSampleRateHz); + VerifyStereo(kExpectedCounts_2, arraysize(kExpectedCounts_2), kTestSignal_2, + arraysize(kTestSignal_2), kSampleRateHz); +} + +// Previous tests use short signal to test the detection algorithm, this one +// tests a normal frame size +TEST_F(AudioRepetitionDetectorTest, NormalSignal) { + const int kNormalSampleRateHz = 44100; + // Let the signal be "*(4ms)-A(13ms)-*(100ms)-A", where * denotes random + // samples. + const size_t kPreSamples = kNormalSampleRateHz * 4 / 1000; + const size_t kRepSamples = kNormalSampleRateHz * 13 / 1000; + const size_t kSkipSamples = kNormalSampleRateHz * 100 / 1000; + const size_t kSamples = kPreSamples + kRepSamples * 2 + kSkipSamples; + + const int kLookbackTimes[] = {80, 90, 100, 110, 120}; + + float test_signal[kSamples]; + size_t idx = 0; + for (; idx < kPreSamples + kRepSamples + kSkipSamples; ++idx) + test_signal[idx] = static_cast<float>(base::RandDouble()); + + for (; idx < kSamples; ++idx) + test_signal[idx] = test_signal[idx - kSkipSamples]; + + ExpectedCount expect_counts[arraysize(kLookbackTimes)]; + for (size_t i = 0; i < arraysize(kLookbackTimes); ++i) { + expect_counts[i].look_back_ms = kLookbackTimes[i]; + expect_counts[i].count = 0; + } + + // We only expect a repetition with 100 ms look back time. 
+ expect_counts[2].count = 1; + + SetDetector(kDefaultMinLengthMs, kDefaultMaxFrames, kLookbackTimes, + arraysize(kLookbackTimes)); + Verify(expect_counts, arraysize(expect_counts), test_signal, kSamples, + kNormalSampleRateHz); +} + +} // namespace content diff --git a/content/renderer/media/media_stream_audio_processor.cc b/content/renderer/media/media_stream_audio_processor.cc index 18f7bed..c736696 100644 --- a/content/renderer/media/media_stream_audio_processor.cc +++ b/content/renderer/media/media_stream_audio_processor.cc @@ -29,6 +29,32 @@ using webrtc::NoiseSuppression; const int kAudioProcessingNumberOfChannels = 1; +// Minimum duration of any detectable audio repetition. +const int kMinLengthMs = 1; + +// The following variables defines the look back time of audio repetitions that +// will be logged. The complexity of the detector is proportional to the number +// of look back times we keep track. +const int kMinLookbackTimeMs = 10; +const int kMaxLookbackTimeMs = 200; +const int kLookbackTimeStepMs = 10; + +// Maximum frames of any input chunk of audio. Used by +// |MediaStreamAudioProcessor::audio_repetition_detector_|. Input longer than +// |kMaxFrames| won't cause any problem, and will only affect computational +// efficiency. +const size_t kMaxFrames = 480; // 10 ms * 48 kHz + +// Send UMA report on an audio repetition being detected. |look_back_ms| +// provides the look back time of the detected repetition. This function is +// called back by |MediaStreamAudioProcessor::audio_repetition_detector_|. 
+void ReportRepetition(int look_back_ms) { + UMA_HISTOGRAM_CUSTOM_COUNTS( + "Media.AudioCapturerRepetition", look_back_ms, + kMinLookbackTimeMs, kMaxLookbackTimeMs, + (kMaxLookbackTimeMs - kMinLookbackTimeMs) / kLookbackTimeStepMs + 1); +} + AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) { switch (media_layout) { case media::CHANNEL_LAYOUT_MONO: @@ -252,6 +278,16 @@ MediaStreamAudioProcessor::MediaStreamAudioProcessor( // ensure that we do get the filter when we should. if (aec_dump_message_filter_.get()) aec_dump_message_filter_->AddDelegate(this); + + // Create and configure |audio_repetition_detector_|. + std::vector<int> look_back_times; + for (int time = kMaxLookbackTimeMs; time >= kMinLookbackTimeMs; + time -= kLookbackTimeStepMs) { + look_back_times.push_back(time); + } + audio_repetition_detector_.reset( + new AudioRepetitionDetector(kMinLengthMs, kMaxFrames, look_back_times, + base::Bind(&ReportRepetition))); } MediaStreamAudioProcessor::~MediaStreamAudioProcessor() { @@ -297,6 +333,13 @@ bool MediaStreamAudioProcessor::ProcessAndConsumeData( if (!capture_fifo_->Consume(&process_bus, capture_delay)) return false; + // Detect bit-exact repetition of audio present in the captured audio. + // We detect only one channel. + audio_repetition_detector_->Detect(process_bus->bus()->channel(0), + process_bus->bus()->frames(), + 1, // number of channels + input_format_.sample_rate()); + // Use the process bus directly if audio processing is disabled. 
MediaStreamAudioBus* output_bus = process_bus; *new_volume = 0; diff --git a/content/renderer/media/media_stream_audio_processor.h b/content/renderer/media/media_stream_audio_processor.h index 4b74ca0a..f5d5781 100644 --- a/content/renderer/media/media_stream_audio_processor.h +++ b/content/renderer/media/media_stream_audio_processor.h @@ -14,6 +14,7 @@ #include "content/common/content_export.h" #include "content/public/common/media_stream_request.h" #include "content/renderer/media/aec_dump_message_filter.h" +#include "content/renderer/media/audio_repetition_detector.h" #include "content/renderer/media/webrtc_audio_device_impl.h" #include "media/base/audio_converter.h" #include "third_party/libjingle/source/talk/app/webrtc/mediastreaminterface.h" @@ -113,6 +114,7 @@ class CONTENT_EXPORT MediaStreamAudioProcessor : private: friend class MediaStreamAudioProcessorTest; + FRIEND_TEST_ALL_PREFIXES(MediaStreamAudioProcessorTest, GetAecDumpMessageFilter); @@ -153,6 +155,9 @@ class CONTENT_EXPORT MediaStreamAudioProcessor : // both the capture audio thread and the render audio thread. base::subtle::Atomic32 render_delay_ms_; + // Module to detect and report (to UMA) bit exact audio repetition. + scoped_ptr<AudioRepetitionDetector> audio_repetition_detector_; + // Module to handle processing and format conversion. scoped_ptr<webrtc::AudioProcessing> audio_processing_; diff --git a/tools/metrics/histograms/histograms.xml b/tools/metrics/histograms/histograms.xml index 5e9d7a1..359e403 100644 --- a/tools/metrics/histograms/histograms.xml +++ b/tools/metrics/histograms/histograms.xml @@ -17636,6 +17636,15 @@ http://cs/file:chrome/histograms.xml - but prefer this file for new entries. </summary> </histogram> +<histogram name="Media.AudioCapturerRepetition" units="milliseconds"> + <owner>minyue@chromium.org</owner> + <summary> + Captures bit-exact audio repetitions with pre-defined look back time. As + soon as a repetition is detected, its look back time is reported. 
Ideally, + no reports should be generated. + </summary> +</histogram> + <histogram name="Media.AudioChannelLayout" enum="ChannelLayout"> <owner>dalecurtis@chromium.org</owner> <summary>Audio channel layout in HTML5 media.</summary> |