diff options
Diffstat (limited to 'content/browser/speech/endpointer/endpointer_unittest.cc')
-rw-r--r-- | content/browser/speech/endpointer/endpointer_unittest.cc | 146 |
1 file changed, 146 insertions, 0 deletions
diff --git a/content/browser/speech/endpointer/endpointer_unittest.cc b/content/browser/speech/endpointer/endpointer_unittest.cc new file mode 100644 index 0000000..3d1583e --- /dev/null +++ b/content/browser/speech/endpointer/endpointer_unittest.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/task.h" +#include "content/browser/speech/endpointer/endpointer.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { +const int kFrameRate = 50; // 20 ms long frames for AMR encoding. +const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. + +// At 8 sample per second a 20 ms frame is 160 samples, which corrsponds +// to the AMR codec. +const int kFrameSize = kSampleRate / kFrameRate; // 160 samples. +COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size); +} + +namespace speech_input { + +class FrameProcessor { + public: + // Process a single frame of test audio samples. + virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0; +}; + +void RunEndpointerEventsTest(FrameProcessor* processor) { + int16 samples[kFrameSize]; + + // We will create a white noise signal of 150 frames. The frames from 50 to + // 100 will have more power, and the endpointer should fire on those frames. + const int kNumFrames = 150; + + // Create a random sequence of samples. + srand(1); + float gain = 0.0; + int64 time = 0; + for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) { + // The frames from 50 to 100 will have more power, and the endpointer + // should detect those frames as speech. + if ((frame_count >= 50) && (frame_count < 100)) { + gain = 2000.0; + } else { + gain = 1.0; + } + // Create random samples. 
+ for (int i = 0; i < kFrameSize; ++i) { + float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) / + static_cast<float>(RAND_MAX); + samples[i] = static_cast<int16>(gain * randNum); + } + + EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize); + time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate)); + + // Log the status. + if (20 == frame_count) + EXPECT_EQ(EP_PRE_SPEECH, ep_status); + if (70 == frame_count) + EXPECT_EQ(EP_SPEECH_PRESENT, ep_status); + if (120 == frame_count) + EXPECT_EQ(EP_PRE_SPEECH, ep_status); + } +} + +// This test instantiates and initializes a stand alone endpointer module. +// The test creates FrameData objects with random noise and send them +// to the endointer module. The energy of the first 50 frames is low, +// followed by 500 high energy frames, and another 50 low energy frames. +// We test that the correct start and end frames were detected. +class EnergyEndpointerFrameProcessor : public FrameProcessor { + public: + explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer) + : endpointer_(endpointer) {} + + EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { + endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL); + int64 ep_time; + return endpointer_->Status(&ep_time); + } + + private: + EnergyEndpointer* endpointer_; +}; + +TEST(EndpointerTest, TestEnergyEndpointerEvents) { + // Initialize endpointer and configure it. We specify the parameters + // here for a 20ms window, and a 20ms step size, which corrsponds to + // the narrow band AMR codec. 
+ EnergyEndpointerParams ep_config; + ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate)); + ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate)); + ep_config.set_endpoint_margin(0.2f); + ep_config.set_onset_window(0.15f); + ep_config.set_speech_on_window(0.4f); + ep_config.set_offset_window(0.15f); + ep_config.set_onset_detect_dur(0.09f); + ep_config.set_onset_confirm_dur(0.075f); + ep_config.set_on_maintain_dur(0.10f); + ep_config.set_offset_confirm_dur(0.12f); + ep_config.set_decision_threshold(100.0f); + EnergyEndpointer endpointer; + endpointer.Init(ep_config); + + endpointer.StartSession(); + + EnergyEndpointerFrameProcessor frame_processor(&endpointer); + RunEndpointerEventsTest(&frame_processor); + + endpointer.EndSession(); +}; + +// Test endpointer wrapper class. +class EndpointerFrameProcessor : public FrameProcessor { + public: + explicit EndpointerFrameProcessor(Endpointer* endpointer) + : endpointer_(endpointer) {} + + EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { + endpointer_->ProcessAudio(samples, kFrameSize, NULL); + int64 ep_time; + return endpointer_->Status(&ep_time); + } + + private: + Endpointer* endpointer_; +}; + +TEST(EndpointerTest, TestEmbeddedEndpointerEvents) { + const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. + + Endpointer endpointer(kSampleRate); + const int64 kMillisecondsPerMicrosecond = 1000; + const int64 short_timeout = 300 * kMillisecondsPerMicrosecond; + endpointer.set_speech_input_possibly_complete_silence_length(short_timeout); + const int64 long_timeout = 500 * kMillisecondsPerMicrosecond; + endpointer.set_speech_input_complete_silence_length(long_timeout); + endpointer.StartSession(); + + EndpointerFrameProcessor frame_processor(&endpointer); + RunEndpointerEventsTest(&frame_processor); + + endpointer.EndSession(); +} + +} // namespace speech_input |