// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/renderer/media/media_stream_audio_processor.h" #include "base/command_line.h" #include "base/debug/trace_event.h" #if defined(OS_MACOSX) #include "base/metrics/field_trial.h" #endif #include "base/metrics/histogram.h" #include "content/public/common/content_switches.h" #include "content/renderer/media/media_stream_audio_processor_options.h" #include "content/renderer/media/rtc_media_constraints.h" #include "content/renderer/media/webrtc_audio_device_impl.h" #include "media/audio/audio_parameters.h" #include "media/base/audio_converter.h" #include "media/base/audio_fifo.h" #include "media/base/channel_layout.h" #include "third_party/WebKit/public/platform/WebMediaConstraints.h" #include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h" #include "third_party/webrtc/modules/audio_processing/typing_detection.h" namespace content { namespace { using webrtc::AudioProcessing; #if defined(OS_ANDROID) const int kAudioProcessingSampleRate = 16000; #else const int kAudioProcessingSampleRate = 32000; #endif const int kAudioProcessingNumberOfChannels = 1; AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) { switch (media_layout) { case media::CHANNEL_LAYOUT_MONO: return AudioProcessing::kMono; case media::CHANNEL_LAYOUT_STEREO: return AudioProcessing::kStereo; case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC: return AudioProcessing::kStereoAndKeyboard; default: NOTREACHED() << "Layout not supported: " << media_layout; return AudioProcessing::kMono; } } AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) { switch (num_channels) { case 1: return AudioProcessing::kMono; case 2: return AudioProcessing::kStereo; default: NOTREACHED() << "Channels not supported: " << num_channels; return AudioProcessing::kMono; } } // Used by UMA histograms and entries shouldn't be re-ordered or removed. enum AudioTrackProcessingStates { AUDIO_PROCESSING_ENABLED = 0, AUDIO_PROCESSING_DISABLED, AUDIO_PROCESSING_IN_WEBRTC, AUDIO_PROCESSING_MAX }; void RecordProcessingState(AudioTrackProcessingStates state) { UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates", state, AUDIO_PROCESSING_MAX); } } // namespace // Wraps AudioBus to provide access to the array of channel pointers, since this // is the type webrtc::AudioProcessing deals in. The array is refreshed on every // channel_ptrs() call, and will be valid until the underlying AudioBus pointers // are changed, e.g. through calls to SetChannelData() or SwapChannels(). // // All methods are called on one of the capture or render audio threads // exclusively. class MediaStreamAudioBus { public: MediaStreamAudioBus(int channels, int frames) : bus_(media::AudioBus::Create(channels, frames)), channel_ptrs_(new float*[channels]) { // May be created in the main render thread and used in the audio threads. thread_checker_.DetachFromThread(); } media::AudioBus* bus() { DCHECK(thread_checker_.CalledOnValidThread()); return bus_.get(); } float* const* channel_ptrs() { DCHECK(thread_checker_.CalledOnValidThread()); for (int i = 0; i < bus_->channels(); ++i) { channel_ptrs_[i] = bus_->channel(i); } return channel_ptrs_.get(); } private: base::ThreadChecker thread_checker_; scoped_ptr bus_; scoped_ptr channel_ptrs_; }; // Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor. // It avoids the FIFO when the source and destination frames match. All methods // are called on one of the capture or render audio threads exclusively. class MediaStreamAudioFifo { public: MediaStreamAudioFifo(int channels, int source_frames, int destination_frames) : source_frames_(source_frames), destination_(new MediaStreamAudioBus(channels, destination_frames)), data_available_(false) { if (source_frames != destination_frames) { // Since we require every Push to be followed by as many Consumes as // possible, twice the larger of the two is a (probably) loose upper bound // on the FIFO size. const int fifo_frames = 2 * std::max(source_frames, destination_frames); fifo_.reset(new media::AudioFifo(channels, fifo_frames)); } // May be created in the main render thread and used in the audio threads. thread_checker_.DetachFromThread(); } void Push(const media::AudioBus* source) { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK_EQ(source->channels(), destination_->bus()->channels()); DCHECK_EQ(source->frames(), source_frames_); if (fifo_) { fifo_->Push(source); } else { source->CopyTo(destination_->bus()); data_available_ = true; } } // Returns true if there are destination_frames() of data available to be // consumed, and otherwise false. bool Consume(MediaStreamAudioBus** destination) { DCHECK(thread_checker_.CalledOnValidThread()); if (fifo_) { if (fifo_->frames() < destination_->bus()->frames()) return false; fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames()); } else { if (!data_available_) return false; // The data was already copied to |destination_| in this case. data_available_ = false; } *destination = destination_.get(); return true; } private: base::ThreadChecker thread_checker_; const int source_frames_; // For a DCHECK. scoped_ptr destination_; scoped_ptr fifo_; // Only used when the FIFO is disabled; bool data_available_; }; bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() { return !CommandLine::ForCurrentProcess()->HasSwitch( switches::kDisableAudioTrackProcessing); } MediaStreamAudioProcessor::MediaStreamAudioProcessor( const blink::WebMediaConstraints& constraints, int effects, WebRtcPlayoutDataSource* playout_data_source) : render_delay_ms_(0), playout_data_source_(playout_data_source), audio_mirroring_(false), typing_detected_(false), stopped_(false) { capture_thread_checker_.DetachFromThread(); render_thread_checker_.DetachFromThread(); InitializeAudioProcessingModule(constraints, effects); if (IsAudioTrackProcessingEnabled()) { aec_dump_message_filter_ = AecDumpMessageFilter::Get(); // In unit tests not creating a message filter, |aec_dump_message_filter_| // will be NULL. We can just ignore that. Other unit tests and browser tests // ensure that we do get the filter when we should. if (aec_dump_message_filter_.get()) aec_dump_message_filter_->AddDelegate(this); } } MediaStreamAudioProcessor::~MediaStreamAudioProcessor() { DCHECK(main_thread_checker_.CalledOnValidThread()); Stop(); } void MediaStreamAudioProcessor::OnCaptureFormatChanged( const media::AudioParameters& input_format) { DCHECK(main_thread_checker_.CalledOnValidThread()); // There is no need to hold a lock here since the caller guarantees that // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks // on the capture thread. InitializeCaptureFifo(input_format); // Reset the |capture_thread_checker_| since the capture data will come from // a new capture thread. capture_thread_checker_.DetachFromThread(); } void MediaStreamAudioProcessor::PushCaptureData( const media::AudioBus* audio_source) { DCHECK(capture_thread_checker_.CalledOnValidThread()); capture_fifo_->Push(audio_source); } bool MediaStreamAudioProcessor::ProcessAndConsumeData( base::TimeDelta capture_delay, int volume, bool key_pressed, int* new_volume, int16** out) { DCHECK(capture_thread_checker_.CalledOnValidThread()); TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData"); MediaStreamAudioBus* process_bus; if (!capture_fifo_->Consume(&process_bus)) return false; // Use the process bus directly if audio processing is disabled. MediaStreamAudioBus* output_bus = process_bus; *new_volume = 0; if (audio_processing_) { output_bus = output_bus_.get(); *new_volume = ProcessData(process_bus->channel_ptrs(), process_bus->bus()->frames(), capture_delay, volume, key_pressed, output_bus->channel_ptrs()); } // Swap channels before interleaving the data. if (audio_mirroring_ && output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) { // Swap the first and second channels. output_bus->bus()->SwapChannels(0, 1); } output_bus->bus()->ToInterleaved(output_bus->bus()->frames(), sizeof(int16), output_data_.get()); *out = output_data_.get(); return true; } void MediaStreamAudioProcessor::Stop() { DCHECK(main_thread_checker_.CalledOnValidThread()); if (stopped_) return; stopped_ = true; if (aec_dump_message_filter_.get()) { aec_dump_message_filter_->RemoveDelegate(this); aec_dump_message_filter_ = NULL; } if (!audio_processing_.get()) return; StopEchoCancellationDump(audio_processing_.get()); if (playout_data_source_) { playout_data_source_->RemovePlayoutSink(this); playout_data_source_ = NULL; } } const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const { return input_format_; } const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const { return output_format_; } void MediaStreamAudioProcessor::OnAecDumpFile( const IPC::PlatformFileForTransit& file_handle) { DCHECK(main_thread_checker_.CalledOnValidThread()); base::File file = IPC::PlatformFileForTransitToFile(file_handle); DCHECK(file.IsValid()); if (audio_processing_) StartEchoCancellationDump(audio_processing_.get(), file.Pass()); else file.Close(); } void MediaStreamAudioProcessor::OnDisableAecDump() { DCHECK(main_thread_checker_.CalledOnValidThread()); if (audio_processing_) StopEchoCancellationDump(audio_processing_.get()); } void MediaStreamAudioProcessor::OnIpcClosing() { DCHECK(main_thread_checker_.CalledOnValidThread()); aec_dump_message_filter_ = NULL; } void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus, int sample_rate, int audio_delay_milliseconds) { DCHECK(render_thread_checker_.CalledOnValidThread()); DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^ audio_processing_->echo_cancellation()->is_enabled()); TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData"); DCHECK_LT(audio_delay_milliseconds, std::numeric_limits::max()); base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds); InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(), audio_bus->frames()); render_fifo_->Push(audio_bus); MediaStreamAudioBus* analysis_bus; while (render_fifo_->Consume(&analysis_bus)) { audio_processing_->AnalyzeReverseStream( analysis_bus->channel_ptrs(), analysis_bus->bus()->frames(), sample_rate, ChannelsToLayout(audio_bus->channels())); } } void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() { DCHECK(main_thread_checker_.CalledOnValidThread()); // There is no need to hold a lock here since the caller guarantees that // there is no more OnPlayoutData() callback on the render thread. render_thread_checker_.DetachFromThread(); render_fifo_.reset(); } void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) { stats->typing_noise_detected = (base::subtle::Acquire_Load(&typing_detected_) != false); GetAecStats(audio_processing_.get(), stats); } void MediaStreamAudioProcessor::InitializeAudioProcessingModule( const blink::WebMediaConstraints& constraints, int effects) { DCHECK(!audio_processing_); MediaAudioConstraints audio_constraints(constraints, effects); // Audio mirroring can be enabled even though audio processing is otherwise // disabled. audio_mirroring_ = audio_constraints.GetProperty( MediaAudioConstraints::kGoogAudioMirroring); if (!IsAudioTrackProcessingEnabled()) { RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC); return; } #if defined(OS_IOS) // On iOS, VPIO provides built-in AGC and AEC. const bool echo_cancellation = false; const bool goog_agc = false; #else const bool echo_cancellation = audio_constraints.GetEchoCancellationProperty(); const bool goog_agc = audio_constraints.GetProperty( MediaAudioConstraints::kGoogAutoGainControl); #endif #if defined(OS_IOS) || defined(OS_ANDROID) const bool goog_experimental_aec = false; const bool goog_typing_detection = false; #else const bool goog_experimental_aec = audio_constraints.GetProperty( MediaAudioConstraints::kGoogExperimentalEchoCancellation); const bool goog_typing_detection = audio_constraints.GetProperty( MediaAudioConstraints::kGoogTypingNoiseDetection); #endif const bool goog_ns = audio_constraints.GetProperty( MediaAudioConstraints::kGoogNoiseSuppression); const bool goog_experimental_ns = audio_constraints.GetProperty( MediaAudioConstraints::kGoogExperimentalNoiseSuppression); const bool goog_high_pass_filter = audio_constraints.GetProperty( MediaAudioConstraints::kGoogHighpassFilter); // Return immediately if no goog constraint is enabled. if (!echo_cancellation && !goog_experimental_aec && !goog_ns && !goog_high_pass_filter && !goog_typing_detection && !goog_agc && !goog_experimental_ns) { RecordProcessingState(AUDIO_PROCESSING_DISABLED); return; } // Experimental options provided at creation. webrtc::Config config; if (goog_experimental_aec) config.Set(new webrtc::DelayCorrection(true)); if (goog_experimental_ns) config.Set(new webrtc::ExperimentalNs(true)); #if defined(OS_MACOSX) if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled") config.Set(new webrtc::ReportedDelay(false)); #endif // Create and configure the webrtc::AudioProcessing. audio_processing_.reset(webrtc::AudioProcessing::Create(config)); // Enable the audio processing components. if (echo_cancellation) { EnableEchoCancellation(audio_processing_.get()); if (playout_data_source_) playout_data_source_->AddPlayoutSink(this); } if (goog_ns) EnableNoiseSuppression(audio_processing_.get()); if (goog_high_pass_filter) EnableHighPassFilter(audio_processing_.get()); if (goog_typing_detection) { // TODO(xians): Remove this |typing_detector_| after the typing suppression // is enabled by default. typing_detector_.reset(new webrtc::TypingDetection()); EnableTypingDetection(audio_processing_.get(), typing_detector_.get()); } if (goog_agc) EnableAutomaticGainControl(audio_processing_.get()); RecordProcessingState(AUDIO_PROCESSING_ENABLED); } void MediaStreamAudioProcessor::InitializeCaptureFifo( const media::AudioParameters& input_format) { DCHECK(main_thread_checker_.CalledOnValidThread()); DCHECK(input_format.IsValid()); input_format_ = input_format; // TODO(ajm): For now, we assume fixed parameters for the output when audio // processing is enabled, to match the previous behavior. We should either // use the input parameters (in which case, audio processing will convert // at output) or ideally, have a backchannel from the sink to know what // format it would prefer. const int output_sample_rate = audio_processing_ ? kAudioProcessingSampleRate : input_format.sample_rate(); const media::ChannelLayout output_channel_layout = audio_processing_ ? media::GuessChannelLayout(kAudioProcessingNumberOfChannels) : input_format.channel_layout(); // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native // size when processing is enabled. When disabled we use the same size as // the source if less than 10 ms. // // TODO(ajm): This conditional buffer size appears to be assuming knowledge of // the sink based on the source parameters. PeerConnection sinks seem to want // 10 ms chunks regardless, while WebAudio sinks want less, and we're assuming // we can identify WebAudio sinks by the input chunk size. Less fragile would // be to have the sink actually tell us how much it wants (as in the above // TODO). int processing_frames = input_format.sample_rate() / 100; int output_frames = output_sample_rate / 100; if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) { processing_frames = input_format.frames_per_buffer(); output_frames = processing_frames; } output_format_ = media::AudioParameters( media::AudioParameters::AUDIO_PCM_LOW_LATENCY, output_channel_layout, output_sample_rate, 16, output_frames); capture_fifo_.reset( new MediaStreamAudioFifo(input_format.channels(), input_format.frames_per_buffer(), processing_frames)); if (audio_processing_) { output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(), output_frames)); } output_data_.reset(new int16[output_format_.GetBytesPerBuffer() / sizeof(int16)]); } void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded( int sample_rate, int number_of_channels, int frames_per_buffer) { DCHECK(render_thread_checker_.CalledOnValidThread()); if (render_fifo_.get() && render_format_.sample_rate() == sample_rate && render_format_.channels() == number_of_channels && render_format_.frames_per_buffer() == frames_per_buffer) { // Do nothing if the |render_fifo_| has been setup properly. return; } render_format_ = media::AudioParameters( media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::GuessChannelLayout(number_of_channels), sample_rate, 16, frames_per_buffer); const int analysis_frames = sample_rate / 100; // 10 ms chunks. render_fifo_.reset( new MediaStreamAudioFifo(number_of_channels, frames_per_buffer, analysis_frames)); } int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs, int process_frames, base::TimeDelta capture_delay, int volume, bool key_pressed, float* const* output_ptrs) { DCHECK(audio_processing_); DCHECK(capture_thread_checker_.CalledOnValidThread()); TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData"); base::subtle::Atomic32 render_delay_ms = base::subtle::Acquire_Load(&render_delay_ms_); int64 capture_delay_ms = capture_delay.InMilliseconds(); DCHECK_LT(capture_delay_ms, std::numeric_limits::max()); int total_delay_ms = capture_delay_ms + render_delay_ms; if (total_delay_ms > 300) { LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms << "ms; render delay: " << render_delay_ms << "ms"; } webrtc::AudioProcessing* ap = audio_processing_.get(); ap->set_stream_delay_ms(total_delay_ms); DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel); webrtc::GainControl* agc = ap->gain_control(); int err = agc->set_stream_analog_level(volume); DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err; ap->set_stream_key_pressed(key_pressed); err = ap->ProcessStream(process_ptrs, process_frames, input_format_.sample_rate(), MapLayout(input_format_.channel_layout()), output_format_.sample_rate(), MapLayout(output_format_.channel_layout()), output_ptrs); DCHECK_EQ(err, 0) << "ProcessStream() error: " << err; if (typing_detector_) { webrtc::VoiceDetection* vad = ap->voice_detection(); DCHECK(vad->is_enabled()); bool detected = typing_detector_->Process(key_pressed, vad->stream_has_voice()); base::subtle::Release_Store(&typing_detected_, detected); } // Return 0 if the volume hasn't been changed, and otherwise the new volume. return (agc->stream_analog_level() == volume) ? 0 : agc->stream_analog_level(); } } // namespace content