// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/renderer/media/webrtc_audio_capturer.h" #include "base/bind.h" #include "base/logging.h" #include "base/metrics/histogram.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" #include "content/child/child_process.h" #include "content/renderer/media/audio_device_factory.h" #include "content/renderer/media/media_stream_audio_processor.h" #include "content/renderer/media/media_stream_audio_processor_options.h" #include "content/renderer/media/media_stream_audio_source.h" #include "content/renderer/media/webrtc_audio_device_impl.h" #include "content/renderer/media/webrtc_local_audio_track.h" #include "content/renderer/media/webrtc_logging.h" #include "media/audio/sample_rates.h" namespace content { namespace { // Supported hardware sample rates for input and output sides. #if defined(OS_WIN) || defined(OS_MACOSX) // media::GetAudioInputHardwareSampleRate() asks the audio layer // for its current sample rate (set by the user) on Windows and Mac OS X. // The listed rates below adds restrictions and WebRtcAudioDeviceImpl::Init() // will fail if the user selects any rate outside these ranges. const int kValidInputRates[] = {192000, 96000, 48000, 44100, 32000, 16000, 8000}; #elif defined(OS_LINUX) || defined(OS_OPENBSD) const int kValidInputRates[] = {48000, 44100}; #elif defined(OS_ANDROID) const int kValidInputRates[] = {48000, 44100}; #else const int kValidInputRates[] = {44100}; #endif // Time constant for AudioPowerMonitor. See AudioPowerMonitor ctor comments // for semantics. This value was arbitrarily chosen, but seems to work well. const int kPowerMonitorTimeConstantMs = 10; // The time between two audio power level samples. const int kPowerMonitorLogIntervalSeconds = 10; } // namespace // Reference counted container of WebRtcLocalAudioTrack delegate. // TODO(xians): Switch to MediaStreamAudioSinkOwner. class WebRtcAudioCapturer::TrackOwner : public base::RefCountedThreadSafe { public: explicit TrackOwner(WebRtcLocalAudioTrack* track) : delegate_(track) {} void Capture(const int16* audio_data, base::TimeDelta delay, double volume, bool key_pressed, bool need_audio_processing) { base::AutoLock lock(lock_); if (delegate_) { delegate_->Capture(audio_data, delay, volume, key_pressed, need_audio_processing); } } void OnSetFormat(const media::AudioParameters& params) { base::AutoLock lock(lock_); if (delegate_) delegate_->OnSetFormat(params); } void SetAudioProcessor( const scoped_refptr& processor) { base::AutoLock lock(lock_); if (delegate_) delegate_->SetAudioProcessor(processor); } void Reset() { base::AutoLock lock(lock_); delegate_ = NULL; } void Stop() { base::AutoLock lock(lock_); DCHECK(delegate_); // This can be reentrant so reset |delegate_| before calling out. WebRtcLocalAudioTrack* temp = delegate_; delegate_ = NULL; temp->Stop(); } // Wrapper which allows to use std::find_if() when adding and removing // sinks to/from the list. struct TrackWrapper { TrackWrapper(WebRtcLocalAudioTrack* track) : track_(track) {} bool operator()( const scoped_refptr& owner) const { return owner->IsEqual(track_); } WebRtcLocalAudioTrack* track_; }; protected: virtual ~TrackOwner() {} private: friend class base::RefCountedThreadSafe; bool IsEqual(const WebRtcLocalAudioTrack* other) const { base::AutoLock lock(lock_); return (other == delegate_); } // Do NOT reference count the |delegate_| to avoid cyclic reference counting. WebRtcLocalAudioTrack* delegate_; mutable base::Lock lock_; DISALLOW_COPY_AND_ASSIGN(TrackOwner); }; // static scoped_refptr WebRtcAudioCapturer::CreateCapturer( int render_view_id, const StreamDeviceInfo& device_info, const blink::WebMediaConstraints& constraints, WebRtcAudioDeviceImpl* audio_device, MediaStreamAudioSource* audio_source) { scoped_refptr capturer = new WebRtcAudioCapturer( render_view_id, device_info, constraints, audio_device, audio_source); if (capturer->Initialize()) return capturer; return NULL; } bool WebRtcAudioCapturer::Initialize() { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "WebRtcAudioCapturer::Initialize()"; WebRtcLogMessage(base::StringPrintf( "WAC::Initialize. render_view_id=%d" ", channel_layout=%d, sample_rate=%d, buffer_size=%d" ", session_id=%d, paired_output_sample_rate=%d" ", paired_output_frames_per_buffer=%d, effects=%d. ", render_view_id_, device_info_.device.input.channel_layout, device_info_.device.input.sample_rate, device_info_.device.input.frames_per_buffer, device_info_.session_id, device_info_.device.matched_output.sample_rate, device_info_.device.matched_output.frames_per_buffer, device_info_.device.input.effects)); if (render_view_id_ == -1) { // Return true here to allow injecting a new source via // SetCapturerSourceForTesting() at a later state. return true; } MediaAudioConstraints audio_constraints(constraints_, device_info_.device.input.effects); if (!audio_constraints.IsValid()) return false; media::ChannelLayout channel_layout = static_cast( device_info_.device.input.channel_layout); DVLOG(1) << "Audio input hardware channel layout: " << channel_layout; UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioInputChannelLayout", channel_layout, media::CHANNEL_LAYOUT_MAX + 1); // Verify that the reported input channel configuration is supported. if (channel_layout != media::CHANNEL_LAYOUT_MONO && channel_layout != media::CHANNEL_LAYOUT_STEREO && channel_layout != media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC) { DLOG(ERROR) << channel_layout << " is not a supported input channel configuration."; return false; } DVLOG(1) << "Audio input hardware sample rate: " << device_info_.device.input.sample_rate; media::AudioSampleRate asr; if (media::ToAudioSampleRate(device_info_.device.input.sample_rate, &asr)) { UMA_HISTOGRAM_ENUMERATION( "WebRTC.AudioInputSampleRate", asr, media::kAudioSampleRateMax + 1); } else { UMA_HISTOGRAM_COUNTS("WebRTC.AudioInputSampleRateUnexpected", device_info_.device.input.sample_rate); } // Verify that the reported input hardware sample rate is supported // on the current platform. if (std::find(&kValidInputRates[0], &kValidInputRates[0] + arraysize(kValidInputRates), device_info_.device.input.sample_rate) == &kValidInputRates[arraysize(kValidInputRates)]) { DLOG(ERROR) << device_info_.device.input.sample_rate << " is not a supported input rate."; return false; } // Create and configure the default audio capturing source. SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id_), channel_layout, static_cast(device_info_.device.input.sample_rate)); // Add the capturer to the WebRtcAudioDeviceImpl since it needs some hardware // information from the capturer. if (audio_device_) audio_device_->AddAudioCapturer(this); return true; } WebRtcAudioCapturer::WebRtcAudioCapturer( int render_view_id, const StreamDeviceInfo& device_info, const blink::WebMediaConstraints& constraints, WebRtcAudioDeviceImpl* audio_device, MediaStreamAudioSource* audio_source) : constraints_(constraints), audio_processor_( new rtc::RefCountedObject( constraints, device_info.device.input.effects, audio_device)), running_(false), render_view_id_(render_view_id), device_info_(device_info), volume_(0), peer_connection_mode_(false), key_pressed_(false), need_audio_processing_(false), audio_device_(audio_device), audio_source_(audio_source), audio_power_monitor_( device_info_.device.input.sample_rate, base::TimeDelta::FromMilliseconds(kPowerMonitorTimeConstantMs)) { DVLOG(1) << "WebRtcAudioCapturer::WebRtcAudioCapturer()"; } WebRtcAudioCapturer::~WebRtcAudioCapturer() { DCHECK(thread_checker_.CalledOnValidThread()); DCHECK(tracks_.IsEmpty()); DVLOG(1) << "WebRtcAudioCapturer::~WebRtcAudioCapturer()"; Stop(); } void WebRtcAudioCapturer::AddTrack(WebRtcLocalAudioTrack* track) { DCHECK(track); DVLOG(1) << "WebRtcAudioCapturer::AddTrack()"; { base::AutoLock auto_lock(lock_); // Verify that |track| is not already added to the list. DCHECK(!tracks_.Contains(TrackOwner::TrackWrapper(track))); // Add with a tag, so we remember to call OnSetFormat() on the new // track. scoped_refptr track_owner(new TrackOwner(track)); tracks_.AddAndTag(track_owner); } } void WebRtcAudioCapturer::RemoveTrack(WebRtcLocalAudioTrack* track) { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "WebRtcAudioCapturer::RemoveTrack()"; bool stop_source = false; { base::AutoLock auto_lock(lock_); scoped_refptr removed_item = tracks_.Remove(TrackOwner::TrackWrapper(track)); // Clear the delegate to ensure that no more capture callbacks will // be sent to this sink. Also avoids a possible crash which can happen // if this method is called while capturing is active. if (removed_item.get()) { removed_item->Reset(); stop_source = tracks_.IsEmpty(); } } if (stop_source) { // Since WebRtcAudioCapturer does not inherit MediaStreamAudioSource, // and instead MediaStreamAudioSource is composed of a WebRtcAudioCapturer, // we have to call StopSource on the MediaStreamSource. This will call // MediaStreamAudioSource::DoStopSource which in turn call // WebRtcAudioCapturerer::Stop(); audio_source_->StopSource(); } } void WebRtcAudioCapturer::SetCapturerSource( const scoped_refptr& source, media::ChannelLayout channel_layout, float sample_rate) { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "SetCapturerSource(channel_layout=" << channel_layout << "," << "sample_rate=" << sample_rate << ")"; scoped_refptr old_source; { base::AutoLock auto_lock(lock_); if (source_.get() == source.get()) return; source_.swap(old_source); source_ = source; // Reset the flag to allow starting the new source. running_ = false; } DVLOG(1) << "Switching to a new capture source."; if (old_source.get()) old_source->Stop(); // Dispatch the new parameters both to the sink(s) and to the new source, // also apply the new |constraints|. // The idea is to get rid of any dependency of the microphone parameters // which would normally be used by default. // bits_per_sample is always 16 for now. int buffer_size = GetBufferSize(sample_rate); media::AudioParameters params(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, 0, sample_rate, 16, buffer_size, device_info_.device.input.effects); { base::AutoLock auto_lock(lock_); // Notify the |audio_processor_| of the new format. audio_processor_->OnCaptureFormatChanged(params); MediaAudioConstraints audio_constraints(constraints_, device_info_.device.input.effects); need_audio_processing_ = audio_constraints.NeedsAudioProcessing(); // Notify all tracks about the new format. tracks_.TagAll(); } if (source.get()) source->Initialize(params, this, session_id()); Start(); } void WebRtcAudioCapturer::EnablePeerConnectionMode() { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "EnablePeerConnectionMode"; // Do nothing if the peer connection mode has been enabled. if (peer_connection_mode_) return; peer_connection_mode_ = true; int render_view_id = -1; media::AudioParameters input_params; { base::AutoLock auto_lock(lock_); // Simply return if there is no existing source or the |render_view_id_| is // not valid. if (!source_.get() || render_view_id_== -1) return; render_view_id = render_view_id_; input_params = audio_processor_->InputFormat(); } // Do nothing if the current buffer size is the WebRtc native buffer size. if (GetBufferSize(input_params.sample_rate()) == input_params.frames_per_buffer()) { return; } // Create a new audio stream as source which will open the hardware using // WebRtc native buffer size. SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id), input_params.channel_layout(), static_cast(input_params.sample_rate())); } void WebRtcAudioCapturer::Start() { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "WebRtcAudioCapturer::Start()"; base::AutoLock auto_lock(lock_); if (running_ || !source_) return; // Start the data source, i.e., start capturing data from the current source. // We need to set the AGC control before starting the stream. source_->SetAutomaticGainControl(true); source_->Start(); running_ = true; } void WebRtcAudioCapturer::Stop() { DCHECK(thread_checker_.CalledOnValidThread()); DVLOG(1) << "WebRtcAudioCapturer::Stop()"; scoped_refptr source; TrackList::ItemList tracks; { base::AutoLock auto_lock(lock_); if (!running_) return; source = source_; tracks = tracks_.Items(); tracks_.Clear(); running_ = false; } // Remove the capturer object from the WebRtcAudioDeviceImpl. if (audio_device_) audio_device_->RemoveAudioCapturer(this); for (TrackList::ItemList::const_iterator it = tracks.begin(); it != tracks.end(); ++it) { (*it)->Stop(); } if (source.get()) source->Stop(); // Stop the audio processor to avoid feeding render data into the processor. audio_processor_->Stop(); } void WebRtcAudioCapturer::SetVolume(int volume) { DVLOG(1) << "WebRtcAudioCapturer::SetVolume()"; DCHECK_LE(volume, MaxVolume()); double normalized_volume = static_cast(volume) / MaxVolume(); base::AutoLock auto_lock(lock_); if (source_.get()) source_->SetVolume(normalized_volume); } int WebRtcAudioCapturer::Volume() const { base::AutoLock auto_lock(lock_); return volume_; } int WebRtcAudioCapturer::MaxVolume() const { return WebRtcAudioDeviceImpl::kMaxVolumeLevel; } void WebRtcAudioCapturer::Capture(const media::AudioBus* audio_source, int audio_delay_milliseconds, double volume, bool key_pressed) { // This callback is driven by AudioInputDevice::AudioThreadCallback if // |source_| is AudioInputDevice, otherwise it is driven by client's // CaptureCallback. #if defined(OS_WIN) || defined(OS_MACOSX) DCHECK_LE(volume, 1.0); #elif (defined(OS_LINUX) && !defined(OS_CHROMEOS)) || defined(OS_OPENBSD) // We have a special situation on Linux where the microphone volume can be // "higher than maximum". The input volume slider in the sound preference // allows the user to set a scaling that is higher than 100%. It means that // even if the reported maximum levels is N, the actual microphone level can // go up to 1.5x*N and that corresponds to a normalized |volume| of 1.5x. DCHECK_LE(volume, 1.6); #endif TrackList::ItemList tracks; TrackList::ItemList tracks_to_notify_format; int current_volume = 0; base::TimeDelta audio_delay; bool need_audio_processing = true; { base::AutoLock auto_lock(lock_); if (!running_) return; // Map internal volume range of [0.0, 1.0] into [0, 255] used by AGC. // The volume can be higher than 255 on Linux, and it will be cropped to // 255 since AGC does not allow values out of range. volume_ = static_cast((volume * MaxVolume()) + 0.5); current_volume = volume_ > MaxVolume() ? MaxVolume() : volume_; audio_delay = base::TimeDelta::FromMilliseconds(audio_delay_milliseconds); audio_delay_ = audio_delay; key_pressed_ = key_pressed; tracks = tracks_.Items(); tracks_.RetrieveAndClearTags(&tracks_to_notify_format); // Set the flag to turn on the audio processing in PeerConnection level. // Note that, we turn off the audio processing in PeerConnection if the // processor has already processed the data. need_audio_processing = need_audio_processing_ ? !MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() : false; } DCHECK(audio_processor_->InputFormat().IsValid()); DCHECK_EQ(audio_source->channels(), audio_processor_->InputFormat().channels()); DCHECK_EQ(audio_source->frames(), audio_processor_->InputFormat().frames_per_buffer()); // Notify the tracks on when the format changes. This will do nothing if // |tracks_to_notify_format| is empty. media::AudioParameters output_params = audio_processor_->OutputFormat(); for (TrackList::ItemList::const_iterator it = tracks_to_notify_format.begin(); it != tracks_to_notify_format.end(); ++it) { (*it)->OnSetFormat(output_params); (*it)->SetAudioProcessor(audio_processor_); } if ((base::TimeTicks::Now() - last_audio_level_log_time_).InSeconds() > kPowerMonitorLogIntervalSeconds) { audio_power_monitor_.Scan(*audio_source, audio_source->frames()); last_audio_level_log_time_ = base::TimeTicks::Now(); std::pair result = audio_power_monitor_.ReadCurrentPowerAndClip(); WebRtcLogMessage(base::StringPrintf( "WAC::Capture: current_audio_power=%.2fdBFS.", result.first)); audio_power_monitor_.Reset(); } // Push the data to the processor for processing. audio_processor_->PushCaptureData(audio_source); // Process and consume the data in the processor until there is not enough // data in the processor. int16* output = NULL; int new_volume = 0; while (audio_processor_->ProcessAndConsumeData( audio_delay, current_volume, key_pressed, &new_volume, &output)) { // Feed the post-processed data to the tracks. for (TrackList::ItemList::const_iterator it = tracks.begin(); it != tracks.end(); ++it) { (*it)->Capture(output, audio_delay, current_volume, key_pressed, need_audio_processing); } if (new_volume) { SetVolume(new_volume); // Update the |current_volume| to avoid passing the old volume to AGC. current_volume = new_volume; } } } void WebRtcAudioCapturer::OnCaptureError() { NOTIMPLEMENTED(); } media::AudioParameters WebRtcAudioCapturer::source_audio_parameters() const { base::AutoLock auto_lock(lock_); return audio_processor_ ? audio_processor_->InputFormat() : media::AudioParameters(); } bool WebRtcAudioCapturer::GetPairedOutputParameters( int* session_id, int* output_sample_rate, int* output_frames_per_buffer) const { // Don't set output parameters unless all of them are valid. if (device_info_.session_id <= 0 || !device_info_.device.matched_output.sample_rate || !device_info_.device.matched_output.frames_per_buffer) return false; *session_id = device_info_.session_id; *output_sample_rate = device_info_.device.matched_output.sample_rate; *output_frames_per_buffer = device_info_.device.matched_output.frames_per_buffer; return true; } int WebRtcAudioCapturer::GetBufferSize(int sample_rate) const { DCHECK(thread_checker_.CalledOnValidThread()); #if defined(OS_ANDROID) // TODO(henrika): Tune and adjust buffer size on Android. return (2 * sample_rate / 100); #endif // PeerConnection is running at a buffer size of 10ms data. A multiple of // 10ms as the buffer size can give the best performance to PeerConnection. int peer_connection_buffer_size = sample_rate / 100; // Use the native hardware buffer size in non peer connection mode when the // platform is using a native buffer size smaller than the PeerConnection // buffer size. int hardware_buffer_size = device_info_.device.input.frames_per_buffer; if (!peer_connection_mode_ && hardware_buffer_size && hardware_buffer_size <= peer_connection_buffer_size) { return hardware_buffer_size; } return (sample_rate / 100); } void WebRtcAudioCapturer::GetAudioProcessingParams( base::TimeDelta* delay, int* volume, bool* key_pressed) { base::AutoLock auto_lock(lock_); *delay = audio_delay_; *volume = volume_; *key_pressed = key_pressed_; } void WebRtcAudioCapturer::SetCapturerSourceForTesting( const scoped_refptr& source, media::AudioParameters params) { // Create a new audio stream as source which uses the new source. SetCapturerSource(source, params.channel_layout(), static_cast(params.sample_rate())); } } // namespace content