author    | henrika@chromium.org <henrika@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-02-06 08:41:24 +0000
committer | henrika@chromium.org <henrika@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-02-06 08:41:24 +0000
commit    | 3ca01de32c4b1e40d45d2daeedcb841429ce4704 (patch)
tree      | 826de070a239aeca2eb3c37392eabd3560f674e3
parent    | 2987c2f633fdff2ba80ac2894384698c850948dd (diff)
Avoids irregular OnMoreData callbacks on Windows using Core Audio.
Browser changes:
- Improves how native audio buffer sizes are derived on Windows.
- Forces the user to always open up at native audio parameters.
- Improved the internal scheme to set up the actual endpoint buffer based on the input size.
- Refactored the WASAPI output implementation and introduced CoreAudioUtil methods.
- Harmonized the WASAPI output implementation with the existing unified implementation (to prepare for a future merge).
- Changed GetAudioHardwareBufferSize() in audio_util.
Render changes for WebRTC:
- WebRTC now always asks for an output stream using native parameters to avoid rebuffering in the audio converter (the source buffer-size selection is sketched below the commit metadata).
- Any buffer-size mismatch is now taken care of in WebRtcAudioRenderer using a pull FIFO (also sketched below). Delay estimates are also compensated when the FIFO is used.
- Added DCHECKs to verify that methods are called on the expected threads.
BUG=170498
TEST=media_unittests, content_unittests, HTML5 audio tests in Chrome, WebAudio and Flash tests in Chrome, WebRTC tests in Chrome.
Review URL: https://codereview.chromium.org/12049070
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@180936 0039d316-1c4b-4281-b951-d872f2087c98
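
For context on the render-side changes above, here is a minimal, self-contained sketch of the source buffer-size selection that the new WebRtcAudioRenderer::Initialize() path performs: 10 ms worth of frames when the sample rate is a multiple of 8000 Hz, a hard-coded 440 frames for 44.1 kHz, and failure for any other rate. The standalone function and main() are illustrative only and are not part of the patch.

#include <cstdio>

// Returns the WebRTC source buffer size in frames for a given hardware sample
// rate, or 0 when the rate is unsupported (Initialize() then fails).
int SourceFramesPerBuffer(int sample_rate) {
  if (sample_rate % 8000 == 0)
    return sample_rate / 100;  // Exactly 10 ms, e.g. 480 frames at 48 kHz.
  if (sample_rate == 44100)
    return 440;  // ~9.977 ms; WebRTC's internal jitter buffer absorbs the drift.
  return 0;      // Unsupported rate.
}

int main() {
  const int rates[] = {16000, 32000, 44100, 48000, 96000, 22050};
  for (int rate : rates)
    std::printf("%d Hz -> %d frames\n", rate, SourceFramesPerBuffer(rate));
  return 0;
}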
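
The pull-FIFO bridging mentioned in the commit message can be illustrated with the following standalone sketch, assuming a mono float stream. This is not the media::AudioPullFifo class the patch actually uses; PullFifo and the frame counts below are made up for the example. The sink pulls whatever buffer size the output device wants, the FIFO pulls fixed-size blocks from the source whenever it runs short, and the frames left sitting in the FIFO represent extra delay that the renderer adds to the estimate reported to the AEC.

#include <algorithm>
#include <cstdio>
#include <deque>
#include <functional>
#include <vector>

class PullFifo {
 public:
  // |source_cb| must fill exactly |source_frames| samples per invocation.
  PullFifo(int source_frames, std::function<void(std::vector<float>*)> source_cb)
      : source_frames_(source_frames), source_cb_(std::move(source_cb)) {}

  // Called from the sink's render callback; |sink_frames| may differ from
  // |source_frames_|, which is the reason the FIFO exists.
  void Consume(float* dest, int sink_frames) {
    while (static_cast<int>(fifo_.size()) < sink_frames) {
      std::vector<float> block(source_frames_);
      source_cb_(&block);  // The real renderer would call source_->RenderData() here.
      fifo_.insert(fifo_.end(), block.begin(), block.end());
    }
    std::copy(fifo_.begin(), fifo_.begin() + sink_frames, dest);
    fifo_.erase(fifo_.begin(), fifo_.begin() + sink_frames);
  }

  // Frames buffered ahead of the sink; this contributes to the reported delay.
  int frames_buffered() const { return static_cast<int>(fifo_.size()); }

 private:
  const int source_frames_;
  std::function<void(std::vector<float>*)> source_cb_;
  std::deque<float> fifo_;
};

int main() {
  // Illustrative 44.1 kHz case: the source produces 440-frame blocks while the
  // native sink asks for 441 frames per callback.
  const int kSourceFrames = 440;
  const int kSinkFrames = 441;

  int pulls = 0;
  PullFifo fifo(kSourceFrames, [&](std::vector<float>* block) {
    ++pulls;
    std::fill(block->begin(), block->end(), 0.0f);  // Silence stands in for audio.
  });

  std::vector<float> sink_buffer(kSinkFrames);
  for (int i = 0; i < 10; ++i)
    fifo.Consume(sink_buffer.data(), kSinkFrames);

  std::printf("source pulls: %d, frames still buffered: %d\n",
              pulls, fifo.frames_buffered());
  return 0;
}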
-rw-r--r-- | content/renderer/media/webrtc_audio_capturer.cc | 2
-rw-r--r-- | content/renderer/media/webrtc_audio_device_unittest.cc | 6
-rw-r--r-- | content/renderer/media/webrtc_audio_renderer.cc | 236
-rw-r--r-- | content/renderer/media/webrtc_audio_renderer.h | 34
-rw-r--r-- | media/audio/audio_util.cc | 49
-rw-r--r-- | media/audio/win/audio_low_latency_output_win.cc | 797
-rw-r--r-- | media/audio/win/audio_low_latency_output_win.h | 73
-rw-r--r-- | media/audio/win/audio_low_latency_output_win_unittest.cc | 112
-rw-r--r-- | media/audio/win/audio_unified_win.cc | 32
-rw-r--r-- | media/audio/win/audio_unified_win.h | 3
-rw-r--r-- | media/audio/win/core_audio_util_win.cc | 131
-rw-r--r-- | media/audio/win/core_audio_util_win.h | 13
-rw-r--r-- | media/audio/win/core_audio_util_win_unittest.cc | 37
13 files changed, 632 insertions, 893 deletions
diff --git a/content/renderer/media/webrtc_audio_capturer.cc b/content/renderer/media/webrtc_audio_capturer.cc index a3c6ebf..a7e0b4f 100644 --- a/content/renderer/media/webrtc_audio_capturer.cc +++ b/content/renderer/media/webrtc_audio_capturer.cc @@ -373,7 +373,7 @@ void WebRtcAudioCapturer::Capture(media::AudioBus* audio_source, loopback_fifo_->max_frames()) { loopback_fifo_->Push(audio_source); } else { - DLOG(WARNING) << "FIFO is full"; + DVLOG(1) << "FIFO is full"; } } } diff --git a/content/renderer/media/webrtc_audio_device_unittest.cc b/content/renderer/media/webrtc_audio_device_unittest.cc index e988aa2..53d1c76 100644 --- a/content/renderer/media/webrtc_audio_device_unittest.cc +++ b/content/renderer/media/webrtc_audio_device_unittest.cc @@ -41,7 +41,7 @@ scoped_ptr<media::AudioHardwareConfig> CreateRealHardwareConfig() { } // Return true if at least one element in the array matches |value|. -bool FindElementInArray(int* array, int size, int value) { +bool FindElementInArray(const int* array, int size, int value) { return (std::find(&array[0], &array[0] + size, value) != &array[size]); } @@ -56,7 +56,7 @@ bool HardwareSampleRatesAreValid() { // The actual WebRTC client can limit these ranges further depending on // platform but this is the maximum range we support today. int valid_input_rates[] = {16000, 32000, 44100, 48000, 96000}; - int valid_output_rates[] = {44100, 48000, 96000}; + int valid_output_rates[] = {16000, 32000, 44100, 48000, 96000}; media::AudioHardwareConfig* hardware_config = RenderThreadImpl::current()->GetAudioHardwareConfig(); @@ -448,7 +448,7 @@ TEST_F(WebRTCAudioDeviceTest, DISABLED_PlayLocalFile) { // Play 2 seconds worth of audio and then quit. message_loop_.PostDelayedTask(FROM_HERE, MessageLoop::QuitClosure(), - base::TimeDelta::FromSeconds(2)); + base::TimeDelta::FromSeconds(6)); message_loop_.Run(); renderer->Stop(); diff --git a/content/renderer/media/webrtc_audio_renderer.cc b/content/renderer/media/webrtc_audio_renderer.cc index 0d33713..1b66b4d 100644 --- a/content/renderer/media/webrtc_audio_renderer.cc +++ b/content/renderer/media/webrtc_audio_renderer.cc @@ -11,7 +11,7 @@ #include "content/renderer/media/renderer_audio_output_device.h" #include "content/renderer/media/webrtc_audio_device_impl.h" #include "content/renderer/render_thread_impl.h" -#include "media/audio/audio_util.h" +#include "media/audio/audio_parameters.h" #include "media/audio/sample_rates.h" #include "media/base/audio_hardware_config.h" @@ -30,14 +30,14 @@ namespace { // current sample rate (set by the user) on Windows and Mac OS X. The listed // rates below adds restrictions and Initialize() will fail if the user selects // any rate outside these ranges. -int kValidOutputRates[] = {96000, 48000, 44100}; +const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000}; #elif defined(OS_LINUX) || defined(OS_OPENBSD) -int kValidOutputRates[] = {48000, 44100}; +const int kValidOutputRates[] = {48000, 44100}; #elif defined(OS_ANDROID) // On Android, the most popular sampling rate is 16000. -int kValidOutputRates[] = {48000, 44100, 16000}; +const int kValidOutputRates[] = {48000, 44100, 16000}; #else -int kValidOutputRates[] = {44100}; +const int kValidOutputRates[] = {44100}; #endif // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove. 
@@ -88,15 +88,21 @@ WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id) : state_(UNINITIALIZED), source_render_view_id_(source_render_view_id), source_(NULL), - play_ref_count_(0) { + play_ref_count_(0), + audio_delay_milliseconds_(0), + frame_duration_milliseconds_(0), + fifo_io_ratio_(1) { } WebRtcAudioRenderer::~WebRtcAudioRenderer() { + DCHECK(thread_checker_.CalledOnValidThread()); DCHECK_EQ(state_, UNINITIALIZED); buffer_.reset(); } bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { + DVLOG(1) << "WebRtcAudioRenderer::Initialize()"; + DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock auto_lock(lock_); DCHECK_EQ(state_, UNINITIALIZED); DCHECK(source); @@ -106,6 +112,13 @@ bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { sink_ = AudioDeviceFactory::NewOutputDevice(); DCHECK(sink_); + // Use mono on all platforms but Windows for now. + // TODO(henrika): Tracking at http://crbug.com/166771. + media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_MONO; +#if defined(OS_WIN) + channel_layout = media::CHANNEL_LAYOUT_STEREO; +#endif + // Ask the renderer for the default audio output hardware sample-rate. media::AudioHardwareConfig* hardware_config = RenderThreadImpl::current()->GetAudioHardwareConfig(); @@ -124,102 +137,87 @@ bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { return false; } - media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO; + // Set up audio parameters for the source, i.e., the WebRTC client. + // The WebRTC client only supports multiples of 10ms as buffer size where + // 10ms is preferred for lowest possible delay. + media::AudioParameters source_params; int buffer_size = 0; - // Windows -#if defined(OS_WIN) - // Always use stereo rendering on Windows. - channel_layout = media::CHANNEL_LAYOUT_STEREO; - - // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI) - // API which was introduced in Windows Vista. For lower Windows versions, - // a callback-driven Wave implementation is used instead. An output buffer - // size of 10ms works well for WASAPI but 30ms is needed for Wave. - - // Use different buffer sizes depending on the current hardware sample rate. - if (sample_rate == 96000 || sample_rate == 48000) { + if (sample_rate % 8000 == 0) { buffer_size = (sample_rate / 100); + } else if (sample_rate == 44100) { + // The resampler in WebRTC does not support 441 as input. We hard code + // the size to 440 (~0.9977ms) instead and rely on the internal jitter + // buffer in WebRTC to deal with the resulting drift. + // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead. + buffer_size = 440; } else { - // We do run at 44.1kHz at the actual audio layer, but ask for frames - // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine. - // TODO(henrika): figure out why we seem to need 20ms here for glitch- - // free audio. - buffer_size = 2 * 440; + return false; } - // Windows XP and lower can't cope with 10 ms output buffer size. - // It must be extended to 30 ms (60 ms will be used internally by WaveOut). - // Note that we can't use media::CoreAudioUtil::IsSupported() here since it - // tries to load the Audioses.dll and it will always fail in the render - // process. 
- if (base::win::GetVersion() < base::win::VERSION_VISTA) { - buffer_size = 3 * buffer_size; - DLOG(WARNING) << "Extending the output buffer size by a factor of three " - << "since Windows XP has been detected."; + source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, + channel_layout, sample_rate, 16, buffer_size); + + // Set up audio parameters for the sink, i.e., the native audio output stream. + // We strive to open up using native parameters to achieve best possible + // performance and to ensure that no FIFO is needed on the browser side to + // match the client request. Any mismatch between the source and the sink is + // taken care of in this class instead using a pull FIFO. + + media::AudioParameters sink_params; + + buffer_size = hardware_config->GetOutputBufferSize(); + sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, + channel_layout, sample_rate, 16, buffer_size); + + // Create a FIFO if re-buffering is required to match the source input with + // the sink request. The source acts as provider here and the sink as + // consumer. + if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) { + DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer() + << " to " << sink_params.frames_per_buffer(); + audio_fifo_.reset(new media::AudioPullFifo( + source_params.channels(), + source_params.frames_per_buffer(), + base::Bind( + &WebRtcAudioRenderer::SourceCallback, + base::Unretained(this)))); + + // The I/O ratio is used in delay calculations where one scheme is used + // for |fifo_io_ratio_| > 1 and another scheme for < 1.0. + fifo_io_ratio_ = static_cast<double>(source_params.frames_per_buffer()) / + sink_params.frames_per_buffer(); } -#elif defined(OS_MACOSX) - channel_layout = media::CHANNEL_LAYOUT_MONO; - - // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- - // driven Core Audio implementation. Tests have shown that 10ms is a suitable - // frame size to use for 96kHz, 48kHz and 44.1kHz. - - // Use different buffer sizes depending on the current hardware sample rate. - if (sample_rate == 96000 || sample_rate == 48000) { - buffer_size = (sample_rate / 100); - } else { - // We do run at 44.1kHz at the actual audio layer, but ask for frames - // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine. - buffer_size = 440; - } -#elif defined(OS_LINUX) || defined(OS_OPENBSD) - channel_layout = media::CHANNEL_LAYOUT_MONO; - - // Based on tests using the current ALSA implementation in Chrome, we have - // found that 10ms buffer size on the output side works fine. - buffer_size = 480; -#elif defined(OS_ANDROID) - channel_layout = media::CHANNEL_LAYOUT_MONO; - - // The buffer size lower than GetAudioHardwareBufferSize() will lead to - // choppy sound because AudioOutputResampler will read the buffer multiple - // times in a row without allowing the client to re-fill the buffer. - // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right - // value for Android and do further tuning. - buffer_size = 2048; -#else - DLOG(ERROR) << "Unsupported platform"; - return false; -#endif - // Store utilized parameters to ensure that we can check them - // after a successful initialization. - params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, - sample_rate, 16, buffer_size); + frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond / + static_cast<double>(source_params.sample_rate()); // Allocate local audio buffers based on the parameters above. 
// It is assumed that each audio sample contains 16 bits and each // audio frame contains one or two audio samples depending on the // number of channels. - buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]); + buffer_.reset( + new int16[source_params.frames_per_buffer() * source_params.channels()]); source_ = source; - source->SetRenderFormat(params_); + source->SetRenderFormat(source_params); - // Configure the audio rendering client and start the rendering. - sink_->Initialize(params_, this); + // Configure the audio rendering client and start rendering. + sink_->Initialize(sink_params, this); sink_->SetSourceRenderView(source_render_view_id_); sink_->Start(); + // User must call Play() before any audio can be heard. state_ = PAUSED; UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout", - channel_layout, media::CHANNEL_LAYOUT_MAX); + source_params.channel_layout(), + media::CHANNEL_LAYOUT_MAX); UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer", - buffer_size, kUnexpectedAudioBufferSize); - AddHistogramFramesPerBuffer(buffer_size); + source_params.frames_per_buffer(), + kUnexpectedAudioBufferSize); + AddHistogramFramesPerBuffer(source_params.frames_per_buffer()); return true; } @@ -230,6 +228,8 @@ void WebRtcAudioRenderer::Start() { } void WebRtcAudioRenderer::Play() { + DVLOG(1) << "WebRtcAudioRenderer::Play()"; + DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock auto_lock(lock_); if (state_ == UNINITIALIZED) return; @@ -237,9 +237,16 @@ void WebRtcAudioRenderer::Play() { DCHECK(play_ref_count_ == 0 || state_ == PLAYING); ++play_ref_count_; state_ = PLAYING; + + if (audio_fifo_) { + audio_delay_milliseconds_ = 0; + audio_fifo_->Clear(); + } } void WebRtcAudioRenderer::Pause() { + DVLOG(1) << "WebRtcAudioRenderer::Pause()"; + DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock auto_lock(lock_); if (state_ == UNINITIALIZED) return; @@ -251,6 +258,8 @@ void WebRtcAudioRenderer::Pause() { } void WebRtcAudioRenderer::Stop() { + DVLOG(1) << "WebRtcAudioRenderer::Stop()"; + DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock auto_lock(lock_); if (state_ == UNINITIALIZED) return; @@ -262,6 +271,7 @@ void WebRtcAudioRenderer::Stop() { } void WebRtcAudioRenderer::SetVolume(float volume) { + DCHECK(thread_checker_.CalledOnValidThread()); base::AutoLock auto_lock(lock_); if (state_ == UNINITIALIZED) return; @@ -279,26 +289,24 @@ bool WebRtcAudioRenderer::IsLocalRenderer() const { int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus, int audio_delay_milliseconds) { - { - base::AutoLock auto_lock(lock_); - if (!source_) - return 0; - // We need to keep render data for the |source_| reglardless of |state_|, - // otherwise the data will be buffered up inside |source_|. - source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()), - audio_bus->channels(), audio_bus->frames(), - audio_delay_milliseconds); - - // Return 0 frames to play out silence if |state_| is not PLAYING. - if (state_ != PLAYING) - return 0; - } + base::AutoLock auto_lock(lock_); + if (!source_) + return 0; - // Deinterleave each channel and convert to 32-bit floating-point - // with nominal range -1.0 -> +1.0 to match the callback format. 
- audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(), - params_.bits_per_sample() / 8); - return audio_bus->frames(); + DVLOG(2) << "WebRtcAudioRenderer::Render()"; + DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds; + + if (fifo_io_ratio_ > 1.0) + audio_delay_milliseconds_ += audio_delay_milliseconds; + else + audio_delay_milliseconds_ = audio_delay_milliseconds; + + if (audio_fifo_) + audio_fifo_->Consume(audio_bus, audio_bus->frames()); + else + SourceCallback(0, audio_bus); + + return (state_ == PLAYING) ? audio_bus->frames() : 0; } void WebRtcAudioRenderer::OnRenderError() { @@ -306,4 +314,38 @@ void WebRtcAudioRenderer::OnRenderError() { LOG(ERROR) << "OnRenderError()"; } +// Called by AudioPullFifo when more data is necessary. +void WebRtcAudioRenderer::SourceCallback( + int fifo_frame_delay, media::AudioBus* audio_bus) { + DVLOG(2) << "WebRtcAudioRenderer::SourceCallback(" + << fifo_frame_delay << ", " + << audio_bus->frames() << ")"; + + int output_delay_milliseconds = audio_delay_milliseconds_; + output_delay_milliseconds += frame_duration_milliseconds_ * fifo_frame_delay; + DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds; + + // We need to keep render data for the |source_| regardless of |state_|, + // otherwise the data will be buffered up inside |source_|. + source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()), + audio_bus->channels(), audio_bus->frames(), + output_delay_milliseconds); + + if (fifo_io_ratio_ > 1.0) + audio_delay_milliseconds_ = 0; + + // Avoid filling up the audio bus if we are not playing; instead + // return here and ensure that the returned value in Render() is 0. + if (state_ != PLAYING) { + audio_bus->Zero(); + return; + } + + // De-interleave each channel and convert to 32-bit floating-point + // with nominal range -1.0 -> +1.0 to match the callback format. + audio_bus->FromInterleaved(buffer_.get(), + audio_bus->frames(), + sizeof(buffer_[0])); +} + } // namespace content diff --git a/content/renderer/media/webrtc_audio_renderer.h b/content/renderer/media/webrtc_audio_renderer.h index 09cccf0..e0b19c7 100644 --- a/content/renderer/media/webrtc_audio_renderer.h +++ b/content/renderer/media/webrtc_audio_renderer.h @@ -7,8 +7,10 @@ #include "base/memory/ref_counted.h" #include "base/synchronization/lock.h" +#include "base/threading/thread_checker.h" #include "content/renderer/media/webrtc_audio_device_impl.h" #include "media/base/audio_decoder.h" +#include "media/base/audio_pull_fifo.h" #include "media/base/audio_renderer_sink.h" #include "webkit/media/media_stream_audio_renderer.h" @@ -18,16 +20,15 @@ class RendererAudioOutputDevice; class WebRtcAudioRendererSource; // This renderer handles calls from the pipeline and WebRtc ADM. It is used -// for connecting WebRtc MediaStream with pipeline. +// for connecting WebRtc MediaStream with the audio pipeline. class CONTENT_EXPORT WebRtcAudioRenderer : NON_EXPORTED_BASE(public media::AudioRendererSink::RenderCallback), NON_EXPORTED_BASE(public webkit_media::MediaStreamAudioRenderer) { public: explicit WebRtcAudioRenderer(int source_render_view_id); - // Initialize function called by clients like WebRtcAudioDeviceImpl. Note, + // Initialize function called by clients like WebRtcAudioDeviceImpl. // Stop() has to be called before |source| is deleted. - // Returns false if Initialize() fails. bool Initialize(WebRtcAudioRendererSource* source); // Methods called by WebMediaPlayerMS and WebRtcAudioDeviceImpl. 
@@ -49,14 +50,23 @@ class CONTENT_EXPORT WebRtcAudioRenderer PLAYING, PAUSED, }; + + // Used to DCHECK that we are called on the correct thread. + base::ThreadChecker thread_checker_; + // Flag to keep track the state of the renderer. State state_; // media::AudioRendererSink::RenderCallback implementation. + // These two methods are called on the AudioOutputDevice worker thread. virtual int Render(media::AudioBus* audio_bus, int audio_delay_milliseconds) OVERRIDE; virtual void OnRenderError() OVERRIDE; + // Called by AudioPullFifo when more data is necessary. + // This method is called on the AudioOutputDevice worker thread. + void SourceCallback(int fifo_frame_delay, media::AudioBus* audio_bus); + // The render view in which the audio is rendered into |sink_|. const int source_render_view_id_; @@ -66,19 +76,29 @@ class CONTENT_EXPORT WebRtcAudioRenderer // Audio data source from the browser process. WebRtcAudioRendererSource* source_; - // Cached values of utilized audio parameters. Platform dependent. - media::AudioParameters params_; - // Buffers used for temporary storage during render callbacks. // Allocated during initialization. scoped_array<int16> buffer_; - // Protect access to |state_|. + // Protects access to |state_|, |source_| and |sink_|. base::Lock lock_; // Ref count for the MediaPlayers which are playing audio. int play_ref_count_; + // Used to buffer data between the client and the output device in cases where + // the client buffer size is not the same as the output device buffer size. + scoped_ptr<media::AudioPullFifo> audio_fifo_; + + // Contains the accumulated delay estimate which is provided to the WebRTC + // AEC. + int audio_delay_milliseconds_; + + // Lengh of an audio frame in milliseconds. + double frame_duration_milliseconds_; + + double fifo_io_ratio_; + DISALLOW_IMPLICIT_CONSTRUCTORS(WebRtcAudioRenderer); }; diff --git a/media/audio/audio_util.cc b/media/audio/audio_util.cc index 6085c83..3b4f9c5 100644 --- a/media/audio/audio_util.cc +++ b/media/audio/audio_util.cc @@ -132,9 +132,8 @@ int GetAudioHardwareSampleRate() { // Hardware sample-rate on Windows can be configured, so we must query. // TODO(henrika): improve possibility to specify an audio endpoint. - // Use the default device (same as for Wave) for now to be compatible - // or possibly remove the ERole argument completely until it is in use. - return WASAPIAudioOutputStream::HardwareSampleRate(eConsole); + // Use the default device (same as for Wave) for now to be compatible. + return WASAPIAudioOutputStream::HardwareSampleRate(); #elif defined(OS_ANDROID) return 16000; #else @@ -176,6 +175,10 @@ size_t GetAudioHardwareBufferSize() { #if defined(OS_MACOSX) return 128; #elif defined(OS_WIN) + // TODO(henrika): resolve conflict with GetUserBufferSize(). + // If the user tries to set a buffer size using GetUserBufferSize() it will + // most likely fail since only the native/perfect buffer size is allowed. + // Buffer size to use when a proper size can't be determined from the system. static const int kFallbackBufferSize = 2048; @@ -193,42 +196,10 @@ size_t GetAudioHardwareBufferSize() { return 256; } - // TODO(henrika): remove when the --enable-webaudio-input flag is no longer - // utilized. - if (cmd_line->HasSwitch(switches::kEnableWebAudioInput)) { - AudioParameters params; - HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(eRender, eConsole, - ¶ms); - return FAILED(hr) ? kFallbackBufferSize : params.frames_per_buffer(); - } - - // This call must be done on a COM thread configured as MTA. 
- // TODO(tommi): http://code.google.com/p/chromium/issues/detail?id=103835. - int mixing_sample_rate = - WASAPIAudioOutputStream::HardwareSampleRate(eConsole); - - // Windows will return a sample rate of 0 when no audio output is available - // (i.e. via RemoteDesktop with remote audio disabled), but we should never - // return a buffer size of zero. - if (mixing_sample_rate == 0) - return kFallbackBufferSize; - - // Use different buffer sizes depening on the sample rate . The existing - // WASAPI implementation is tuned to provide the most stable callback - // sequence using these combinations. - if (mixing_sample_rate % 11025 == 0) - // Use buffer size of ~10.15873 ms. - return (112 * (mixing_sample_rate / 11025)); - - if (mixing_sample_rate % 8000 == 0) - // Use buffer size of 10ms. - return (80 * (mixing_sample_rate / 8000)); - - // Ensure we always return a buffer size which is somewhat appropriate. - LOG(ERROR) << "Unknown sample rate " << mixing_sample_rate << " detected."; - if (mixing_sample_rate > limits::kMinSampleRate) - return (mixing_sample_rate / 100); - return kFallbackBufferSize; + AudioParameters params; + HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(eRender, eConsole, + ¶ms); + return FAILED(hr) ? kFallbackBufferSize : params.frames_per_buffer(); #else return 2048; #endif diff --git a/media/audio/win/audio_low_latency_output_win.cc b/media/audio/win/audio_low_latency_output_win.cc index a53e03c..ba88c43 100644 --- a/media/audio/win/audio_low_latency_output_win.cc +++ b/media/audio/win/audio_low_latency_output_win.cc @@ -7,6 +7,7 @@ #include <Functiondiscoverykeys_devpkey.h> #include "base/command_line.h" +#include "base/debug/trace_event.h" #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/metrics/histogram.h" @@ -15,6 +16,7 @@ #include "media/audio/audio_util.h" #include "media/audio/win/audio_manager_win.h" #include "media/audio/win/avrt_wrapper_win.h" +#include "media/audio/win/core_audio_util_win.h" #include "media/base/limits.h" #include "media/base/media_switches.h" @@ -26,53 +28,6 @@ namespace media { typedef uint32 ChannelConfig; -// Retrieves the stream format that the audio engine uses for its internal -// processing/mixing of shared-mode streams. -static HRESULT GetMixFormat(ERole device_role, WAVEFORMATEX** device_format) { - // Note that we are using the IAudioClient::GetMixFormat() API to get the - // device format in this function. It is in fact possible to be "more native", - // and ask the endpoint device directly for its properties. Given a reference - // to the IMMDevice interface of an endpoint object, a client can obtain a - // reference to the endpoint object's property store by calling the - // IMMDevice::OpenPropertyStore() method. However, I have not been able to - // access any valuable information using this method on my HP Z600 desktop, - // hence it feels more appropriate to use the IAudioClient::GetMixFormat() - // approach instead. - - // Calling this function only makes sense for shared mode streams, since - // if the device will be opened in exclusive mode, then the application - // specified format is used instead. However, the result of this method can - // be useful for testing purposes so we don't DCHECK here. 
- DLOG_IF(WARNING, WASAPIAudioOutputStream::GetShareMode() == - AUDCLNT_SHAREMODE_EXCLUSIVE) << - "The mixing sample rate will be ignored for exclusive-mode streams."; - - // It is assumed that this static method is called from a COM thread, i.e., - // CoInitializeEx() is not called here again to avoid STA/MTA conflicts. - ScopedComPtr<IMMDeviceEnumerator> enumerator; - HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), - NULL, - CLSCTX_INPROC_SERVER, - __uuidof(IMMDeviceEnumerator), - enumerator.ReceiveVoid()); - if (FAILED(hr)) - return hr; - - ScopedComPtr<IMMDevice> endpoint_device; - hr = enumerator->GetDefaultAudioEndpoint(eRender, - device_role, - endpoint_device.Receive()); - if (FAILED(hr)) - return hr; - - ScopedComPtr<IAudioClient> audio_client; - hr = endpoint_device->Activate(__uuidof(IAudioClient), - CLSCTX_INPROC_SERVER, - NULL, - audio_client.ReceiveVoid()); - return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr; -} - // Retrieves an integer mask which corresponds to the channel layout the // audio engine uses for its internal processing/mixing of shared-mode // streams. This mask indicates which channels are present in the multi- @@ -82,53 +37,23 @@ static HRESULT GetMixFormat(ERole device_role, WAVEFORMATEX** device_format) { // See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx // for more details. static ChannelConfig GetChannelConfig() { - // Use a WAVEFORMATEXTENSIBLE structure since it can specify both the - // number of channels and the mapping of channels to speakers for - // multichannel devices. - base::win::ScopedCoMem<WAVEFORMATPCMEX> format_ex; - HRESULT hr = S_FALSE; - hr = GetMixFormat(eConsole, reinterpret_cast<WAVEFORMATEX**>(&format_ex)); - if (FAILED(hr)) - return 0; - - // The dwChannelMask member specifies which channels are present in the - // multichannel stream. The least significant bit corresponds to the - // front left speaker, the next least significant bit corresponds to the - // front right speaker, and so on. - // See http://msdn.microsoft.com/en-us/library/windows/desktop/dd757714(v=vs.85).aspx - // for more details on the channel mapping. - DVLOG(2) << "dwChannelMask: 0x" << std::hex << format_ex->dwChannelMask; - -#if !defined(NDEBUG) - // See http://en.wikipedia.org/wiki/Surround_sound for more details on - // how to name various speaker configurations. The list below is not complete. - const char* speaker_config = "Undefined"; - switch (format_ex->dwChannelMask) { - case KSAUDIO_SPEAKER_MONO: - speaker_config = "Mono"; - break; - case KSAUDIO_SPEAKER_STEREO: - speaker_config = "Stereo"; - break; - case KSAUDIO_SPEAKER_5POINT1_SURROUND: - speaker_config = "5.1 surround"; - break; - case KSAUDIO_SPEAKER_5POINT1: - speaker_config = "5.1"; - break; - case KSAUDIO_SPEAKER_7POINT1_SURROUND: - speaker_config = "7.1 surround"; - break; - case KSAUDIO_SPEAKER_7POINT1: - speaker_config = "7.1"; - break; - default: - break; - } - DVLOG(2) << "speaker configuration: " << speaker_config; -#endif + WAVEFORMATPCMEX format; + return SUCCEEDED(CoreAudioUtil::GetDefaultSharedModeMixFormat( + eRender, eConsole, &format)) ? + static_cast<int>(format.dwChannelMask) : 0; +} - return static_cast<ChannelConfig>(format_ex->dwChannelMask); +// Compare two sets of audio parameters and return true if they are equal. +// Note that bits_per_sample() is excluded from this comparison since Core +// Audio can deal with most bit depths. 
As an example, if the native/mixing +// bit depth is 32 bits (default), opening at 16 or 24 still works fine and +// the audio engine will do the required conversion for us. +static bool CompareAudioParametersNoBitDepth(const media::AudioParameters& a, + const media::AudioParameters& b) { + return (a.format() == b.format() && + a.channels() == b.channels() && + a.sample_rate() == b.sample_rate() && + a.frames_per_buffer() == b.frames_per_buffer()); } // Converts Microsoft's channel configuration to ChannelLayout. @@ -173,31 +98,63 @@ AUDCLNT_SHAREMODE WASAPIAudioOutputStream::GetShareMode() { return AUDCLNT_SHAREMODE_SHARED; } +// static +int WASAPIAudioOutputStream::HardwareChannelCount() { + WAVEFORMATPCMEX format; + return SUCCEEDED(CoreAudioUtil::GetDefaultSharedModeMixFormat( + eRender, eConsole, &format)) ? + static_cast<int>(format.Format.nChannels) : 0; +} + +// static +ChannelLayout WASAPIAudioOutputStream::HardwareChannelLayout() { + return ChannelConfigToChannelLayout(GetChannelConfig()); +} + +// static +int WASAPIAudioOutputStream::HardwareSampleRate() { + WAVEFORMATPCMEX format; + return SUCCEEDED(CoreAudioUtil::GetDefaultSharedModeMixFormat( + eRender, eConsole, &format)) ? + static_cast<int>(format.Format.nSamplesPerSec) : 0; +} + WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager, const AudioParameters& params, ERole device_role) : creating_thread_id_(base::PlatformThread::CurrentId()), manager_(manager), opened_(false), - restart_rendering_mode_(false), + audio_parmeters_are_valid_(false), volume_(1.0), endpoint_buffer_size_frames_(0), device_role_(device_role), share_mode_(GetShareMode()), - client_channel_count_(params.channels()), num_written_frames_(0), source_(NULL), audio_bus_(AudioBus::Create(params)) { DCHECK(manager_); + DVLOG(1) << "WASAPIAudioOutputStream::WASAPIAudioOutputStream()"; + DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE) + << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled."; + + if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { + // Verify that the input audio parameters are identical (bit depth is + // excluded) to the preferred (native) audio parameters. Open() will fail + // if this is not the case. + AudioParameters preferred_params; + HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters( + eRender, device_role, &preferred_params); + audio_parmeters_are_valid_ = SUCCEEDED(hr) && + CompareAudioParametersNoBitDepth(params, preferred_params); + DLOG_IF(WARNING, !audio_parmeters_are_valid_) + << "Input and preferred parameters are not identical."; + } // Load the Avrt DLL if not already loaded. Required to support MMCSS. bool avrt_init = avrt::Initialize(); DCHECK(avrt_init) << "Failed to load the avrt.dll"; - if (share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE) { - VLOG(1) << ">> Note that EXCLUSIVE MODE is enabled <<"; - } - // Set up the desired render format specified by the client. We use the // WAVE_FORMAT_EXTENSIBLE structure to ensure that multiple channel ordering // and high precision data can be supported. @@ -205,7 +162,7 @@ WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager, // Begin with the WAVEFORMATEX structure that specifies the basic format. 
WAVEFORMATEX* format = &format_.Format; format->wFormatTag = WAVE_FORMAT_EXTENSIBLE; - format->nChannels = client_channel_count_; + format->nChannels = params.channels(); format->nSamplesPerSec = params.sample_rate(); format->wBitsPerSample = params.bits_per_sample(); format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels; @@ -217,15 +174,12 @@ WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager, format_.dwChannelMask = GetChannelConfig(); format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; - // Size in bytes of each audio frame. - frame_size_ = format->nBlockAlign; - // Store size (in different units) of audio packets which we expect to // get from the audio endpoint device in each render event. - packet_size_frames_ = params.GetBytesPerBuffer() / format->nBlockAlign; + packet_size_frames_ = params.frames_per_buffer(); packet_size_bytes_ = params.GetBytesPerBuffer(); packet_size_ms_ = (1000.0 * packet_size_frames_) / params.sample_rate(); - DVLOG(1) << "Number of bytes per audio frame : " << frame_size_; + DVLOG(1) << "Number of bytes per audio frame : " << format->nBlockAlign; DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_; DVLOG(1) << "Number of bytes per packet : " << packet_size_bytes_; DVLOG(1) << "Number of milliseconds per packet: " << packet_size_ms_; @@ -245,55 +199,88 @@ WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager, WASAPIAudioOutputStream::~WASAPIAudioOutputStream() {} bool WASAPIAudioOutputStream::Open() { + DVLOG(1) << "WASAPIAudioOutputStream::Open()"; DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); if (opened_) return true; - // Channel mixing is not supported, it must be handled by ChannelMixer. - if (format_.Format.nChannels != client_channel_count_) { - LOG(ERROR) << "Channel down-mixing is not supported."; - return false; + + // Audio parameters must be identical to the preferred set of parameters + // if shared mode (default) is utilized. + if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { + if (!audio_parmeters_are_valid_) { + LOG(ERROR) << "Audio parameters are not valid."; + return false; + } } - // Create an IMMDeviceEnumerator interface and obtain a reference to - // the IMMDevice interface of the default rendering device with the - // specified role. - HRESULT hr = SetRenderDevice(); - if (FAILED(hr)) { + // Create an IAudioClient interface for the default rendering IMMDevice. + ScopedComPtr<IAudioClient> audio_client = + CoreAudioUtil::CreateDefaultClient(eRender, device_role_); + if (!audio_client) return false; - } - // Obtain an IAudioClient interface which enables us to create and initialize - // an audio stream between an audio application and the audio engine. - hr = ActivateRenderDevice(); - if (FAILED(hr)) { + // Extra sanity to ensure that the provided device format is still valid. + if (!CoreAudioUtil::IsFormatSupported(audio_client, + share_mode_, + &format_)) { return false; } - // Verify that the selected audio endpoint supports the specified format - // set during construction. - // In exclusive mode, the client can choose to open the stream in any audio - // format that the endpoint device supports. In shared mode, the client must - // open the stream in the mix format that is currently in use by the audio - // engine (or a format that is similar to the mix format). The audio engine's - // input streams and the output mix from the engine are all in this format. 
- if (!DesiredFormatIsSupported()) { - return false; + HRESULT hr = S_FALSE; + if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { + // Initialize the audio stream between the client and the device in shared + // mode and using event-driven buffer handling. + hr = CoreAudioUtil::SharedModeInitialize( + audio_client, &format_, audio_samples_render_event_.Get(), + &endpoint_buffer_size_frames_); + if (FAILED(hr)) + return false; + + // We know from experience that the best possible callback sequence is + // achieved when the packet size (given by the native device period) + // is an even multiple of the endpoint buffer size. + // Examples: 48kHz => 960 % 480, 44.1kHz => 896 % 448 or 882 % 441. + if (endpoint_buffer_size_frames_ % packet_size_frames_ != 0) { + LOG(ERROR) << "Bailing out due to non-perfect timing."; + return false; + } + } else { + // TODO(henrika): break out to CoreAudioUtil::ExclusiveModeInitialize() + // when removing the enable-exclusive-audio flag. + hr = ExclusiveModeInitialization(audio_client, + audio_samples_render_event_.Get(), + &endpoint_buffer_size_frames_); + if (FAILED(hr)) + return false; + + // The buffer scheme for exclusive mode streams is not designed for max + // flexibility. We only allow a "perfect match" between the packet size set + // by the user and the actual endpoint buffer size. + if (endpoint_buffer_size_frames_ != packet_size_frames_) { + DLOG(ERROR) << "Bailing out due to non-perfect timing."; + return false; + } } - // Initialize the audio stream between the client and the device using - // shared or exclusive mode and a lowest possible glitch-free latency. - // We will enter different code paths depending on the specified share mode. - hr = InitializeAudioEngine(); - if (FAILED(hr)) { + // Create an IAudioRenderClient client for an initialized IAudioClient. + // The IAudioRenderClient interface enables us to write output data to + // a rendering endpoint buffer. + ScopedComPtr<IAudioRenderClient> audio_render_client = + CoreAudioUtil::CreateRenderClient(audio_client); + if (!audio_render_client) return false; - } + + // Store valid COM interfaces. + audio_client_ = audio_client; + audio_render_client_ = audio_render_client; opened_ = true; return true; } void WASAPIAudioOutputStream::Start(AudioSourceCallback* callback) { + DVLOG(1) << "WASAPIAudioOutputStream::Start()"; DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); CHECK(callback); CHECK(opened_); @@ -303,49 +290,30 @@ void WASAPIAudioOutputStream::Start(AudioSourceCallback* callback) { return; } - if (restart_rendering_mode_) { - // The selected audio device has been removed or disabled and a new - // default device has been enabled instead. The current implementation - // does not to support this sequence of events. Given that Open() - // and Start() are usually called in one sequence; it should be a very - // rare event. - // TODO(henrika): it is possible to extend the functionality here. - LOG(ERROR) << "Unable to start since the selected default device has " - "changed since Open() was called."; - return; - } - source_ = callback; - // Avoid start-up glitches by filling up the endpoint buffer with "silence" - // before starting the stream. 
- BYTE* data_ptr = NULL; - HRESULT hr = audio_render_client_->GetBuffer(endpoint_buffer_size_frames_, - &data_ptr); - if (FAILED(hr)) { - DLOG(ERROR) << "Failed to use rendering audio buffer: " << std::hex << hr; - return; - } - - // Using the AUDCLNT_BUFFERFLAGS_SILENT flag eliminates the need to - // explicitly write silence data to the rendering buffer. - audio_render_client_->ReleaseBuffer(endpoint_buffer_size_frames_, - AUDCLNT_BUFFERFLAGS_SILENT); - num_written_frames_ = endpoint_buffer_size_frames_; - - // Sanity check: verify that the endpoint buffer is filled with silence. - UINT32 num_queued_frames = 0; - audio_client_->GetCurrentPadding(&num_queued_frames); - DCHECK(num_queued_frames == num_written_frames_); - // Create and start the thread that will drive the rendering by waiting for // render events. render_thread_.reset( new base::DelegateSimpleThread(this, "wasapi_render_thread")); render_thread_->Start(); + if (!render_thread_->HasBeenStarted()) { + DLOG(ERROR) << "Failed to start WASAPI render thread."; + return; + } + + // Ensure that the endpoint buffer is prepared with silence. + if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { + if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( + audio_client_, audio_render_client_)) { + DLOG(WARNING) << "Failed to prepare endpoint buffers with silence."; + return; + } + } + num_written_frames_ = endpoint_buffer_size_frames_; // Start streaming data between the endpoint buffer and the audio engine. - hr = audio_client_->Start(); + HRESULT hr = audio_client_->Start(); if (FAILED(hr)) { SetEvent(stop_render_event_.Get()); render_thread_->Join(); @@ -355,6 +323,7 @@ void WASAPIAudioOutputStream::Start(AudioSourceCallback* callback) { } void WASAPIAudioOutputStream::Stop() { + DVLOG(1) << "WASAPIAudioOutputStream::Stop()"; DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); if (!render_thread_.get()) return; @@ -397,6 +366,7 @@ void WASAPIAudioOutputStream::Stop() { } void WASAPIAudioOutputStream::Close() { + DVLOG(1) << "WASAPIAudioOutputStream::Close()"; DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); // It is valid to call Close() before calling open or Start(). @@ -422,40 +392,6 @@ void WASAPIAudioOutputStream::GetVolume(double* volume) { *volume = static_cast<double>(volume_); } -// static -int WASAPIAudioOutputStream::HardwareChannelCount() { - // Use a WAVEFORMATEXTENSIBLE structure since it can specify both the - // number of channels and the mapping of channels to speakers for - // multichannel devices. - base::win::ScopedCoMem<WAVEFORMATPCMEX> format_ex; - HRESULT hr = GetMixFormat( - eConsole, reinterpret_cast<WAVEFORMATEX**>(&format_ex)); - if (FAILED(hr)) - return 0; - - // Number of channels in the stream. Corresponds to the number of bits - // set in the dwChannelMask. 
- DVLOG(1) << "endpoint channels (out): " << format_ex->Format.nChannels; - - return static_cast<int>(format_ex->Format.nChannels); -} - -// static -ChannelLayout WASAPIAudioOutputStream::HardwareChannelLayout() { - return ChannelConfigToChannelLayout(GetChannelConfig()); -} - -// static -int WASAPIAudioOutputStream::HardwareSampleRate(ERole device_role) { - base::win::ScopedCoMem<WAVEFORMATEX> format; - HRESULT hr = GetMixFormat(device_role, &format); - if (FAILED(hr)) - return 0; - - DVLOG(2) << "nSamplesPerSec: " << format->nSamplesPerSec; - return static_cast<int>(format->nSamplesPerSec); -} - void WASAPIAudioOutputStream::Run() { ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA); @@ -515,6 +451,8 @@ void WASAPIAudioOutputStream::Run() { break; case WAIT_OBJECT_0 + 1: { + TRACE_EVENT0("audio", "WASAPIAudioOutputStream::Run"); + // |audio_samples_render_event_| has been set. UINT32 num_queued_frames = 0; uint8* audio_data = NULL; @@ -542,97 +480,101 @@ void WASAPIAudioOutputStream::Run() { // directly on the buffer size. num_available_frames = endpoint_buffer_size_frames_; } + if (FAILED(hr)) { + DLOG(ERROR) << "Failed to retrieve amount of available space: " + << std::hex << hr; + continue; + } - // Check if there is enough available space to fit the packet size - // specified by the client. - if (FAILED(hr) || (num_available_frames < packet_size_frames_)) + // It can happen that we were not able to find a a perfect match + // between the native device rate and the endpoint buffer size. + // In this case, we are using a packet size which equals the enpoint + // buffer size (does not lead to lowest possible delay and is rare + // case) and must therefore wait for yet another callback until we + // are able to provide data. + if ((num_available_frames > 0) && + (num_available_frames != packet_size_frames_)) { continue; + } - // Derive the number of packets we need get from the client to - // fill up the available area in the endpoint buffer. - // |num_packets| will always be one for exclusive-mode streams. - size_t num_packets = (num_available_frames / packet_size_frames_); - - // Get data from the client/source. - for (size_t n = 0; n < num_packets; ++n) { - // Grab all available space in the rendering endpoint buffer - // into which the client can write a data packet. - hr = audio_render_client_->GetBuffer(packet_size_frames_, - &audio_data); - if (FAILED(hr)) { - DLOG(ERROR) << "Failed to use rendering audio buffer: " - << std::hex << hr; - continue; - } - - // Derive the audio delay which corresponds to the delay between - // a render event and the time when the first audio sample in a - // packet is played out through the speaker. This delay value - // can typically be utilized by an acoustic echo-control (AEC) - // unit at the render side. - UINT64 position = 0; - int audio_delay_bytes = 0; - hr = audio_clock->GetPosition(&position, NULL); - if (SUCCEEDED(hr)) { - // Stream position of the sample that is currently playing - // through the speaker. - double pos_sample_playing_frames = format_.Format.nSamplesPerSec * - (static_cast<double>(position) / device_frequency); - - // Stream position of the last sample written to the endpoint - // buffer. Note that, the packet we are about to receive in - // the upcoming callback is also included. - size_t pos_last_sample_written_frames = - num_written_frames_ + packet_size_frames_; - - // Derive the actual delay value which will be fed to the - // render client using the OnMoreData() callback. 
- audio_delay_bytes = (pos_last_sample_written_frames - - pos_sample_playing_frames) * frame_size_; - } - - // Read a data packet from the registered client source and - // deliver a delay estimate in the same callback to the client. - // A time stamp is also stored in the AudioBuffersState. This - // time stamp can be used at the client side to compensate for - // the delay between the usage of the delay value and the time - // of generation. - - uint32 num_filled_bytes = 0; - const int bytes_per_sample = format_.Format.wBitsPerSample >> 3; - - int frames_filled = source_->OnMoreData( - audio_bus_.get(), AudioBuffersState(0, audio_delay_bytes)); - num_filled_bytes = frames_filled * frame_size_; - DCHECK_LE(num_filled_bytes, packet_size_bytes_); - // Note: If this ever changes to output raw float the data must be - // clipped and sanitized since it may come from an untrusted - // source such as NaCl. - audio_bus_->ToInterleaved( - frames_filled, bytes_per_sample, audio_data); - - // Perform in-place, software-volume adjustments. - media::AdjustVolume(audio_data, - num_filled_bytes, - audio_bus_->channels(), - bytes_per_sample, - volume_); - - // Zero out the part of the packet which has not been filled by - // the client. Using silence is the least bad option in this - // situation. - if (num_filled_bytes < packet_size_bytes_) { - memset(&audio_data[num_filled_bytes], 0, - (packet_size_bytes_ - num_filled_bytes)); - } - - // Release the buffer space acquired in the GetBuffer() call. - DWORD flags = 0; - audio_render_client_->ReleaseBuffer(packet_size_frames_, - flags); - - num_written_frames_ += packet_size_frames_; + // Grab all available space in the rendering endpoint buffer + // into which the client can write a data packet. + hr = audio_render_client_->GetBuffer(packet_size_frames_, + &audio_data); + if (FAILED(hr)) { + DLOG(ERROR) << "Failed to use rendering audio buffer: " + << std::hex << hr; + continue; } + + // Derive the audio delay which corresponds to the delay between + // a render event and the time when the first audio sample in a + // packet is played out through the speaker. This delay value + // can typically be utilized by an acoustic echo-control (AEC) + // unit at the render side. + UINT64 position = 0; + int audio_delay_bytes = 0; + hr = audio_clock->GetPosition(&position, NULL); + if (SUCCEEDED(hr)) { + // Stream position of the sample that is currently playing + // through the speaker. + double pos_sample_playing_frames = format_.Format.nSamplesPerSec * + (static_cast<double>(position) / device_frequency); + + // Stream position of the last sample written to the endpoint + // buffer. Note that, the packet we are about to receive in + // the upcoming callback is also included. + size_t pos_last_sample_written_frames = + num_written_frames_ + packet_size_frames_; + + // Derive the actual delay value which will be fed to the + // render client using the OnMoreData() callback. + audio_delay_bytes = (pos_last_sample_written_frames - + pos_sample_playing_frames) * format_.Format.nBlockAlign; + } + + // Read a data packet from the registered client source and + // deliver a delay estimate in the same callback to the client. + // A time stamp is also stored in the AudioBuffersState. This + // time stamp can be used at the client side to compensate for + // the delay between the usage of the delay value and the time + // of generation. 
+ + uint32 num_filled_bytes = 0; + const int bytes_per_sample = format_.Format.wBitsPerSample >> 3; + + int frames_filled = source_->OnMoreData( + audio_bus_.get(), AudioBuffersState(0, audio_delay_bytes)); + num_filled_bytes = frames_filled * format_.Format.nBlockAlign; + DCHECK_LE(num_filled_bytes, packet_size_bytes_); + + // Note: If this ever changes to output raw float the data must be + // clipped and sanitized since it may come from an untrusted + // source such as NaCl. + audio_bus_->ToInterleaved( + frames_filled, bytes_per_sample, audio_data); + + // Perform in-place, software-volume adjustments. + media::AdjustVolume(audio_data, + num_filled_bytes, + audio_bus_->channels(), + bytes_per_sample, + volume_); + + // Zero out the part of the packet which has not been filled by + // the client. Using silence is the least bad option in this + // situation. + if (num_filled_bytes < packet_size_bytes_) { + memset(&audio_data[num_filled_bytes], 0, + (packet_size_bytes_ - num_filled_bytes)); + } + + // Release the buffer space acquired in the GetBuffer() call. + DWORD flags = 0; + audio_render_client_->ReleaseBuffer(packet_size_frames_, + flags); + + num_written_frames_ += packet_size_frames_; } break; default: @@ -663,224 +605,21 @@ void WASAPIAudioOutputStream::HandleError(HRESULT err) { source_->OnError(this, static_cast<int>(err)); } -HRESULT WASAPIAudioOutputStream::SetRenderDevice() { - ScopedComPtr<IMMDeviceEnumerator> device_enumerator; - ScopedComPtr<IMMDevice> endpoint_device; - - // Create the IMMDeviceEnumerator interface. - HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), - NULL, - CLSCTX_INPROC_SERVER, - __uuidof(IMMDeviceEnumerator), - device_enumerator.ReceiveVoid()); - if (SUCCEEDED(hr)) { - // Retrieve the default render audio endpoint for the specified role. - // Note that, in Windows Vista, the MMDevice API supports device roles - // but the system-supplied user interface programs do not. - hr = device_enumerator->GetDefaultAudioEndpoint( - eRender, device_role_, endpoint_device.Receive()); - if (FAILED(hr)) - return hr; - - // Verify that the audio endpoint device is active. That is, the audio - // adapter that connects to the endpoint device is present and enabled. - DWORD state = DEVICE_STATE_DISABLED; - hr = endpoint_device->GetState(&state); - if (SUCCEEDED(hr)) { - if (!(state & DEVICE_STATE_ACTIVE)) { - DLOG(ERROR) << "Selected render device is not active."; - hr = E_ACCESSDENIED; - } - } - } - - if (SUCCEEDED(hr)) { - device_enumerator_ = device_enumerator; - endpoint_device_ = endpoint_device; - } - - return hr; -} - -HRESULT WASAPIAudioOutputStream::ActivateRenderDevice() { - ScopedComPtr<IAudioClient> audio_client; - - // Creates and activates an IAudioClient COM object given the selected - // render endpoint device. - HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient), - CLSCTX_INPROC_SERVER, - NULL, - audio_client.ReceiveVoid()); - if (SUCCEEDED(hr)) { - // Retrieve the stream format that the audio engine uses for its internal - // processing/mixing of shared-mode streams. - audio_engine_mix_format_.Reset(NULL); - hr = audio_client->GetMixFormat( - reinterpret_cast<WAVEFORMATEX**>(&audio_engine_mix_format_)); - - if (SUCCEEDED(hr)) { - audio_client_ = audio_client; - } - } - - return hr; -} - -bool WASAPIAudioOutputStream::DesiredFormatIsSupported() { - // Determine, before calling IAudioClient::Initialize(), whether the audio - // engine supports a particular stream format. 
- // In shared mode, the audio engine always supports the mix format, - // which is stored in the |audio_engine_mix_format_| member and it is also - // possible to receive a proposed (closest) format if the current format is - // not supported. - base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> closest_match; - HRESULT hr = audio_client_->IsFormatSupported( - share_mode_, reinterpret_cast<WAVEFORMATEX*>(&format_), - reinterpret_cast<WAVEFORMATEX**>(&closest_match)); - - // This log can only be triggered for shared mode. - DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported " - << "but a closest match exists."; - // This log can be triggered both for shared and exclusive modes. - DLOG_IF(ERROR, hr == AUDCLNT_E_UNSUPPORTED_FORMAT) << "Unsupported format."; - if (hr == S_FALSE) { - DVLOG(1) << "wFormatTag : " << closest_match->Format.wFormatTag; - DVLOG(1) << "nChannels : " << closest_match->Format.nChannels; - DVLOG(1) << "nSamplesPerSec: " << closest_match->Format.nSamplesPerSec; - DVLOG(1) << "wBitsPerSample: " << closest_match->Format.wBitsPerSample; - } - - return (hr == S_OK); -} - -HRESULT WASAPIAudioOutputStream::InitializeAudioEngine() { -#if !defined(NDEBUG) - // The period between processing passes by the audio engine is fixed for a - // particular audio endpoint device and represents the smallest processing - // quantum for the audio engine. This period plus the stream latency between - // the buffer and endpoint device represents the minimum possible latency - // that an audio application can achieve in shared mode. - { - REFERENCE_TIME default_device_period = 0; - REFERENCE_TIME minimum_device_period = 0; - HRESULT hr_dbg = audio_client_->GetDevicePeriod(&default_device_period, - &minimum_device_period); - if (SUCCEEDED(hr_dbg)) { - // Shared mode device period. - DVLOG(1) << "shared mode (default) device period: " - << static_cast<double>(default_device_period / 10000.0) - << " [ms]"; - // Exclusive mode device period. - DVLOG(1) << "exclusive mode (minimum) device period: " - << static_cast<double>(minimum_device_period / 10000.0) - << " [ms]"; - } - - REFERENCE_TIME latency = 0; - hr_dbg = audio_client_->GetStreamLatency(&latency); - if (SUCCEEDED(hr_dbg)) { - DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0) - << " [ms]"; - } - } -#endif - - HRESULT hr = S_FALSE; - - // Perform different initialization depending on if the device shall be - // opened in shared mode or in exclusive mode. - hr = (share_mode_ == AUDCLNT_SHAREMODE_SHARED) ? - SharedModeInitialization() : ExclusiveModeInitialization(); - if (FAILED(hr)) { - LOG(WARNING) << "IAudioClient::Initialize() failed: " << std::hex << hr; - return hr; - } - - // Retrieve the length of the endpoint buffer. The buffer length represents - // the maximum amount of rendering data that the client can write to - // the endpoint buffer during a single processing pass. - // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate. - hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_); - if (FAILED(hr)) - return hr; - DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_ - << " [frames]"; - - // The buffer scheme for exclusive mode streams is not designed for max - // flexibility. We only allow a "perfect match" between the packet size set - // by the user and the actual endpoint buffer size. 
- if (share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE && - endpoint_buffer_size_frames_ != packet_size_frames_) { - hr = AUDCLNT_E_INVALID_SIZE; - DLOG(ERROR) << "AUDCLNT_E_INVALID_SIZE"; - return hr; - } - - // Set the event handle that the audio engine will signal each time - // a buffer becomes ready to be processed by the client. - hr = audio_client_->SetEventHandle(audio_samples_render_event_.Get()); - if (FAILED(hr)) - return hr; - - // Get access to the IAudioRenderClient interface. This interface - // enables us to write output data to a rendering endpoint buffer. - // The methods in this interface manage the movement of data packets - // that contain audio-rendering data. - hr = audio_client_->GetService(__uuidof(IAudioRenderClient), - audio_render_client_.ReceiveVoid()); - return hr; -} - -HRESULT WASAPIAudioOutputStream::SharedModeInitialization() { - DCHECK_EQ(share_mode_, AUDCLNT_SHAREMODE_SHARED); - - // TODO(henrika): this buffer scheme is still under development. - // The exact details are yet to be determined based on tests with different - // audio clients. - int glitch_free_buffer_size_ms = static_cast<int>(packet_size_ms_ + 0.5); - if (audio_engine_mix_format_->Format.nSamplesPerSec % 8000 == 0) { - // Initial tests have shown that we have to add 10 ms extra to - // ensure that we don't run empty for any packet size. - glitch_free_buffer_size_ms += 10; - } else if (audio_engine_mix_format_->Format.nSamplesPerSec % 11025 == 0) { - // Initial tests have shown that we have to add 20 ms extra to - // ensure that we don't run empty for any packet size. - glitch_free_buffer_size_ms += 20; - } else { - DLOG(WARNING) << "Unsupported sample rate " - << audio_engine_mix_format_->Format.nSamplesPerSec << " detected"; - glitch_free_buffer_size_ms += 20; - } - DVLOG(1) << "glitch_free_buffer_size_ms: " << glitch_free_buffer_size_ms; - REFERENCE_TIME requested_buffer_duration = - static_cast<REFERENCE_TIME>(glitch_free_buffer_size_ms * 10000); - - // Initialize the audio stream between the client and the device. - // We connect indirectly through the audio engine by using shared mode - // and WASAPI is initialized in an event driven mode. - // Note that this API ensures that the buffer is never smaller than the - // minimum buffer size needed to ensure glitch-free rendering. - // If we requests a buffer size that is smaller than the audio engine's - // minimum required buffer size, the method sets the buffer size to this - // minimum buffer size rather than to the buffer size requested. 
- HRESULT hr = S_FALSE; - hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED, - AUDCLNT_STREAMFLAGS_EVENTCALLBACK | - AUDCLNT_STREAMFLAGS_NOPERSIST, - requested_buffer_duration, - 0, - reinterpret_cast<WAVEFORMATEX*>(&format_), - NULL); - return hr; -} - -HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization() { +HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization( + IAudioClient* client, HANDLE event_handle, size_t* endpoint_buffer_size) { DCHECK_EQ(share_mode_, AUDCLNT_SHAREMODE_EXCLUSIVE); float f = (1000.0 * packet_size_frames_) / format_.Format.nSamplesPerSec; REFERENCE_TIME requested_buffer_duration = static_cast<REFERENCE_TIME>(f * 10000.0 + 0.5); + DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST; + bool use_event = (event_handle != NULL && + event_handle != INVALID_HANDLE_VALUE); + if (use_event) + stream_flags |= AUDCLNT_STREAMFLAGS_EVENTCALLBACK; + DVLOG(2) << "stream_flags: 0x" << std::hex << stream_flags; + // Initialize the audio stream between the client and the device. // For an exclusive-mode stream that uses event-driven buffering, the // caller must specify nonzero values for hnsPeriodicity and @@ -890,21 +629,19 @@ HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization() { // Following the Initialize call for a rendering stream, the caller should // fill the first of the two buffers before starting the stream. HRESULT hr = S_FALSE; - hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, - AUDCLNT_STREAMFLAGS_EVENTCALLBACK | - AUDCLNT_STREAMFLAGS_NOPERSIST, - requested_buffer_duration, - requested_buffer_duration, - reinterpret_cast<WAVEFORMATEX*>(&format_), - NULL); + hr = client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, + stream_flags, + requested_buffer_duration, + requested_buffer_duration, + reinterpret_cast<WAVEFORMATEX*>(&format_), + NULL); if (FAILED(hr)) { if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED) { LOG(ERROR) << "AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED"; UINT32 aligned_buffer_size = 0; - audio_client_->GetBufferSize(&aligned_buffer_size); + client->GetBufferSize(&aligned_buffer_size); DVLOG(1) << "Use aligned buffer size instead: " << aligned_buffer_size; - audio_client_.Release(); // Calculate new aligned periodicity. Each unit of reference time // is 100 nanoseconds. @@ -924,33 +661,27 @@ HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization() { // the minimum supported size (usually ~3ms on Windows 7). LOG(ERROR) << "AUDCLNT_E_INVALID_DEVICE_PERIOD"; } + return hr; } - return hr; -} - -std::string WASAPIAudioOutputStream::GetDeviceName(LPCWSTR device_id) const { - std::string name; - ScopedComPtr<IMMDevice> audio_device; - - // Get the IMMDevice interface corresponding to the given endpoint ID string. - HRESULT hr = device_enumerator_->GetDevice(device_id, audio_device.Receive()); - if (SUCCEEDED(hr)) { - // Retrieve user-friendly name of endpoint device. - // Example: "Speakers (Realtek High Definition Audio)". 
- ScopedComPtr<IPropertyStore> properties; - hr = audio_device->OpenPropertyStore(STGM_READ, properties.Receive()); - if (SUCCEEDED(hr)) { - base::win::ScopedPropVariant friendly_name; - hr = properties->GetValue(PKEY_Device_FriendlyName, - friendly_name.Receive()); - if (SUCCEEDED(hr) && friendly_name.get().vt == VT_LPWSTR) { - if (friendly_name.get().pwszVal) - name = WideToUTF8(friendly_name.get().pwszVal); - } + if (use_event) { + hr = client->SetEventHandle(event_handle); + if (FAILED(hr)) { + DVLOG(1) << "IAudioClient::SetEventHandle: " << std::hex << hr; + return hr; } } - return name; + + UINT32 buffer_size_in_frames = 0; + hr = client->GetBufferSize(&buffer_size_in_frames); + if (FAILED(hr)) { + DVLOG(1) << "IAudioClient::GetBufferSize: " << std::hex << hr; + return hr; + } + + *endpoint_buffer_size = static_cast<size_t>(buffer_size_in_frames); + DVLOG(2) << "endpoint buffer size: " << buffer_size_in_frames; + return hr; } } // namespace media diff --git a/media/audio/win/audio_low_latency_output_win.h b/media/audio/win/audio_low_latency_output_win.h index 4cfe556..a50b6c3 100644 --- a/media/audio/win/audio_low_latency_output_win.h +++ b/media/audio/win/audio_low_latency_output_win.h @@ -21,17 +21,10 @@ // render thread and at the same time stops audio streaming. // - The same thread that called stop will call Close() where we cleanup // and notify the audio manager, which likely will destroy this object. -// - Initial tests on Windows 7 shows that this implementation results in a -// latency of approximately 35 ms if the selected packet size is less than -// or equal to 20 ms. Using a packet size of 10 ms does not result in a -// lower latency but only affects the size of the data buffer in each -// OnMoreData() callback. // - A total typical delay of 35 ms contains three parts: // o Audio endpoint device period (~10 ms). // o Stream latency between the buffer and endpoint device (~5 ms). // o Endpoint buffer (~20 ms to ensure glitch-free rendering). -// - Note that, if the user selects a packet size of e.g. 100 ms, the total -// delay will be approximately 115 ms (10 + 5 + 100). // // Implementation notes: // @@ -39,18 +32,11 @@ // - This implementation is single-threaded, hence: // o Construction and destruction must take place from the same thread. // o All APIs must be called from the creating thread as well. -// - It is recommended to first acquire the native sample rate of the default -// input device and then use the same rate when creating this object. Use -// WASAPIAudioOutputStream::HardwareSampleRate() to retrieve the sample rate. +// - It is required to first acquire the native audio parameters of the default +// output device and then use the same rate when creating this object. Use +// e.g. WASAPIAudioOutputStream::HardwareSampleRate() to retrieve the sample +// rate. Open() will fail unless "perfect" audio parameters are utilized. // - Calling Close() also leads to self destruction. -// - Stream switching is not supported if the user shifts the audio device -// after Open() is called but before Start() has been called. -// - Stream switching can fail if streaming starts on one device with a -// supported format (X) and the new default device - to which we would like -// to switch - uses another format (Y), which is not supported given the -// configured audio parameters. -// - The audio device must be opened with the same number of channels as it -// supports natively (see HardwareChannelCount()) otherwise Open() will fail. 
// - Support for 8-bit audio has not yet been verified and tested. // // Core Audio API details: @@ -164,7 +150,7 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : // Retrieves the sample rate the audio engine uses for its internal // processing/mixing of shared-mode streams for the default endpoint device. - static int HardwareSampleRate(ERole device_role); + static int HardwareSampleRate(); // Returns AUDCLNT_SHAREMODE_EXCLUSIVE if --enable-exclusive-mode is used // as command-line flag and AUDCLNT_SHAREMODE_SHARED otherwise (default). @@ -172,10 +158,6 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : bool started() const { return render_thread_.get() != NULL; } - // Returns the number of channels the audio engine uses for its internal - // processing/mixing of shared-mode streams for the default endpoint device. - int GetEndpointChannelCountForTesting() { return format_.Format.nChannels; } - private: // DelegateSimpleThread::Delegate implementation. virtual void Run() OVERRIDE; @@ -183,22 +165,13 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : // Issues the OnError() callback to the |sink_|. void HandleError(HRESULT err); - // The Open() method is divided into these sub methods. - HRESULT SetRenderDevice(); - HRESULT ActivateRenderDevice(); - bool DesiredFormatIsSupported(); - HRESULT InitializeAudioEngine(); - - // Called when the device will be opened in shared mode and use the - // internal audio engine's mix format. - HRESULT SharedModeInitialization(); - // Called when the device will be opened in exclusive mode and use the // application specified format. - HRESULT ExclusiveModeInitialization(); - - // Converts unique endpoint ID to user-friendly device name. - std::string GetDeviceName(LPCWSTR device_id) const; + // TODO(henrika): rewrite and move to CoreAudioUtil when removing flag + // for exclusive audio mode. + HRESULT ExclusiveModeInitialization(IAudioClient* client, + HANDLE event_handle, + size_t* endpoint_buffer_size); // Contains the thread ID of the creating thread. base::PlatformThreadId creating_thread_id_; @@ -215,25 +188,17 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : // Use this for multiple channel and hi-resolution PCM data. WAVEFORMATPCMEX format_; - // Copy of the audio format which we know the audio engine supports. - // It is recommended to ensure that the sample rate in |format_| is identical - // to the sample rate in |audio_engine_mix_format_|. - base::win::ScopedCoMem<WAVEFORMATPCMEX> audio_engine_mix_format_; - + // Set to true when stream is successfully opened. bool opened_; - // Set to true as soon as a new default device is detected, and cleared when - // the streaming has switched from using the old device to the new device. - // All additional device detections during an active state are ignored to - // ensure that the ongoing switch can finalize without disruptions. - bool restart_rendering_mode_; + // We check if the input audio parameters are identical (bit depth is + // excluded) to the preferred (native) audio parameters during construction. + // Open() will fail if |audio_parmeters_are_valid_| is false. + bool audio_parmeters_are_valid_; // Volume level from 0 to 1. float volume_; - // Size in bytes of each audio frame (4 bytes for 16-bit stereo PCM). - size_t frame_size_; - // Size in audio frames of each audio packet where an audio packet // is defined as the block of data which the source is expected to deliver // in each OnMoreData() callback. 
@@ -256,11 +221,6 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : // where AUDCLNT_SHAREMODE_SHARED is the default. AUDCLNT_SHAREMODE share_mode_; - // The channel count set by the client in |params| which is provided to the - // constructor. The client must feed the AudioSourceCallback::OnMoreData() - // callback with PCM-data that contains this number of channels. - int client_channel_count_; - // Counts the number of audio frames written to the endpoint buffer. UINT64 num_written_frames_; @@ -270,9 +230,6 @@ class MEDIA_EXPORT WASAPIAudioOutputStream : // An IMMDeviceEnumerator interface which represents a device enumerator. base::win::ScopedComPtr<IMMDeviceEnumerator> device_enumerator_; - // An IMMDevice interface which represents an audio endpoint device. - base::win::ScopedComPtr<IMMDevice> endpoint_device_; - // An IAudioClient interface which enables a client to create and initialize // an audio stream between an audio application and the audio engine. base::win::ScopedComPtr<IAudioClient> audio_client_; diff --git a/media/audio/win/audio_low_latency_output_win_unittest.cc b/media/audio/win/audio_low_latency_output_win_unittest.cc index 9836c09..b4efbb5 100644 --- a/media/audio/win/audio_low_latency_output_win_unittest.cc +++ b/media/audio/win/audio_low_latency_output_win_unittest.cc @@ -28,6 +28,7 @@ using ::testing::_; using ::testing::AnyNumber; +using ::testing::AtLeast; using ::testing::Between; using ::testing::CreateFunctor; using ::testing::DoAll; @@ -44,7 +45,6 @@ static const char kSpeechFile_16b_s_44k[] = "speech_16b_stereo_44kHz.raw"; static const size_t kFileDurationMs = 20000; static const size_t kNumFileSegments = 2; static const int kBitsPerSample = 16; -static const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_STEREO; static const size_t kMaxDeltaSamples = 1000; static const char* kDeltaTimeMsFileName = "delta_times_ms.txt"; @@ -52,7 +52,7 @@ MATCHER_P(HasValidDelay, value, "") { // It is difficult to come up with a perfect test condition for the delay // estimation. For now, verify that the produced output delay is always // larger than the selected buffer size. - return arg.hardware_delay_bytes > value.hardware_delay_bytes; + return arg.hardware_delay_bytes >= value.hardware_delay_bytes; } // Used to terminate a loop from a different thread than the loop belongs to. @@ -181,11 +181,6 @@ static bool CanRunAudioTests(AudioManager* audio_man) { return false; } - if (WASAPIAudioOutputStream::HardwareChannelLayout() != kChannelLayout) { - LOG(WARNING) << "This test requires stereo audio output."; - return false; - } - return true; } @@ -194,16 +189,15 @@ static bool CanRunAudioTests(AudioManager* audio_man) { class AudioOutputStreamWrapper { public: explicit AudioOutputStreamWrapper(AudioManager* audio_manager) - : com_init_(ScopedCOMInitializer::kMTA), - audio_man_(audio_manager), + : audio_man_(audio_manager), format_(AudioParameters::AUDIO_PCM_LOW_LATENCY), - channel_layout_(kChannelLayout), bits_per_sample_(kBitsPerSample) { - // Use native/mixing sample rate and 10ms frame size as default. 
- sample_rate_ = static_cast<int>( - WASAPIAudioOutputStream::HardwareSampleRate(eConsole)); - samples_per_packet_ = sample_rate_ / 100; - DCHECK(sample_rate_); + AudioParameters preferred_params; + EXPECT_TRUE(SUCCEEDED(CoreAudioUtil::GetPreferredAudioParameters( + eRender, eConsole, &preferred_params))); + channel_layout_ = preferred_params.channel_layout(); + sample_rate_ = preferred_params.sample_rate(); + samples_per_packet_ = preferred_params.frames_per_buffer(); } ~AudioOutputStreamWrapper() {} @@ -243,7 +237,6 @@ class AudioOutputStreamWrapper { return aos; } - ScopedCOMInitializer com_init_; AudioManager* audio_man_; AudioParameters::Format format_; ChannelLayout channel_layout_; @@ -261,9 +254,7 @@ static AudioOutputStream* CreateDefaultAudioOutputStream( } // Verify that we can retrieve the current hardware/mixing sample rate -// for all supported device roles. The ERole enumeration defines constants -// that indicate the role that the system/user has assigned to an audio -// endpoint device. +// for the default audio device. // TODO(henrika): modify this test when we support full device enumeration. TEST(WASAPIAudioOutputStreamTest, HardwareSampleRate) { // Skip this test in exclusive mode since the resulting rate is only utilized @@ -272,22 +263,10 @@ TEST(WASAPIAudioOutputStreamTest, HardwareSampleRate) { if (!CanRunAudioTests(audio_manager.get()) || ExclusiveModeIsEnabled()) return; - ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA); - // Default device intended for games, system notification sounds, // and voice commands. int fs = static_cast<int>( - WASAPIAudioOutputStream::HardwareSampleRate(eConsole)); - EXPECT_GE(fs, 0); - - // Default communication device intended for e.g. VoIP communication. - fs = static_cast<int>( - WASAPIAudioOutputStream::HardwareSampleRate(eCommunications)); - EXPECT_GE(fs, 0); - - // Multimedia device for music, movies and live music recording. - fs = static_cast<int>( - WASAPIAudioOutputStream::HardwareSampleRate(eMultimedia)); + WASAPIAudioOutputStream::HardwareSampleRate()); EXPECT_GE(fs, 0); } @@ -300,29 +279,6 @@ TEST(WASAPIAudioOutputStreamTest, CreateAndClose) { aos->Close(); } -// Verify that the created object is configured to use the same number of -// audio channels as is reported by the static HardwareChannelCount() method. -TEST(WASAPIAudioOutputStreamTest, HardwareChannelCount) { - scoped_ptr<AudioManager> audio_manager(AudioManager::Create()); - if (!CanRunAudioTests(audio_manager.get())) - return; - - ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA); - - // First, verify that we can read a valid native/hardware channel-count. - int hardware_channel_count = WASAPIAudioOutputStream::HardwareChannelCount(); - EXPECT_GE(hardware_channel_count, 1); - - AudioOutputStreamWrapper aosw(audio_manager.get()); - WASAPIAudioOutputStream* aos = - static_cast<WASAPIAudioOutputStream*>(aosw.Create()); - - // Next, ensure that the created output stream object is really using the - // hardware channel-count. - EXPECT_EQ(hardware_channel_count, aos->GetEndpointChannelCountForTesting()); - aos->Close(); -} - // Test Open(), Close() calling sequence. TEST(WASAPIAudioOutputStreamTest, OpenAndClose) { scoped_ptr<AudioManager> audio_manager(AudioManager::Create()); @@ -439,8 +395,8 @@ TEST(WASAPIAudioOutputStreamTest, MiscCallingSequences) { aos->Close(); } -// Use default packet size (10ms) and verify that rendering starts. 
-TEST(WASAPIAudioOutputStreamTest, PacketSizeInMilliseconds) { +// Use preferred packet size and verify that rendering starts. +TEST(WASAPIAudioOutputStreamTest, ValidPacketSize) { scoped_ptr<AudioManager> audio_manager(AudioManager::Create()); if (!CanRunAudioTests(audio_manager.get())) return; @@ -475,42 +431,24 @@ TEST(WASAPIAudioOutputStreamTest, PacketSizeInMilliseconds) { aos->Close(); } -// Use a fixed packets size (independent of sample rate) and verify -// that rendering starts. -TEST(WASAPIAudioOutputStreamTest, PacketSizeInSamples) { +// Use a non-preferred packet size and verify that Open() fails. +TEST(WASAPIAudioOutputStreamTest, InvalidPacketSize) { scoped_ptr<AudioManager> audio_manager(AudioManager::Create()); if (!CanRunAudioTests(audio_manager.get())) return; - MessageLoopForUI loop; - MockAudioSourceCallback source; - - // Create default WASAPI output stream which reads data in stereo using - // the native mixing rate and channel count. The buffer size is set to - // 1024 samples. - AudioOutputStreamWrapper aosw(audio_manager.get()); - AudioOutputStream* aos = aosw.Create(1024); - EXPECT_TRUE(aos->Open()); - - // Derive the expected size in bytes of each packet. - uint32 bytes_per_packet = aosw.channels() * aosw.samples_per_packet() * - (aosw.bits_per_sample() / 8); + if (ExclusiveModeIsEnabled()) + return; - // Set up expected minimum delay estimation. - AudioBuffersState state(0, bytes_per_packet); + AudioParameters preferred_params; + EXPECT_TRUE(SUCCEEDED(CoreAudioUtil::GetPreferredAudioParameters( + eRender, eConsole, &preferred_params))); + int too_large_packet_size = 2 * preferred_params.frames_per_buffer(); - // Ensure that callbacks start correctly. - EXPECT_CALL(source, OnMoreData(NotNull(), HasValidDelay(state))) - .WillOnce(DoAll( - QuitLoop(loop.message_loop_proxy()), - Return(aosw.samples_per_packet()))) - .WillRepeatedly(Return(aosw.samples_per_packet())); + AudioOutputStreamWrapper aosw(audio_manager.get()); + AudioOutputStream* aos = aosw.Create(too_large_packet_size); + EXPECT_FALSE(aos->Open()); - aos->Start(&source); - loop.PostDelayedTask(FROM_HERE, MessageLoop::QuitClosure(), - TestTimeouts::action_timeout()); - loop.Run(); - aos->Stop(); aos->Close(); } @@ -704,7 +642,7 @@ TEST(WASAPIAudioOutputStreamTest, ExclusiveModeMinBufferSizeAt48kHz) { // Set up expected minimum delay estimation. AudioBuffersState state(0, bytes_per_packet); - // Wait for the first callback and verify its parameters. + // Wait for the first callback and verify its parameters. 
EXPECT_CALL(source, OnMoreData(NotNull(), HasValidDelay(state))) .WillOnce(DoAll( QuitLoop(loop.message_loop_proxy()), diff --git a/media/audio/win/audio_unified_win.cc b/media/audio/win/audio_unified_win.cc index 677f9e0..f3eb193 100644 --- a/media/audio/win/audio_unified_win.cc +++ b/media/audio/win/audio_unified_win.cc @@ -9,6 +9,7 @@ #include "base/debug/trace_event.h" #include "base/time.h" #include "base/win/scoped_com_initializer.h" +#include "media/audio/audio_util.h" #include "media/audio/win/audio_manager_win.h" #include "media/audio/win/avrt_wrapper_win.h" #include "media/audio/win/core_audio_util_win.h" @@ -76,6 +77,7 @@ WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager, share_mode_(CoreAudioUtil::GetShareMode()), audio_io_thread_(NULL), opened_(false), + volume_(1.0), endpoint_render_buffer_size_frames_(0), endpoint_capture_buffer_size_frames_(0), num_written_frames_(0), @@ -246,12 +248,15 @@ void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) { return; } - // Reset the counter for number of rendered frames taking into account the - // fact that we always initialize the render side with silence. - UINT32 num_queued_frames = 0; - audio_output_client_->GetCurrentPadding(&num_queued_frames); - DCHECK_EQ(num_queued_frames, endpoint_render_buffer_size_frames_); - num_written_frames_ = num_queued_frames; + // Ensure that the endpoint buffer is prepared with silence. + if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { + if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( + audio_output_client_, audio_render_client_)) { + DLOG(WARNING) << "Failed to prepare endpoint buffers with silence."; + return; + } + } + num_written_frames_ = endpoint_render_buffer_size_frames_; // Start output streaming data between the endpoint buffer and the audio // engine. @@ -328,11 +333,15 @@ void WASAPIUnifiedStream::Close() { } void WASAPIUnifiedStream::SetVolume(double volume) { - NOTIMPLEMENTED(); + DVLOG(1) << "SetVolume(volume=" << volume << ")"; + if (volume < 0 || volume > 1) + return; + volume_ = volume; } void WASAPIUnifiedStream::GetVolume(double* volume) { - NOTIMPLEMENTED(); + DVLOG(1) << "GetVolume()"; + *volume = static_cast<double>(volume_); } // static @@ -523,6 +532,13 @@ void WASAPIUnifiedStream::Run() { render_bus_->ToInterleaved( packet_size_frames_, bytes_per_sample, audio_data); + // Perform in-place, software-volume adjustments. + media::AdjustVolume(audio_data, + frames_filled * format_.Format.nBlockAlign, + render_bus_->channels(), + bytes_per_sample, + volume_); + // Release the buffer space acquired in the GetBuffer() call. audio_render_client_->ReleaseBuffer(packet_size_frames_, 0); DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer"; diff --git a/media/audio/win/audio_unified_win.h b/media/audio/win/audio_unified_win.h index 0e8e829..3f7cd68 100644 --- a/media/audio/win/audio_unified_win.h +++ b/media/audio/win/audio_unified_win.h @@ -117,6 +117,9 @@ class MEDIA_EXPORT WASAPIUnifiedStream // True when successfully opened. bool opened_; + // Volume level from 0 to 1. + double volume_; + // Size in bytes of each audio frame (4 bytes for 16-bit stereo PCM). 
size_t frame_size_; diff --git a/media/audio/win/core_audio_util_win.cc b/media/audio/win/core_audio_util_win.cc index 54ff590..027fbca 100644 --- a/media/audio/win/core_audio_util_win.cc +++ b/media/audio/win/core_audio_util_win.cc @@ -106,7 +106,7 @@ AUDCLNT_SHAREMODE CoreAudioUtil::GetShareMode() { } int CoreAudioUtil::NumberOfActiveDevices(EDataFlow data_flow) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Create the IMMDeviceEnumerator interface. ScopedComPtr<IMMDeviceEnumerator> device_enumerator = CreateDeviceEnumerator(); @@ -134,7 +134,7 @@ int CoreAudioUtil::NumberOfActiveDevices(EDataFlow data_flow) { } ScopedComPtr<IMMDeviceEnumerator> CoreAudioUtil::CreateDeviceEnumerator() { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDeviceEnumerator> device_enumerator; HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, @@ -149,7 +149,7 @@ ScopedComPtr<IMMDeviceEnumerator> CoreAudioUtil::CreateDeviceEnumerator() { ScopedComPtr<IMMDevice> CoreAudioUtil::CreateDefaultDevice(EDataFlow data_flow, ERole role) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDevice> endpoint_device; // Create the IMMDeviceEnumerator interface. @@ -184,7 +184,7 @@ ScopedComPtr<IMMDevice> CoreAudioUtil::CreateDefaultDevice(EDataFlow data_flow, ScopedComPtr<IMMDevice> CoreAudioUtil::CreateDevice( const std::string& device_id) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDevice> endpoint_device; // Create the IMMDeviceEnumerator interface. @@ -203,7 +203,7 @@ ScopedComPtr<IMMDevice> CoreAudioUtil::CreateDevice( } HRESULT CoreAudioUtil::GetDeviceName(IMMDevice* device, AudioDeviceName* name) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Retrieve unique name of endpoint device. // Example: "{0.0.1.00000000}.{8db6020f-18e3-4f25-b6f5-7726c9122574}". @@ -238,7 +238,7 @@ HRESULT CoreAudioUtil::GetDeviceName(IMMDevice* device, AudioDeviceName* name) { } std::string CoreAudioUtil::GetFriendlyName(const std::string& device_id) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDevice> audio_device = CreateDevice(device_id); if (!audio_device) return std::string(); @@ -254,7 +254,7 @@ std::string CoreAudioUtil::GetFriendlyName(const std::string& device_id) { bool CoreAudioUtil::DeviceIsDefault(EDataFlow flow, ERole role, std::string device_id) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDevice> device = CreateDefaultDevice(flow, role); if (!device) return false; @@ -272,7 +272,7 @@ bool CoreAudioUtil::DeviceIsDefault(EDataFlow flow, } EDataFlow CoreAudioUtil::GetDataFlow(IMMDevice* device) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMEndpoint> endpoint; HRESULT hr = device->QueryInterface(endpoint.Receive()); if (FAILED(hr)) { @@ -291,7 +291,7 @@ EDataFlow CoreAudioUtil::GetDataFlow(IMMDevice* device) { ScopedComPtr<IAudioClient> CoreAudioUtil::CreateClient( IMMDevice* audio_device) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Creates and activates an IAudioClient COM object given the selected // endpoint device. 
@@ -306,7 +306,7 @@ ScopedComPtr<IAudioClient> CoreAudioUtil::CreateClient( ScopedComPtr<IAudioClient> CoreAudioUtil::CreateDefaultClient( EDataFlow data_flow, ERole role) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedComPtr<IMMDevice> default_device(CreateDefaultDevice(data_flow, role)); return (default_device ? CreateClient(default_device) : ScopedComPtr<IAudioClient>()); @@ -314,7 +314,7 @@ ScopedComPtr<IAudioClient> CoreAudioUtil::CreateDefaultClient( HRESULT CoreAudioUtil::GetSharedModeMixFormat( IAudioClient* client, WAVEFORMATPCMEX* format) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedCoMem<WAVEFORMATPCMEX> format_pcmex; HRESULT hr = client->GetMixFormat( reinterpret_cast<WAVEFORMATEX**>(&format_pcmex)); @@ -339,10 +339,22 @@ HRESULT CoreAudioUtil::GetSharedModeMixFormat( return hr; } +HRESULT CoreAudioUtil::GetDefaultSharedModeMixFormat( + EDataFlow data_flow, ERole role, WAVEFORMATPCMEX* format) { + DCHECK(IsSupported()); + ScopedComPtr<IAudioClient> client(CreateDefaultClient(data_flow, role)); + if (!client) { + // Map NULL-pointer to new error code which can be different from the + // actual error code. The exact value is not important here. + return AUDCLNT_E_ENDPOINT_CREATE_FAILED; + } + return CoreAudioUtil::GetSharedModeMixFormat(client, format); +} + bool CoreAudioUtil::IsFormatSupported(IAudioClient* client, AUDCLNT_SHAREMODE share_mode, const WAVEFORMATPCMEX* format) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); ScopedCoMem<WAVEFORMATEXTENSIBLE> closest_match; HRESULT hr = client->IsFormatSupported( share_mode, reinterpret_cast<const WAVEFORMATEX*>(format), @@ -366,7 +378,7 @@ bool CoreAudioUtil::IsFormatSupported(IAudioClient* client, HRESULT CoreAudioUtil::GetDevicePeriod(IAudioClient* client, AUDCLNT_SHAREMODE share_mode, REFERENCE_TIME* device_period) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Get the period of the engine thread. REFERENCE_TIME default_period = 0; @@ -385,9 +397,9 @@ HRESULT CoreAudioUtil::GetDevicePeriod(IAudioClient* client, HRESULT CoreAudioUtil::GetPreferredAudioParameters( IAudioClient* client, AudioParameters* params) { - DCHECK(CoreAudioUtil::IsSupported()); - WAVEFORMATPCMEX format; - HRESULT hr = GetSharedModeMixFormat(client, &format); + DCHECK(IsSupported()); + WAVEFORMATPCMEX mix_format; + HRESULT hr = GetSharedModeMixFormat(client, &mix_format); if (FAILED(hr)) return hr; @@ -404,25 +416,30 @@ HRESULT CoreAudioUtil::GetPreferredAudioParameters( // speaker, and so on, continuing in the order defined in KsMedia.h. // See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083.aspx // for more details. - ChannelConfig channel_config = format.dwChannelMask; + ChannelConfig channel_config = mix_format.dwChannelMask; // Convert Microsoft's channel configuration to genric ChannelLayout. ChannelLayout channel_layout = ChannelConfigToChannelLayout(channel_config); - // Store preferred sample rate and buffer size. - int sample_rate = format.Format.nSamplesPerSec; - int frames_per_buffer = static_cast<int>(sample_rate * - RefererenceTimeToTimeDelta(default_period).InSecondsF() + 0.5); + // Preferred sample rate. + int sample_rate = mix_format.Format.nSamplesPerSec; // TODO(henrika): possibly use format.Format.wBitsPerSample here instead. // We use a hard-coded value of 16 bits per sample today even if most audio // engines does the actual mixing in 32 bits per sample. 
int bits_per_sample = 16; - DVLOG(2) << "channel_layout : " << channel_layout; - DVLOG(2) << "sample_rate : " << sample_rate; - DVLOG(2) << "bits_per_sample : " << bits_per_sample; - DVLOG(2) << "frames_per_buffer: " << frames_per_buffer; + // We are using the native device period to derive the smallest possible + // buffer size in shared mode. Note that the actual endpoint buffer will be + // larger than this size but it will be possible to fill it up in two calls. + // TODO(henrika): ensure that this scheme works for capturing as well. + int frames_per_buffer = static_cast<int>(sample_rate * + RefererenceTimeToTimeDelta(default_period).InSecondsF() + 0.5); + + DVLOG(1) << "channel_layout : " << channel_layout; + DVLOG(1) << "sample_rate : " << sample_rate; + DVLOG(1) << "bits_per_sample : " << bits_per_sample; + DVLOG(1) << "frames_per_buffer: " << frames_per_buffer; AudioParameters audio_params(AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, @@ -436,9 +453,8 @@ HRESULT CoreAudioUtil::GetPreferredAudioParameters( HRESULT CoreAudioUtil::GetPreferredAudioParameters( EDataFlow data_flow, ERole role, AudioParameters* params) { - DCHECK(CoreAudioUtil::IsSupported()); - - ScopedComPtr<IAudioClient> client = CreateDefaultClient(data_flow, role); + DCHECK(IsSupported()); + ScopedComPtr<IAudioClient> client(CreateDefaultClient(data_flow, role)); if (!client) { // Map NULL-pointer to new error code which can be different from the // actual error code. The exact value is not important here. @@ -451,8 +467,7 @@ HRESULT CoreAudioUtil::SharedModeInitialize(IAudioClient* client, const WAVEFORMATPCMEX* format, HANDLE event_handle, size_t* endpoint_buffer_size) { - DCHECK(CoreAudioUtil::IsSupported()); - + DCHECK(IsSupported()); DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST; // Enable event-driven streaming if a valid event handle is provided. @@ -506,7 +521,7 @@ HRESULT CoreAudioUtil::SharedModeInitialize(IAudioClient* client, ScopedComPtr<IAudioRenderClient> CoreAudioUtil::CreateRenderClient( IAudioClient* client) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Get access to the IAudioRenderClient interface. This interface // enables us to write output data to a rendering endpoint buffer. @@ -517,40 +532,12 @@ ScopedComPtr<IAudioRenderClient> CoreAudioUtil::CreateRenderClient( DVLOG(1) << "IAudioClient::GetService: " << std::hex << hr; return ScopedComPtr<IAudioRenderClient>(); } - - // TODO(henrika): verify that this scheme is the same for shared mode and - // exclusive mode streams. - - // Avoid start-up glitches by filling up the endpoint buffer with "silence" - // before starting the stream. - UINT32 endpoint_buffer_size = 0; - hr = client->GetBufferSize(&endpoint_buffer_size); - DVLOG_IF(1, FAILED(hr)) << "IAudioClient::GetBufferSize: " << std::hex << hr; - - BYTE* data = NULL; - hr = audio_render_client->GetBuffer(endpoint_buffer_size, &data); - DVLOG_IF(1, FAILED(hr)) << "IAudioRenderClient::GetBuffer: " - << std::hex << hr; - if (SUCCEEDED(hr)) { - // Using the AUDCLNT_BUFFERFLAGS_SILENT flag eliminates the need to - // explicitly write silence data to the rendering buffer. - hr = audio_render_client->ReleaseBuffer(endpoint_buffer_size, - AUDCLNT_BUFFERFLAGS_SILENT); - DVLOG_IF(1, FAILED(hr)) << "IAudioRenderClient::ReleaseBuffer: " - << std::hex << hr; - } - - // Sanity check: verify that the endpoint buffer is filled with silence. 
- UINT32 num_queued_frames = 0; - client->GetCurrentPadding(&num_queued_frames); - DCHECK(num_queued_frames == endpoint_buffer_size); - return audio_render_client; } ScopedComPtr<IAudioCaptureClient> CoreAudioUtil::CreateCaptureClient( IAudioClient* client) { - DCHECK(CoreAudioUtil::IsSupported()); + DCHECK(IsSupported()); // Get access to the IAudioCaptureClient interface. This interface // enables us to read input data from a capturing endpoint buffer. @@ -564,4 +551,28 @@ ScopedComPtr<IAudioCaptureClient> CoreAudioUtil::CreateCaptureClient( return audio_capture_client; } +bool CoreAudioUtil::FillRenderEndpointBufferWithSilence( + IAudioClient* client, IAudioRenderClient* render_client) { + DCHECK(IsSupported()); + + UINT32 endpoint_buffer_size = 0; + if (FAILED(client->GetBufferSize(&endpoint_buffer_size))) + return false; + + UINT32 num_queued_frames = 0; + if (FAILED(client->GetCurrentPadding(&num_queued_frames))) + return false; + + BYTE* data = NULL; + int num_frames_to_fill = endpoint_buffer_size - num_queued_frames; + if (FAILED(render_client->GetBuffer(num_frames_to_fill, &data))) + return false; + + // Using the AUDCLNT_BUFFERFLAGS_SILENT flag eliminates the need to + // explicitly write silence data to the rendering buffer. + DVLOG(2) << "filling up " << num_frames_to_fill << " frames with silence"; + return SUCCEEDED(render_client->ReleaseBuffer(num_frames_to_fill, + AUDCLNT_BUFFERFLAGS_SILENT)); +} + } // namespace media diff --git a/media/audio/win/core_audio_util_win.h b/media/audio/win/core_audio_util_win.h index c8a37d6..6dbdbfb 100644 --- a/media/audio/win/core_audio_util_win.h +++ b/media/audio/win/core_audio_util_win.h @@ -104,6 +104,13 @@ class MEDIA_EXPORT CoreAudioUtil { static HRESULT GetSharedModeMixFormat(IAudioClient* client, WAVEFORMATPCMEX* format); + // Get the mix format that the audio engine uses internally for processing + // of shared-mode streams using the default IMMDevice where flow direction + // and role is define by |data_flow| and |role|. + static HRESULT GetDefaultSharedModeMixFormat(EDataFlow data_flow, + ERole role, + WAVEFORMATPCMEX* format); + // Returns true if the specified |client| supports the format in |format| // for the given |share_mode| (shared or exclusive). static bool IsFormatSupported(IAudioClient* client, @@ -156,6 +163,12 @@ class MEDIA_EXPORT CoreAudioUtil { static ScopedComPtr<IAudioCaptureClient> CreateCaptureClient( IAudioClient* client); + // Fills up the endpoint rendering buffer with silence for an existing + // IAudioClient given by |client| and a corresponding IAudioRenderClient + // given by |render_client|. + static bool FillRenderEndpointBufferWithSilence( + IAudioClient* client, IAudioRenderClient* render_client); + private: CoreAudioUtil() {} ~CoreAudioUtil() {} diff --git a/media/audio/win/core_audio_util_win_unittest.cc b/media/audio/win/core_audio_util_win_unittest.cc index b1edf47..f3aff48 100644 --- a/media/audio/win/core_audio_util_win_unittest.cc +++ b/media/audio/win/core_audio_util_win_unittest.cc @@ -384,6 +384,43 @@ TEST_F(CoreAudioUtilWinTest, CreateRenderAndCaptureClients) { } } +TEST_F(CoreAudioUtilWinTest, FillRenderEndpointBufferWithSilence) { + if (!CanRunAudioTest()) + return; + + // Create default clients using the default mixing format for shared mode. 
+  ScopedComPtr<IAudioClient> client(
+      CoreAudioUtil::CreateDefaultClient(eRender, eConsole));
+  EXPECT_TRUE(client);
+
+  WAVEFORMATPCMEX format;
+  size_t endpoint_buffer_size = 0;
+  EXPECT_TRUE(SUCCEEDED(CoreAudioUtil::GetSharedModeMixFormat(client,
+                                                              &format)));
+  CoreAudioUtil::SharedModeInitialize(client, &format, NULL,
+                                      &endpoint_buffer_size);
+  EXPECT_GT(endpoint_buffer_size, 0u);
+
+  ScopedComPtr<IAudioRenderClient> render_client(
+      CoreAudioUtil::CreateRenderClient(client));
+  EXPECT_TRUE(render_client);
+
+  // The endpoint audio buffer should not be filled up by default after being
+  // created.
+  UINT32 num_queued_frames = 0;
+  client->GetCurrentPadding(&num_queued_frames);
+  EXPECT_EQ(num_queued_frames, 0u);
+
+  // Fill it up with zeros and verify that the buffer is full.
+  // It is not possible to verify that the actual data consists of zeros
+  // since we can't access data that has already been sent to the endpoint
+  // buffer.
+  EXPECT_TRUE(CoreAudioUtil::FillRenderEndpointBufferWithSilence(
+      client, render_client));
+  client->GetCurrentPadding(&num_queued_frames);
+  EXPECT_EQ(num_queued_frames, endpoint_buffer_size);
+}
+
 }  // namespace media
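Illustration (editor's sketch, not part of the commit): the CoreAudioUtil helpers added in this change can be chained to prepare a shared-mode render stream whose endpoint buffer is pre-filled with silence before streaming starts, mirroring the FillRenderEndpointBufferWithSilence unit test above. The function name PrepareSilentSharedModeRenderStream, the exact header paths, and the minimal error handling are assumptions for illustration only; the CoreAudioUtil calls themselves are taken from the change.

// Hedged example only: combines the CoreAudioUtil helpers introduced in this
// change. Assumes a Windows build of Chromium media at this revision.
#include "base/win/scoped_com_initializer.h"
#include "base/win/scoped_comptr.h"
#include "media/audio/win/core_audio_util_win.h"

namespace media {

// Creates a default shared-mode render client and queues one endpoint buffer
// of silence so that rendering can start glitch-free.
bool PrepareSilentSharedModeRenderStream() {
  // Core Audio requires COM; the unit tests use an MTA initializer.
  base::win::ScopedCOMInitializer com_init(
      base::win::ScopedCOMInitializer::kMTA);
  if (!CoreAudioUtil::IsSupported())
    return false;

  // Default render device for the eConsole role.
  base::win::ScopedComPtr<IAudioClient> client(
      CoreAudioUtil::CreateDefaultClient(eRender, eConsole));
  if (!client)
    return false;

  // Shared-mode streams must use the audio engine's mix format.
  WAVEFORMATPCMEX format;
  if (FAILED(CoreAudioUtil::GetSharedModeMixFormat(client, &format)))
    return false;

  // A NULL event handle selects timer-driven (non event-driven) buffering.
  size_t endpoint_buffer_size = 0;
  if (FAILED(CoreAudioUtil::SharedModeInitialize(
          client, &format, NULL, &endpoint_buffer_size))) {
    return false;
  }

  base::win::ScopedComPtr<IAudioRenderClient> render_client(
      CoreAudioUtil::CreateRenderClient(client));
  if (!render_client)
    return false;

  // The helper releases the buffer with AUDCLNT_BUFFERFLAGS_SILENT, so no
  // explicit zero data has to be written to the rendering buffer.
  return CoreAudioUtil::FillRenderEndpointBufferWithSilence(client,
                                                            render_client);
}

}  // namespace media

The same pattern is what WASAPIUnifiedStream::Start() now relies on before starting output streaming, which is why num_written_frames_ can simply be reset to the endpoint buffer size after the silence fill.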