// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/win/audio_unified_win.h"
#include <Functiondiscoverykeys_devpkey.h>
#include "base/debug/trace_event.h"
#ifndef NDEBUG
#include "base/file_util.h"
#include "base/path_service.h"
#endif
#include "base/time/time.h"
#include "base/win/scoped_com_initializer.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"
using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;
// Smoothing factor in exponential smoothing filter where 0 < alpha < 1.
// Larger values of alpha reduce the level of smoothing.
// See http://en.wikipedia.org/wiki/Exponential_smoothing for details.
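// Example: with kAlpha = 0.1 the update is avg += 0.1 * (x - avg); a step
// change in |x| reaches about 65% of its final value after ten updates
// (1 - 0.9^10 ~= 0.65), which keeps the averaged delta stable against jitter.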
static const double kAlpha = 0.1;
// Compute a rate compensation which always attracts us back to a specified
// target level over a period of |kCorrectionTimeSeconds|.
static const double kCorrectionTimeSeconds = 0.1;
#ifndef NDEBUG
// Max number of columns in the output text file |kUnifiedAudioDebugFileName|.
// See LogElementNames enumerator for details on what each column represents.
static const size_t kMaxNumSampleTypes = 4;
static const size_t kMaxNumParams = 2;
// Max number of rows in the output file |kUnifiedAudioDebugFileName|.
// Each row corresponds to one set of sample values for (approximately) the
// same time instant (stored in the first column).
static const size_t kMaxFileSamples = 10000;
// Name of output debug file used for off-line analysis of measurements which
// can be utilized for performance tuning of this class.
static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt";
// Name of output debug file used for off-line analysis of measurements.
// This file will contain a list of audio parameters.
static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt";
#endif
typedef uint32 ChannelConfig;
// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds with the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
// for more details.
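// Example: a standard stereo mix yields
// SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT = 0x3, and a 5.1 mix yields
// KSAUDIO_SPEAKER_5POINT1 = 0x3F.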
static ChannelConfig GetChannelConfig(EDataFlow data_flow) {
WAVEFORMATPCMEX format;
return SUCCEEDED(media::CoreAudioUtil::GetDefaultSharedModeMixFormat(
data_flow, eConsole, &format)) ?
static_cast<ChannelConfig>(format.dwChannelMask) : 0;
}
// Use the acquired IAudioClock interface to derive a time stamp of the audio
// sample which is currently playing through the speakers.
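// Example: if GetFrequency() reports 48000 and GetPosition() returns 24000,
// the sample currently playing is at 24000 / 48000.0 * 1000 = 500 [ms].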
static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) {
UINT64 device_frequency = 0, position = 0;
if (FAILED(clock->GetFrequency(&device_frequency)) ||
FAILED(clock->GetPosition(&position, NULL))) {
return 0.0;
}
return base::Time::kMillisecondsPerSecond *
(static_cast<double>(position) / device_frequency);
}
// Get a time stamp in milliseconds given number of audio frames in |num_frames|
// using the current sample rate |fs| as scale factor.
// Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms].
static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) {
return base::Time::kMillisecondsPerSecond *
(static_cast<double>(num_frames) / fs);
}
// Convert a timestamp in milliseconds to byte units given the audio format
// in |format|.
// Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size
// is 4 bytes per audio frame => 480 * 4 = 1920 [bytes].
static int MillisecondsToBytes(double ts_milliseconds,
const WAVEFORMATPCMEX& format) {
double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond;
return static_cast<int>(seconds * format.Format.nSamplesPerSec *
format.Format.nBlockAlign + 0.5);
}
// Convert frame count to milliseconds given the audio format in |format|.
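// Example: |num_frames| = 480 at 48000 Hz => 10 [ms].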
static double FrameCountToMilliseconds(int num_frames,
const WAVEFORMATPCMEX& format) {
return (base::Time::kMillisecondsPerSecond * num_frames) /
static_cast<double>(format.Format.nSamplesPerSec);
}
namespace media {
WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager,
const AudioParameters& params,
const std::string& input_device_id)
: creating_thread_id_(base::PlatformThread::CurrentId()),
manager_(manager),
params_(params),
input_channels_(params.input_channels()),
output_channels_(params.channels()),
input_device_id_(input_device_id),
share_mode_(CoreAudioUtil::GetShareMode()),
opened_(false),
volume_(1.0),
output_buffer_size_frames_(0),
input_buffer_size_frames_(0),
endpoint_render_buffer_size_frames_(0),
endpoint_capture_buffer_size_frames_(0),
num_written_frames_(0),
total_delay_ms_(0.0),
total_delay_bytes_(0),
source_(NULL),
input_callback_received_(false),
io_sample_rate_ratio_(1),
target_fifo_frames_(0),
average_delta_(0),
fifo_rate_compensation_(1),
update_output_delay_(false),
capture_delay_ms_(0) {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream");
VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()";
DCHECK(manager_);
VLOG(1) << "Input channels : " << input_channels_;
VLOG(1) << "Output channels: " << output_channels_;
VLOG(1) << "Sample rate : " << params_.sample_rate();
VLOG(1) << "Buffer size : " << params.frames_per_buffer();
#ifndef NDEBUG
input_time_stamps_.reset(new int64[kMaxFileSamples]);
num_frames_in_fifo_.reset(new int[kMaxFileSamples]);
resampler_margin_.reset(new int[kMaxFileSamples]);
fifo_rate_comps_.reset(new double[kMaxFileSamples]);
num_elements_.reset(new int[kMaxNumSampleTypes]);
std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0);
input_params_.reset(new int[kMaxNumParams]);
output_params_.reset(new int[kMaxNumParams]);
#endif
DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
<< "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";
// Load the Avrt DLL if not already loaded. Required to support MMCSS.
bool avrt_init = avrt::Initialize();
DCHECK(avrt_init) << "Failed to load the avrt.dll";
// All events are auto-reset events and non-signaled initially.
// Create the event which the audio engine will signal each time a buffer
// has been recorded.
capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
// Create the event which will be set in Stop() when streaming shall stop.
stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
}
WASAPIUnifiedStream::~WASAPIUnifiedStream() {
VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()";
#ifndef NDEBUG
base::FilePath data_file_name;
PathService::Get(base::DIR_EXE, &data_file_name);
data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName);
data_file_ = file_util::OpenFile(data_file_name, "wt");
DVLOG(1) << ">> Output file " << data_file_name.value() << " is created.";
size_t n = 0;
size_t elements_to_write = *std::min_element(
num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes);
while (n < elements_to_write) {
fprintf(data_file_, "%I64d %d %d %10.9f\n",
input_time_stamps_[n],
num_frames_in_fifo_[n],
resampler_margin_[n],
fifo_rate_comps_[n]);
++n;
}
file_util::CloseFile(data_file_);
base::FilePath param_file_name;
PathService::Get(base::DIR_EXE, &param_file_name);
param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName);
param_file_ = file_util::OpenFile(param_file_name, "wt");
DVLOG(1) << ">> Output file " << param_file_name.value() << " is created.";
fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]);
fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]);
file_util::CloseFile(param_file_);
#endif
}
bool WASAPIUnifiedStream::Open() {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open");
DVLOG(1) << "WASAPIUnifiedStream::Open()";
DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
if (opened_)
return true;
AudioParameters hw_output_params;
HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters(
eRender, eConsole, &hw_output_params);
if (FAILED(hr)) {
LOG(ERROR) << "Failed to get preferred output audio parameters.";
return false;
}
AudioParameters hw_input_params;
if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
// Query native parameters for the default capture device.
hr = CoreAudioUtil::GetPreferredAudioParameters(
eCapture, eConsole, &hw_input_params);
} else {
// Query native parameters for the capture device given by
// |input_device_id_|.
hr = CoreAudioUtil::GetPreferredAudioParameters(
input_device_id_, &hw_input_params);
}
if (FAILED(hr)) {
LOG(ERROR) << "Failed to get preferred input audio parameters.";
return false;
}
// It is currently only possible to open up the output audio device using
// the native number of channels.
if (output_channels_ != hw_output_params.channels()) {
LOG(ERROR) << "Audio device does not support requested output channels.";
return false;
}
// It is currently only possible to open up the input audio device using
// the native number of channels. If the client asks for a higher channel
// count, we will do channel upmixing in this class. The most typical
// example is that the client provides stereo but the hardware can only be
// opened in mono mode. We will do mono to stereo conversion in this case.
if (input_channels_ < hw_input_params.channels()) {
LOG(ERROR) << "Audio device does not support requested input channels.";
return false;
} else if (input_channels_ > hw_input_params.channels()) {
ChannelLayout input_layout =
GuessChannelLayout(hw_input_params.channels());
ChannelLayout output_layout = GuessChannelLayout(input_channels_);
channel_mixer_.reset(new ChannelMixer(input_layout, output_layout));
DVLOG(1) << "Remixing input channel layout from " << input_layout
<< " to " << output_layout << "; from "
<< hw_input_params.channels() << " channels to "
<< input_channels_;
}
if (hw_output_params.sample_rate() != params_.sample_rate()) {
LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate()
<< " must match the hardware sample-rate: "
<< hw_output_params.sample_rate();
return false;
}
if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) {
LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer()
<< " must match the hardware buffer size: "
<< hw_output_params.frames_per_buffer();
return false;
}
// Set up WAVEFORMATPCMEX structures for input and output given the specified
// audio parameters.
SetIOFormats(hw_input_params, params_);
// Create the input and output busses.
input_bus_ = AudioBus::Create(
hw_input_params.channels(), input_buffer_size_frames_);
output_bus_ = AudioBus::Create(params_);
// One extra bus is needed for the input channel mixing case.
if (channel_mixer_) {
DCHECK_LT(hw_input_params.channels(), input_channels_);
// The size of the |channel_bus_| must be the same as the size of the
// output bus to ensure that the channel mixer can deal with both
// resampled and non-resampled data as input.
channel_bus_ = AudioBus::Create(
input_channels_, params_.frames_per_buffer());
}
// Check if FIFO and resampling is required to match the input rate to the
// output rate. If so, a special thread loop, optimized for this case, will
// be used. This mode is also called varispeed mode.
// Note that we can also use this mode when input and output rates are the
// same but native buffer sizes differ (can happen if two different audio
// devices are used). For this case, the resampler uses a target ratio of
// 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is
// required to compensate for the difference in buffer sizes.
// TODO(henrika): we could perhaps improve the performance for the second
// case here by only using the FIFO and avoiding resampling. Not sure how
// much that would gain, and we would risk not compensating for clock drift.
if (hw_input_params.sample_rate() != params_.sample_rate() ||
hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) {
DoVarispeedInitialization(hw_input_params, params_);
}
// Render side (event driven only in varispeed mode):
ScopedComPtr<IAudioClient> audio_output_client =
CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
if (!audio_output_client)
return false;
if (!CoreAudioUtil::IsFormatSupported(audio_output_client,
share_mode_,
&output_format_)) {
return false;
}
if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
// The |render_event_| will be NULL unless varispeed mode is utilized.
hr = CoreAudioUtil::SharedModeInitialize(
audio_output_client, &output_format_, render_event_.Get(),
&endpoint_render_buffer_size_frames_);
} else {
// TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
}
if (FAILED(hr))
return false;
ScopedComPtr<IAudioRenderClient> audio_render_client =
CoreAudioUtil::CreateRenderClient(audio_output_client);
if (!audio_render_client)
return false;
// Capture side (always event driven but format depends on varispeed or not):
ScopedComPtr<IAudioClient> audio_input_client;
if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) {
audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole);
} else {
ScopedComPtr<IMMDevice> audio_input_device(
CoreAudioUtil::CreateDevice(input_device_id_));
audio_input_client = CoreAudioUtil::CreateClient(audio_input_device);
}
if (!audio_input_client)
return false;
if (!CoreAudioUtil::IsFormatSupported(audio_input_client,
share_mode_,
&input_format_)) {
return false;
}
if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
// Include valid event handle for event-driven initialization.
// The input side is always event driven regardless of whether varispeed
// is used or not.
hr = CoreAudioUtil::SharedModeInitialize(
audio_input_client, &input_format_, capture_event_.Get(),
&endpoint_capture_buffer_size_frames_);
} else {
// TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE.
}
if (FAILED(hr))
return false;
ScopedComPtr<IAudioCaptureClient> audio_capture_client =
CoreAudioUtil::CreateCaptureClient(audio_input_client);
if (!audio_capture_client)
return false;
// Varispeed mode requires additional preparations.
if (VarispeedMode())
ResetVarispeed();
// Store all valid COM interfaces.
audio_output_client_ = audio_output_client;
audio_render_client_ = audio_render_client;
audio_input_client_ = audio_input_client;
audio_capture_client_ = audio_capture_client;
opened_ = true;
return SUCCEEDED(hr);
}
void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start");
DVLOG(1) << "WASAPIUnifiedStream::Start()";
DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
CHECK(callback);
CHECK(opened_);
if (audio_io_thread_) {
CHECK_EQ(callback, source_);
return;
}
source_ = callback;
if (VarispeedMode()) {
ResetVarispeed();
fifo_rate_compensation_ = 1.0;
average_delta_ = 0.0;
input_callback_received_ = false;
update_output_delay_ = false;
}
// Create and start the thread that will listen for capture events.
// We will also listen on render events on the same thread if varispeed
// mode is utilized.
audio_io_thread_.reset(
new base::DelegateSimpleThread(this, "wasapi_io_thread"));
audio_io_thread_->Start();
if (!audio_io_thread_->HasBeenStarted()) {
DLOG(ERROR) << "Failed to start WASAPI IO thread.";
return;
}
// Start input streaming data between the endpoint buffer and the audio
// engine.
HRESULT hr = audio_input_client_->Start();
if (FAILED(hr)) {
StopAndJoinThread(hr);
return;
}
// Ensure that the endpoint buffer is prepared with silence.
if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
audio_output_client_, audio_render_client_)) {
DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
return;
}
}
num_written_frames_ = endpoint_render_buffer_size_frames_;
// Start output streaming data between the endpoint buffer and the audio
// engine.
hr = audio_output_client_->Start();
if (FAILED(hr)) {
StopAndJoinThread(hr);
return;
}
}
void WASAPIUnifiedStream::Stop() {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop");
DVLOG(1) << "WASAPIUnifiedStream::Stop()";
DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
if (!audio_io_thread_)
return;
// Stop input audio streaming.
HRESULT hr = audio_input_client_->Stop();
if (FAILED(hr)) {
DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
<< "Failed to stop input streaming: " << std::hex << hr;
}
// Stop output audio streaming.
hr = audio_output_client_->Stop();
if (FAILED(hr)) {
DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
<< "Failed to stop output streaming: " << std::hex << hr;
}
// Wait until the thread completes and perform cleanup.
SetEvent(stop_streaming_event_.Get());
audio_io_thread_->Join();
audio_io_thread_.reset();
// Ensure that we don't quit the main thread loop immediately next
// time Start() is called.
ResetEvent(stop_streaming_event_.Get());
// Clear source callback, it'll be set again on the next Start() call.
source_ = NULL;
// Flush all pending data and reset the audio clock stream position to 0.
hr = audio_output_client_->Reset();
if (FAILED(hr)) {
DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
<< "Failed to reset output streaming: " << std::hex << hr;
}
hr = audio_input_client_->Reset();
if (FAILED(hr)) {
DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
<< "Failed to reset input streaming: " << std::hex << hr;
}
// Extra safety check to ensure that the buffers are cleared.
// If the buffers are not cleared correctly, the next call to Start()
// would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
// TODO(henrika): this check is only needed for shared-mode streams.
UINT32 num_queued_frames = 0;
audio_output_client_->GetCurrentPadding(&num_queued_frames);
DCHECK_EQ(0u, num_queued_frames);
}
void WASAPIUnifiedStream::Close() {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close");
DVLOG(1) << "WASAPIUnifiedStream::Close()";
DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
// It is valid to call Close() before calling Open() or Start().
// It is also valid to call Close() after Start() has been called.
Stop();
// Inform the audio manager that we have been closed. This will cause our
// destruction.
manager_->ReleaseOutputStream(this);
}
void WASAPIUnifiedStream::SetVolume(double volume) {
DVLOG(1) << "SetVolume(volume=" << volume << ")";
if (volume < 0 || volume > 1)
return;
volume_ = volume;
}
void WASAPIUnifiedStream::GetVolume(double* volume) {
DVLOG(1) << "GetVolume()";
*volume = static_cast<double>(volume_);
}
void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) {
// TODO(henrika): utilize frame_delay?
// A non-zero frame delay means multiple callbacks were necessary to
// fulfill the requested number of frames.
if (frame_delay > 0)
DVLOG(3) << "frame_delay: " << frame_delay;
#ifndef NDEBUG
resampler_margin_[num_elements_[RESAMPLER_MARGIN]] =
fifo_->frames() - audio_bus->frames();
num_elements_[RESAMPLER_MARGIN]++;
#endif
if (fifo_->frames() < audio_bus->frames()) {
DVLOG(ERROR) << "Not enough data in the FIFO ("
<< fifo_->frames() << " < " << audio_bus->frames() << ")";
audio_bus->Zero();
return;
}
fifo_->Consume(audio_bus, 0, audio_bus->frames());
}
void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params,
const AudioParameters& output_params) {
for (int n = 0; n < 2; ++n) {
const AudioParameters& params = (n == 0) ? input_params : output_params;
WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_;
WAVEFORMATEX* format = &xformat->Format;
// Begin with the WAVEFORMATEX structure that specifies the basic format.
format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
format->nChannels = params.channels();
format->nSamplesPerSec = params.sample_rate();
format->wBitsPerSample = params.bits_per_sample();
format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
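// Example: 16-bit stereo gives nBlockAlign = (16 / 8) * 2 = 4 bytes per
// frame and, at 48000 Hz, nAvgBytesPerSec = 48000 * 4 = 192000.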
format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
// Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
// Note that we always open up using the native channel layout.
(*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample;
(*xformat).dwChannelMask = (n == 0) ?
GetChannelConfig(eCapture) : GetChannelConfig(eRender);
(*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
}
input_buffer_size_frames_ = input_params.frames_per_buffer();
output_buffer_size_frames_ = output_params.frames_per_buffer();
VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_;
VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_;
#ifndef NDEBUG
input_params_[0] = input_format_.Format.nSamplesPerSec;
input_params_[1] = input_buffer_size_frames_;
output_params_[0] = output_format_.Format.nSamplesPerSec;
output_params_[1] = output_buffer_size_frames_;
#endif
}
void WASAPIUnifiedStream::DoVarispeedInitialization(
const AudioParameters& input_params, const AudioParameters& output_params) {
DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()";
// A FIFO is required in this mode for input to output buffering.
// Note that it will add some latency.
fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize));
VLOG(1) << "Using FIFO of size " << fifo_->max_frames()
<< " (#channels=" << input_params.channels() << ")";
// Create the multi-channel resampler using the initial sample rate ratio.
// We will call MultiChannelResampler::SetRatio() during runtime to
// allow arbitrary combinations of input and output devices running off
// different clocks and using different drivers, with potentially
// differing sample-rates. Note that the requested block size is given by
// the native input buffer size |input_buffer_size_frames_|.
io_sample_rate_ratio_ = input_params.sample_rate() /
static_cast<double>(output_params.sample_rate());
DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_;
resampler_.reset(new MultiChannelResampler(
input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_,
base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this))));
VLOG(1) << "Resampling from " << input_params.sample_rate() << " to "
<< output_params.sample_rate();
// The optimal number of frames we'd like to keep in the FIFO at all times.
// The actual size will vary but the goal is to ensure that the average size
// is given by this value.
target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_;
VLOG(1) << "Target FIFO size: " << target_fifo_frames_;
// Create the event which the audio engine will signal each time it
// wants an audio buffer to render.
render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
// Allocate memory for temporary audio bus used to store resampled input
// audio.
resampled_bus_ = AudioBus::Create(
input_params.channels(), output_buffer_size_frames_);
// Buffer initial silence corresponding to target I/O buffering.
ResetVarispeed();
}
void WASAPIUnifiedStream::ResetVarispeed() {
DCHECK(VarispeedMode());
// Buffer initial silence corresponding to target I/O buffering.
fifo_->Clear();
scoped_ptr<AudioBus> silence =
AudioBus::Create(input_format_.Format.nChannels,
target_fifo_frames_);
silence->Zero();
fifo_->Push(silence.get());
resampler_->Flush();
}
void WASAPIUnifiedStream::Run() {
ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
// Increase the thread priority.
audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
// Enable MMCSS to ensure that this thread receives prioritized access to
// CPU resources.
// TODO(henrika): investigate if it is possible to include these additional
// settings in SetThreadPriority() as well.
DWORD task_index = 0;
HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
&task_index);
bool mmcss_is_ok =
(mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
if (!mmcss_is_ok) {
// Failed to enable MMCSS on this thread. It is not fatal but can lead
// to reduced QoS at high load.
DWORD err = GetLastError();
LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
}
// The IAudioClock interface enables us to monitor a stream's data
// rate and the current position in the stream. Allocate it before we
// start spinning.
ScopedComPtr<IAudioClock> audio_output_clock;
HRESULT hr = audio_output_client_->GetService(
__uuidof(IAudioClock), audio_output_clock.ReceiveVoid());
LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: "
<< std::hex << hr;
bool streaming = true;
bool error = false;
HANDLE wait_array[3];
size_t num_handles = 0;
wait_array[num_handles++] = stop_streaming_event_;
wait_array[num_handles++] = capture_event_;
if (render_event_) {
// One extra event handle is needed in varispeed mode.
wait_array[num_handles++] = render_event_;
}
// Keep streaming audio until stop event is signaled.
// Capture events are always used but render events are only active in
// varispeed mode.
while (streaming && !error) {
// Wait for a close-down event, or a new capture event.
DWORD wait_result = WaitForMultipleObjects(num_handles,
wait_array,
FALSE,
INFINITE);
switch (wait_result) {
case WAIT_OBJECT_0 + 0:
// |stop_streaming_event_| has been set.
streaming = false;
break;
case WAIT_OBJECT_0 + 1:
// |capture_event_| has been set
if (VarispeedMode()) {
ProcessInputAudio();
} else {
ProcessInputAudio();
ProcessOutputAudio(audio_output_clock);
}
break;
case WAIT_OBJECT_0 + 2:
DCHECK(VarispeedMode());
// |render_event_| has been set
ProcessOutputAudio(audio_output_clock);
break;
default:
error = true;
break;
}
}
if (streaming && error) {
// Stop audio streaming since something has gone wrong in our main thread
// loop. Note that we are still in a "started" state, hence a Stop() call
// is required to join the thread properly.
audio_input_client_->Stop();
audio_output_client_->Stop();
PLOG(ERROR) << "WASAPI streaming failed.";
}
// Disable MMCSS.
if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
PLOG(WARNING) << "Failed to disable MMCSS";
}
}
void WASAPIUnifiedStream::ProcessInputAudio() {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio");
BYTE* data_ptr = NULL;
UINT32 num_captured_frames = 0;
DWORD flags = 0;
UINT64 device_position = 0;
UINT64 capture_time_stamp = 0;
const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3;
base::TimeTicks now_tick = base::TimeTicks::HighResNow();
#ifndef NDEBUG
if (VarispeedMode()) {
input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] =
now_tick.ToInternalValue();
num_elements_[INPUT_TIME_STAMP]++;
}
#endif
// Retrieve the amount of data in the capture endpoint buffer.
// |endpoint_capture_time_stamp| is the value of the performance
// counter at the time that the audio endpoint device recorded
// the device position of the first audio frame in the data packet.
HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr,
&num_captured_frames,
&flags,
&device_position,
&capture_time_stamp);
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to get data from the capture buffer";
return;
}
if (hr == AUDCLNT_S_BUFFER_EMPTY) {
// The return code is a success code but a new packet is *not* available
// and none of the output parameters in the GetBuffer() call contains valid
// values. Best we can do is to deliver silence and avoid setting
// |input_callback_received_| since this only seems to happen for the
// initial event(s) on some devices.
input_bus_->Zero();
} else {
// Valid data has been recorded and it is now OK to set the flag which
// informs the render side that capturing has started.
input_callback_received_ = true;
}
if (num_captured_frames != 0) {
if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
// Clear out the capture buffer since silence is reported.
input_bus_->Zero();
} else {
// Store captured data in an audio bus after de-interleaving
// the data to match the audio bus structure.
input_bus_->FromInterleaved(
data_ptr, num_captured_frames, bytes_per_sample);
}
}
hr = audio_capture_client_->ReleaseBuffer(num_captured_frames);
DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
// Buffer input into FIFO if varispeed mode is used. The render event
// will drive resampling of this data to match the output side.
if (VarispeedMode()) {
int available_frames = fifo_->max_frames() - fifo_->frames();
if (input_bus_->frames() <= available_frames) {
fifo_->Push(input_bus_.get());
}
#ifndef NDEBUG
num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] =
fifo_->frames();
num_elements_[NUM_FRAMES_IN_FIFO]++;
#endif
}
// Save resources by not asking for new delay estimates each time.
// These estimates are fairly stable and it is perfectly safe to only
// sample at a rate of ~1Hz.
// TODO(henrika): we might have to increase the update rate in varispeed
// mode since the delay variations are higher in this mode.
if ((now_tick - last_delay_sample_time_).InMilliseconds() >
kTimeDiffInMillisecondsBetweenDelayMeasurements &&
input_callback_received_) {
// Calculate the estimated capture delay, i.e., the latency between
// the recording time and the time when we are notified about
// the recorded data. Note that the capture time stamp is given in
// 100-nanosecond (0.1 microseconds) units.
base::TimeDelta diff =
now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp);
capture_delay_ms_ = diff.InMillisecondsF();
last_delay_sample_time_ = now_tick;
update_output_delay_ = true;
}
}
void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) {
TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio");
if (!input_callback_received_) {
if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
audio_output_client_, audio_render_client_))
DLOG(WARNING) << "Failed to prepare endpoint buffers with silence.";
}
return;
}
// Rate adjusted resampling is required in varispeed mode. It means that
// recorded audio samples will be read from the FIFO, resampled to match the
// output sample-rate and then stored in |resampled_bus_|.
if (VarispeedMode()) {
// Calculate a varispeed rate scalar factor to compensate for drift between
// input and output. We use the actual number of frames still in the FIFO
// compared with the ideal value of |target_fifo_frames_|.
int delta = fifo_->frames() - target_fifo_frames_;
// Average |delta| because it can jitter back/forth quite frequently
// by +/- the hardware buffer-size *if* the input and output callbacks are
// happening at almost exactly the same time. Also, if the input and output
// sample-rates are different then |delta| will jitter quite a bit due to
// the rate conversion happening in the varispeed, plus the jittering of
// the callbacks. The average value is what's important here.
// We use an exponential smoothing filter to reduce the variations.
average_delta_ += kAlpha * (delta - average_delta_);
// Compute a rate compensation which always attracts us back to the
// |target_fifo_frames_| over a period of kCorrectionTimeSeconds.
double correction_time_frames =
kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec;
fifo_rate_compensation_ =
(correction_time_frames + average_delta_) / correction_time_frames;
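// Worked example at 48000 Hz with kCorrectionTimeSeconds = 0.1:
// correction_time_frames = 4800. If |average_delta_| is +48 frames, the
// compensation becomes 4848 / 4800 = 1.01, i.e. the resampler consumes
// input about 1% faster until the FIFO drains back to |target_fifo_frames_|.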
#ifndef NDEBUG
fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] =
fifo_rate_compensation_;
num_elements_[RATE_COMPENSATION]++;
#endif
// Adjust for FIFO drift.
const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_;
resampler_->SetRatio(new_ratio);
// Get resampled input audio from FIFO where the size is given by the
// output side.
resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get());
}
// Derive a new total delay estimate if the capture side has set the
// |update_output_delay_| flag.
if (update_output_delay_) {
// Calculate the estimated render delay, i.e., the time difference
// between the time when data is added to the endpoint buffer and
// when the data is played out on the actual speaker.
const double stream_pos = CurrentStreamPosInMilliseconds(
num_written_frames_ + output_buffer_size_frames_,
output_format_.Format.nSamplesPerSec);
const double speaker_pos =
SpeakerStreamPosInMilliseconds(audio_output_clock);
const double render_delay_ms = stream_pos - speaker_pos;
const double fifo_delay_ms = VarispeedMode() ?
FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0;
// Derive the total delay, i.e., the sum of the input and output
// delays. Also convert the value into byte units. An extra FIFO delay
// is added for varispeed usage cases.
total_delay_ms_ = VarispeedMode() ?
capture_delay_ms_ + render_delay_ms + fifo_delay_ms :
capture_delay_ms_ + render_delay_ms;
DVLOG(2) << "total_delay_ms : " << total_delay_ms_;
DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_;
DVLOG(3) << " render_delay_ms : " << render_delay_ms;
DVLOG(3) << " fifo_delay_ms : " << fifo_delay_ms;
total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_);
// Wait for new signal from the capture side.
update_output_delay_ = false;
}
// Select source depending on if varispeed is utilized or not.
// Also, the source might be the output of a channel mixer if channel mixing
// is required to match the native input channels to the number of input
// channels used by the client (given by |input_channels_| in this case).
AudioBus* input_bus = VarispeedMode() ?
resampled_bus_.get() : input_bus_.get();
if (channel_mixer_) {
DCHECK_EQ(input_bus->frames(), channel_bus_->frames());
// Most common case is 1->2 channel upmixing.
channel_mixer_->Transform(input_bus, channel_bus_.get());
// Use the output from the channel mixer as new input bus.
input_bus = channel_bus_.get();
}
// Prepare for rendering by calling OnMoreIOData().
int frames_filled = source_->OnMoreIOData(
input_bus,
output_bus_.get(),
AudioBuffersState(0, total_delay_bytes_));
DCHECK_EQ(frames_filled, output_bus_->frames());
// Keep track of number of rendered frames since we need it for
// our delay calculations.
num_written_frames_ += frames_filled;
// Derive the amount of available space in the endpoint buffer.
// Avoid render attempt if there is no room for a captured packet.
UINT32 num_queued_frames = 0;
audio_output_client_->GetCurrentPadding(&num_queued_frames);
if (endpoint_render_buffer_size_frames_ - num_queued_frames <
output_buffer_size_frames_)
return;
// Grab all available space in the rendering endpoint buffer
// into which the client can write a data packet.
uint8* audio_data = NULL;
HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_,
&audio_data);
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to access render buffer";
return;
}
const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3;
// Convert the audio bus content to interleaved integer data using
// |audio_data| as destination.
output_bus_->Scale(volume_);
output_bus_->ToInterleaved(
output_buffer_size_frames_, bytes_per_sample, audio_data);
// Release the buffer space acquired in the GetBuffer() call.
hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0);
DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer";
}
void WASAPIUnifiedStream::HandleError(HRESULT err) {
CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) ||
(!started() && GetCurrentThreadId() == creating_thread_id_));
NOTREACHED() << "Error code: " << std::hex << err;
if (source_)
source_->OnError(this);
}
void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) {
CHECK(GetCurrentThreadId() == creating_thread_id_);
DCHECK(audio_io_thread_.get());
SetEvent(stop_streaming_event_.Get());
audio_io_thread_->Join();
audio_io_thread_.reset();
HandleError(err);
}
} // namespace media