// media/audio/win/audio_unified_win.h

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
#define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_

#include <Audioclient.h>
#include <MMDeviceAPI.h>

#include <string>

#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/threading/platform_thread.h"
#include "base/threading/simple_thread.h"
#include "base/win/scoped_co_mem.h"
#include "base/win/scoped_comptr.h"
#include "base/win/scoped_handle.h"
#include "media/audio/audio_io.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_mixer.h"
#include "media/base/media_export.h"
#include "media/base/multi_channel_resampler.h"

namespace media {

class AudioManagerWin;

// Implementation of AudioOutputStream for Windows using the Core Audio API
// where both capturing and rendering take place on the same thread to enable
// audio I/O. This class allows arbitrary combinations of input and output
// devices running off different clocks and using different drivers, with
// potentially differing sample-rates.
//
// It is required to first acquire the native sample rate of the selected
// output device and then use the same rate when creating this object.
// The inner operation depends on the input sample rate which is determined
// during construction. Three different main modes are supported:
//
//  1)  input rate == output rate => input side drives output side directly.
//  2)  input rate != output rate => both sides are driven independently by
//      events and a FIFO plus a resampling unit is used to compensate for
//      differences in sample rates between the two sides.
//  3)  input rate == output rate but native buffer sizes are not identical =>
//      same inner functionality as in (2) to compensate for the differences
//      in buffer sizes and also compensate for any potential clock drift
//      between the two devices.
//
// Mode detection is done at construction and using mode (1) will lead to
// best performance (lower delay and no "varispeed distortion"), i.e., it is
// recommended to use same sample rates for input and output. Mode (2) uses a
// resampler which supports rate adjustments to fine tune for things like
// clock drift and differences in sample rates between different devices.
// Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
// is also called the varispeed mode and it is used for case (3) as well to
// compensate for the difference in buffer sizes mainly.
// Mode (3) can happen if two different audio devices are used.
// As an example: some devices need a buffer size of 441 @ 44.1kHz and others
// 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
// which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
//
// Implementation notes:
//
//  - Open() can fail if the input and output parameters do not fulfill
//    certain conditions. See source for Open() for more details.
//  - Channel mixing will be performed if the client asks for a larger
//    number of channels than the native audio layer provides.
//    Example: client wants stereo but audio layer provides mono. In this case
//    upmixing from mono to stereo (1->2) will be done.
//
// TODO(henrika):
//
//  - Add support for exclusive mode.
//  - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
//    as internal sample-value representation.
//  - Perform fine-tuning for non-matching sample rates to reduce latency.
//
class MEDIA_EXPORT WASAPIUnifiedStream
    : public AudioOutputStream,
      public base::DelegateSimpleThread::Delegate {
 public:
  // The ctor takes all the usual parameters, plus |manager| which is the
  // audio manager that is creating this object, and |input_device_id| which
  // identifies the input device to be opened.
  WASAPIUnifiedStream(AudioManagerWin* manager,
                      const AudioParameters& params,
                      const std::string& input_device_id);

  // The dtor is typically called by the AudioManager only and it is usually
  // triggered by calling AudioOutputStream::Close().
  virtual ~WASAPIUnifiedStream();

  // Implementation of AudioOutputStream.
  virtual bool Open() OVERRIDE;
  virtual void Start(AudioSourceCallback* callback) OVERRIDE;
  virtual void Stop() OVERRIDE;
  virtual void Close() OVERRIDE;
  virtual void SetVolume(double volume) OVERRIDE;
  virtual void GetVolume(double* volume) OVERRIDE;

  // Returns true while |audio_io_thread_| holds a thread object, i.e. while
  // the pointer is non-NULL.
  // NOTE(review): presumably the thread is created in Start() and released in
  // Stop(); confirm against the implementation file.
  bool started() const {
    return audio_io_thread_.get() != NULL;
  }

  // Returns true when varispeed mode is active, which is detected here by
  // both |fifo_| and |resampler_| being allocated.
  // A FIFO and an adjustable multi-channel resampler are utilized in this
  // mode. According to the class-level description, this covers differing
  // input/output sample rates as well as equal rates with differing native
  // buffer sizes.
  bool VarispeedMode() const { return (fifo_ && resampler_); }

 private:
  enum {
    // Time in milliseconds between two successive delay measurements.
    // We save resources by not updating the delay estimates for each capture
    // event (typically 100Hz rate).
    kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,

    // Max possible FIFO size.
    // NOTE(review): unit is presumably audio frames; confirm where |fifo_| is
    // allocated.
    kFifoSize = 16384,

    // This value was determined empirically for minimum latency while still
    // guarding against FIFO under-runs. The actual target size will be equal
    // to kTargetFifoSafetyFactor * (native input buffer size).
    // TODO(henrika): tune this value for lowest possible latency for all
    // possible sample rate combinations.
    kTargetFifoSafetyFactor = 2
  };

  // Additional initialization required when input and output sample rate
  // differs. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
  // and the |capture_bus_| and configures the |input_format_| structure
  // given the provided input and output audio parameters.
  // NOTE(review): no member named |capture_bus_| is declared in this class;
  // the comment may be referring to |input_bus_| - confirm.
  void DoVarispeedInitialization(const AudioParameters& input_params,
                                 const AudioParameters& output_params);

  // Clears varispeed related components such as the FIFO and the resampler.
  void ResetVarispeed();

  // Builds WAVEFORMATEX structures for input and output based on input and
  // output audio parameters.
  void SetIOFormats(const AudioParameters& input_params,
                    const AudioParameters& output_params);

  // DelegateSimpleThread::Delegate implementation.
  virtual void Run() OVERRIDE;

  // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
  // Callback for providing more data into the resampler.
  // Only used in varispeed mode, i.e., when input rate != output rate.
  // NOTE(review): this class does not list such a provider interface among
  // its base classes and the method is not marked OVERRIDE, so it is
  // presumably handed to the resampler as a bound callback - confirm in the
  // implementation file.
  virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);

  // Issues the OnError() callback to the |source_|.
  void HandleError(HRESULT err);

  // Stops and joins the audio thread in case of an error.
  void StopAndJoinThread(HRESULT err);

  // Converts unique endpoint ID to user-friendly device name.
  std::string GetDeviceName(LPCWSTR device_id) const;

  // Called on the audio IO thread for each capture event.
  // Buffers captured audio into a FIFO if varispeed is used or into an audio
  // bus if input and output sample rates are identical.
  void ProcessInputAudio();

  // Called on the audio IO thread for each render event when varispeed is
  // active or for each capture event when varispeed is not used.
  // In varispeed mode, it triggers a resampling callback, which reads from the
  // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
  // input signal and at the same time asks for data to play out.
  // If input and output rates are the same - instead of reading from the FIFO
  // and resampling - we read directly from the audio bus used to store
  // captured data in ProcessInputAudio.
  void ProcessOutputAudio(IAudioClock* audio_output_clock);

  // Contains the thread ID of the creating thread.
  base::PlatformThreadId creating_thread_id_;

  // Our creator, the audio manager needs to be notified when we close.
  AudioManagerWin* manager_;

  // Contains the audio parameter structure provided at construction.
  AudioParameters params_;
  // For convenience, same as in params_.
  int input_channels_;
  int output_channels_;

  // Unique ID of the input device to be opened.
  const std::string input_device_id_;

  // The sharing mode for the streams.
  // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
  // where AUDCLNT_SHAREMODE_SHARED is the default.
  AUDCLNT_SHAREMODE share_mode_;

  // Rendering and capturing is driven by this thread (no message loop).
  // All OnMoreIOData() callbacks will be called from this thread.
  // A non-NULL value is what started() reports as "started".
  scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;

  // Contains the desired audio output format which is set up at construction.
  // It is required to first acquire the native sample rate of the selected
  // output device and then use the same rate when creating this object.
  WAVEFORMATPCMEX output_format_;

  // Contains the native audio input format which is set up at construction
  // if varispeed mode is utilized.
  WAVEFORMATPCMEX input_format_;

  // True when successfully opened.
  bool opened_;

  // Volume level from 0 to 1 used for output scaling.
  double volume_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the destination is expected to
  // receive in each OnMoreIOData() callback.
  size_t output_buffer_size_frames_;

  // Size in audio frames of each audio packet where an audio packet
  // is defined as the block of data which the source is expected to
  // deliver in each OnMoreIOData() callback.
  size_t input_buffer_size_frames_;

  // Length of the audio endpoint buffers (render and capture side), in audio
  // frames.
  uint32 endpoint_render_buffer_size_frames_;
  uint32 endpoint_capture_buffer_size_frames_;

  // Counts the number of audio frames written to the endpoint buffer.
  uint64 num_written_frames_;

  // Time stamp for last delay measurement.
  base::TimeTicks last_delay_sample_time_;

  // Contains the total (sum of render and capture) delay in milliseconds.
  double total_delay_ms_;

  // Contains the total (sum of render and capture and possibly FIFO) delay
  // in bytes. The update frequency is set by a constant called
  // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
  int total_delay_bytes_;

  // Pointer to the client that will deliver audio samples to be played out.
  // Not wrapped in an owning smart pointer here.
  AudioSourceCallback* source_;

  // IMMDevice interfaces which represents audio endpoint devices.
  base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
  base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;

  // IAudioClient interfaces which enables a client to create and initialize
  // an audio stream between an audio application and the audio engine.
  base::win::ScopedComPtr<IAudioClient> audio_output_client_;
  base::win::ScopedComPtr<IAudioClient> audio_input_client_;

  // IAudioRenderClient interfaces enables a client to write output
  // data to a rendering endpoint buffer.
  base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;

  // IAudioCaptureClient interfaces enables a client to read input
  // data from a capturing endpoint buffer.
  base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;

  // The audio engine will signal this event each time a buffer has been
  // recorded.
  base::win::ScopedHandle capture_event_;

  // The audio engine will signal this event each time it needs a new
  // audio buffer to play out.
  // Only utilized in varispeed mode.
  base::win::ScopedHandle render_event_;

  // This event will be signaled when streaming shall stop.
  base::win::ScopedHandle stop_streaming_event_;

  // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> output_bus_;

  // Container for sending data to AudioSourceCallback::OnMoreIOData().
  scoped_ptr<AudioBus> input_bus_;

  // Container for storing output from the channel mixer.
  scoped_ptr<AudioBus> channel_bus_;

  // All members below are only allocated, or used, in varispeed mode:

  // Temporary storage of resampled input audio data.
  scoped_ptr<AudioBus> resampled_bus_;

  // Set to true first time a capture event has been received in varispeed
  // mode.
  bool input_callback_received_;

  // MultiChannelResampler is a multi channel wrapper for SincResampler;
  // allowing high quality sample rate conversion of multiple channels at once.
  // Checked (together with |fifo_|) by VarispeedMode().
  scoped_ptr<MultiChannelResampler> resampler_;

  // Resampler I/O ratio.
  double io_sample_rate_ratio_;

  // Used for input to output buffering.
  // Checked (together with |resampler_|) by VarispeedMode().
  scoped_ptr<AudioFifo> fifo_;

  // The channel mixer is only created and utilized if number of input channels
  // is larger than the native number of input channels (e.g client wants
  // stereo but the audio device only supports mono).
  scoped_ptr<ChannelMixer> channel_mixer_;

  // The optimal number of frames we'd like to keep in the FIFO at all times.
  int target_fifo_frames_;

  // A running average of the measured delta between actual number of frames
  // in the FIFO versus |target_fifo_frames_|.
  double average_delta_;

  // A varispeed rate scalar which is calculated based on FIFO drift.
  double fifo_rate_compensation_;

  // Set to true when input side signals output side that a new delay
  // estimate is needed.
  bool update_output_delay_;

  // Capture side stores its delay estimate so the sum can be derived in
  // the render side.
  double capture_delay_ms_;

  // TODO(henrika): possibly remove these members once the performance is
  // properly tuned. Only used for off-line debugging.
  // Everything inside this #ifndef is compiled only when NDEBUG is not
  // defined, so the object layout differs between such builds and NDEBUG
  // builds.
#ifndef NDEBUG
  // Names for the logged quantities.
  // NOTE(review): presumably used as indices into |num_elements_|; confirm in
  // the implementation file.
  enum LogElementNames {
    INPUT_TIME_STAMP,
    NUM_FRAMES_IN_FIFO,
    RESAMPLER_MARGIN,
    RATE_COMPENSATION
  };

  // Heap-allocated arrays holding the logged debug data.
  // NOTE(review): sizes and fill logic are not visible in this header.
  scoped_ptr<int64[]> input_time_stamps_;
  scoped_ptr<int[]> num_frames_in_fifo_;
  scoped_ptr<int[]> resampler_margin_;
  scoped_ptr<double[]> fifo_rate_comps_;
  scoped_ptr<int[]> num_elements_;
  scoped_ptr<int[]> input_params_;
  scoped_ptr<int[]> output_params_;

  // Raw C file handles for the debug output; they are not wrapped in an
  // RAII type here.
  // NOTE(review): presumably opened and closed explicitly in the
  // implementation file - confirm they are closed on every exit path.
  FILE* data_file_;
  FILE* param_file_;
#endif

  DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
};

}  // namespace media

#endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_