content/renderer/media/webrtc_audio_device_impl.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#pragma once

#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/message_loop_proxy.h"
#include "base/time.h"
#include "content/common/content_export.h"
#include "content/renderer/media/audio_device.h"
#include "content/renderer/media/audio_input_device.h"
#include "third_party/webrtc/modules/audio_device/main/interface/audio_device.h"

// A WebRtcAudioDeviceImpl instance implements the abstract interface
// webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
// VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
// Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
// session id that tells which device to use. The user can either get the
// session id from the MediaStream or use a value of 1 (AudioInputDeviceManager
// ::kFakeOpenSessionId), the later will open the default device without going
// through the MediaStream. The user can then call WebRtcAudioDeviceImpl::
// StartPlayout() and WebRtcAudioDeviceImpl::StartRecording() from the render
// process to initiate and start audio rendering and capturing in the browser
// process. IPC is utilized to set up the media streams.
//
// Usage example:
//
//   using namespace webrtc;
//
//   {
//      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
//      external_adm = new WebRtcAudioDeviceImpl();
//      external_adm->SetSessionId(1);
//      VoiceEngine* voe = VoiceEngine::Create();
//      VoEBase* base = VoEBase::GetInterface(voe);
//      base->Init(external_adm);
//      int ch = base->CreateChannel();
//      ...
//      base->StartReceive(ch)
//      base->StartPlayout(ch);
//      base->StartSending(ch);
//      ...
//      <== full-duplex audio session with AGC enabled ==>
//      ...
//      base->DeleteChannel(ch);
//      base->Terminate();
//      base->Release();
//      VoiceEngine::Delete(voe);
//   }
//
// webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
//
//  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is an webrtc::AudioTransport implementation and
//    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
//
//  Init()
//    Creates and initializes the AudioDevice and AudioInputDevice objects.
//
//  SetAGC(true)
//    Enables the adaptive analog mode of the AGC which ensures that a
//    suitable microphone volume level will be set. This scheme will affect
//    the actual microphone control slider.
//
// Media example:
//
// When the underlying audio layer wants data samples to be played out, the
// AudioDevice::RenderCallback() will be called, which in turn uses the
// registered webrtc::AudioTransport callback and gets the data to be played
// out from the webrtc::VoiceEngine.
//
// The picture below illustrates the media flow on the capture side where the
// AudioInputDevice client acts as link between the renderer and browser
// process:
//
//                   .------------------.            .----------------------.
// (Native audio) => | AudioInputStream |-> OnData ->| AudioInputController |-.
//                   .------------------.            .----------------------. |
//                                                                            |
//                               browser process                              |
//   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (*)
//                               renderer process                             |
//                                                                            |
//      .-------------------------------.             .------------------.    |
//  .---|    WebRtcAudioDeviceImpl      |<- Capture <-| AudioInputDevice | <--.
//  |   .-------------------------------.             .------------------.
//  |
//  |                             .---------------------.
//  .-> RecordedDataIsAvailable ->| webrtc::VoiceEngine | => (encode+transmit)
//                                .---------------------.
//
//  (*) Using SyncSocket for inter-process synchronization with low latency.
//      The actual data is transferred via SharedMemory. IPC is not involved
//      in the actual media transfer.
//
// AGC overview:
//
// It aims to maintain a constant speech loudness level from the microphone.
// This is done by both controlling the analog microphone gain and applying
// digital gain. The microphone gain on the sound card is slowly
// increased/decreased during speech only. By observing the microphone control
// slider you can see it move when you speak. If you scream, the slider moves
// downwards and then upwards again when you return to normal. It is not
// uncommon that the slider hits the maximum. This means that the maximum
// analog gain is not large enough to give the desired loudness. Nevertheless,
// we can in general still attain the desired loudness. If the microphone
// control slider is moved manually, the gain adaptation restarts and returns
// to roughly the same position as before the change if the circumstances are
// still the same. When the input microphone signal causes saturation, the
// level is decreased dramatically and has to re-adapt towards the old level.
// The adaptation is a slowly varying process and at the beginning of capture
// this is noticed by a slow increase in volume. Smaller changes in microphone
// input level is leveled out by the built-in digital control. For larger
// differences we need to rely on the slow adaptation.
// See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
//
// AGC implementation details:
//
// The adaptive analog mode of the AGC is always enabled for desktop platforms
// in WebRTC.
//
// Before recording starts, the ADM sets an AGC state in the
// AudioInputDevice by calling AudioInputDevice::SetAutomaticGainControl(true).
//
// A capture session with AGC is started up as follows (simplified):
//
//                            [renderer]
//                                |
//                     ADM::StartRecording()
//             AudioInputDevice::InitializeOnIOThread()
//           AudioInputHostMsg_CreateStream(..., agc=true)               [IPC]
//                                |
//                       [IPC to the browser]
//                                |
//              AudioInputRendererHost::OnCreateStream()
//              AudioInputController::CreateLowLatency()
//         AudioInputController::DoSetAutomaticGainControl(true)
//            AudioInputStream::SetAutomaticGainControl(true)
//                                |
// AGC is now enabled in the media layer and streaming starts (details omitted).
// The figure below illustrates the AGC scheme which is active in combination
// with the default media flow explained earlier.
//                                |
//                            [browser]
//                                |
//                AudioInputStream::(Capture thread loop)
//   AudioInputStreamImpl::QueryAgcVolume() => new volume once per second
//                 AudioInputData::OnData(..., volume)
//              AudioInputController::OnData(..., volume)
//               AudioInputSyncWriter::Write(..., volume)
//                                |
//      [volume | size | data] is sent to the renderer         [shared memory]
//                                |
//                            [renderer]
//                                |
//          AudioInputDevice::AudioThreadCallback::Process()
//            WebRtcAudioDeviceImpl::Capture(..., volume)
//    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
//                                |
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_level| output. This value is only non-zero if the
// AGC has take a decision that the microphone level should change.
//                                |
//                      if (new_volume != 0)
//              AudioInputDevice::SetVolume(new_volume)
//              AudioInputHostMsg_SetVolume(new_volume)                  [IPC]
//                                |
//                       [IPC to the browser]
//                                |
//                 AudioInputRendererHost::OnSetVolume()
//                  AudioInputController::SetVolume()
//             AudioInputStream::SetVolume(scaled_volume)
//                                |
// Here we set the new microphone level in the media layer and at the same time
// read the new setting (we might not get exactly what is set).
//                                |
//             AudioInputData::OnData(..., updated_volume)
//           AudioInputController::OnData(..., updated_volume)
//                                |
//                                |
// This process repeats until we stop capturing data. Note that, a common
// steady state is that the volume control reaches its max and the new_volume
// value from the AGC is zero. A loud voice input is required to break this
// state and start lowering the level again.
//
// Implementation notes:
//
//  - This class must be created on the main render thread.
//  - The webrtc::AudioDeviceModule is reference counted.
//  - AGC is only supported in combination with the WASAPI-based audio layer
//    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transfered in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client a media layer). This approach ensures that we can avoid
//    transferring maximum levels between the renderer and the browser.
//
class CONTENT_EXPORT WebRtcAudioDeviceImpl
    : NON_EXPORTED_BASE(public webrtc::AudioDeviceModule),
      public media::AudioRendererSink::RenderCallback,
      public AudioInputDevice::CaptureCallback,
      public AudioInputDevice::CaptureEventHandler {
 public:
  // Methods called on main render thread.
  WebRtcAudioDeviceImpl();

  // webrtc::RefCountedModule implementation.
  // The creator must call AddRef() after construction and use Release()
  // to release the reference and delete this object.
  virtual int32_t AddRef() OVERRIDE;
  virtual int32_t Release() OVERRIDE;

  // We need this one to support runnable method tasks.
  static bool ImplementsThreadSafeReferenceCounting() { return true; }

  // AudioDevice::RenderCallback implementation.
  virtual size_t Render(const std::vector<float*>& audio_data,
                        size_t number_of_frames,
                        size_t audio_delay_milliseconds) OVERRIDE;
  virtual void OnRenderError() OVERRIDE;

  // AudioInputDevice::CaptureCallback implementation.
  virtual void Capture(const std::vector<float*>& audio_data,
                       size_t number_of_frames,
                       size_t audio_delay_milliseconds,
                       double volume) OVERRIDE;
  virtual void OnCaptureError() OVERRIDE;

  // AudioInputDevice::CaptureEventHandler implementation.
  virtual void OnDeviceStarted(const std::string& device_id) OVERRIDE;
  virtual void OnDeviceStopped() OVERRIDE;

  // webrtc::Module implementation.
  virtual int32_t ChangeUniqueId(const int32_t id) OVERRIDE;
  virtual int32_t TimeUntilNextProcess() OVERRIDE;
  virtual int32_t Process() OVERRIDE;

  // webrtc::AudioDeviceModule implementation.
  virtual int32_t ActiveAudioLayer(AudioLayer* audio_layer) const OVERRIDE;
  virtual ErrorCode LastError() const OVERRIDE;

  virtual int32_t RegisterEventObserver(
      webrtc::AudioDeviceObserver* event_callback) OVERRIDE;
  virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
      OVERRIDE;

  virtual int32_t Init() OVERRIDE;
  virtual int32_t Terminate() OVERRIDE;
  virtual bool Initialized() const OVERRIDE;

  virtual int16_t PlayoutDevices() OVERRIDE;
  virtual int16_t RecordingDevices() OVERRIDE;
  virtual int32_t PlayoutDeviceName(uint16_t index,
                                    char name[webrtc::kAdmMaxDeviceNameSize],
                                    char guid[webrtc::kAdmMaxGuidSize])
                                    OVERRIDE;
  virtual int32_t RecordingDeviceName(uint16_t index,
                                      char name[webrtc::kAdmMaxDeviceNameSize],
                                      char guid[webrtc::kAdmMaxGuidSize])
                                      OVERRIDE;
  virtual int32_t SetPlayoutDevice(uint16_t index) OVERRIDE;
  virtual int32_t SetPlayoutDevice(WindowsDeviceType device) OVERRIDE;
  virtual int32_t SetRecordingDevice(uint16_t index) OVERRIDE;
  virtual int32_t SetRecordingDevice(WindowsDeviceType device) OVERRIDE;

  virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
  virtual int32_t InitPlayout() OVERRIDE;
  virtual bool PlayoutIsInitialized() const OVERRIDE;
  virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
  virtual int32_t InitRecording() OVERRIDE;
  virtual bool RecordingIsInitialized() const OVERRIDE;

  virtual int32_t StartPlayout() OVERRIDE;
  virtual int32_t StopPlayout() OVERRIDE;
  virtual bool Playing() const OVERRIDE;
  virtual int32_t StartRecording() OVERRIDE;
  virtual int32_t StopRecording() OVERRIDE;
  virtual bool Recording() const OVERRIDE;

  virtual int32_t SetAGC(bool enable) OVERRIDE;
  virtual bool AGC() const OVERRIDE;

  virtual int32_t SetWaveOutVolume(uint16_t volume_left,
                                   uint16_t volume_right) OVERRIDE;
  virtual int32_t WaveOutVolume(uint16_t* volume_left,
                                uint16_t* volume_right) const OVERRIDE;

  virtual int32_t SpeakerIsAvailable(bool* available) OVERRIDE;
  virtual int32_t InitSpeaker() OVERRIDE;
  virtual bool SpeakerIsInitialized() const OVERRIDE;
  virtual int32_t MicrophoneIsAvailable(bool* available) OVERRIDE;
  virtual int32_t InitMicrophone() OVERRIDE;
  virtual bool MicrophoneIsInitialized() const OVERRIDE;

  virtual int32_t SpeakerVolumeIsAvailable(bool* available) OVERRIDE;
  virtual int32_t SetSpeakerVolume(uint32_t volume) OVERRIDE;
  virtual int32_t SpeakerVolume(uint32_t* volume) const OVERRIDE;
  virtual int32_t MaxSpeakerVolume(uint32_t* max_volume) const OVERRIDE;
  virtual int32_t MinSpeakerVolume(uint32_t* min_volume) const OVERRIDE;
  virtual int32_t SpeakerVolumeStepSize(uint16_t* step_size) const OVERRIDE;

  virtual int32_t MicrophoneVolumeIsAvailable(bool* available) OVERRIDE;
  virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;
  virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;
  virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
  virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
  virtual int32_t MicrophoneVolumeStepSize(uint16_t* step_size) const OVERRIDE;

  virtual int32_t SpeakerMuteIsAvailable(bool* available) OVERRIDE;
  virtual int32_t SetSpeakerMute(bool enable) OVERRIDE;
  virtual int32_t SpeakerMute(bool* enabled) const OVERRIDE;

  virtual int32_t MicrophoneMuteIsAvailable(bool* available) OVERRIDE;
  virtual int32_t SetMicrophoneMute(bool enable) OVERRIDE;
  virtual int32_t MicrophoneMute(bool* enabled) const OVERRIDE;

  virtual int32_t MicrophoneBoostIsAvailable(bool* available) OVERRIDE;
  virtual int32_t SetMicrophoneBoost(bool enable) OVERRIDE;
  virtual int32_t MicrophoneBoost(bool* enabled) const OVERRIDE;

  virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t SetStereoPlayout(bool enable) OVERRIDE;
  virtual int32_t StereoPlayout(bool* enabled) const OVERRIDE;
  virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t SetStereoRecording(bool enable) OVERRIDE;
  virtual int32_t StereoRecording(bool* enabled) const OVERRIDE;
  virtual int32_t SetRecordingChannel(const ChannelType channel) OVERRIDE;
  virtual int32_t RecordingChannel(ChannelType* channel) const OVERRIDE;

  virtual int32_t SetPlayoutBuffer(
      const BufferType type, uint16_t size_ms) OVERRIDE;
  virtual int32_t PlayoutBuffer(
      BufferType* type, uint16_t* size_ms) const OVERRIDE;
  virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
  virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;

  virtual int32_t CPULoad(uint16_t* load) const OVERRIDE;

  virtual int32_t StartRawOutputFileRecording(
      const char pcm_file_name_utf8[webrtc::kAdmMaxFileNameSize]) OVERRIDE;
  virtual int32_t StopRawOutputFileRecording() OVERRIDE;
  virtual int32_t StartRawInputFileRecording(
      const char pcm_file_name_utf8[webrtc::kAdmMaxFileNameSize]) OVERRIDE;
  virtual int32_t StopRawInputFileRecording() OVERRIDE;

  virtual int32_t SetRecordingSampleRate(
      const uint32_t samples_per_sec) OVERRIDE;
  virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
  virtual int32_t SetPlayoutSampleRate(const uint32_t samples_per_sec) OVERRIDE;
  virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE;

  virtual int32_t ResetAudioDevice() OVERRIDE;
  virtual int32_t SetLoudspeakerStatus(bool enable) OVERRIDE;
  virtual int32_t GetLoudspeakerStatus(bool* enabled) const OVERRIDE;

  // Sets the session id.
  void SetSessionId(int session_id);

  // Accessors.
  size_t input_buffer_size() const {
    return input_audio_parameters_.frames_per_buffer();
  }
  size_t output_buffer_size() const {
    return output_audio_parameters_.frames_per_buffer();
  }
  int input_channels() const {
    return input_audio_parameters_.channels();
  }
  int output_channels() const {
    return output_audio_parameters_.channels();
  }
  int input_sample_rate() const {
    return input_audio_parameters_.sample_rate();
  }
  int output_sample_rate() const {
    return output_audio_parameters_.sample_rate();
  }
  int input_delay_ms() const { return input_delay_ms_; }
  int output_delay_ms() const { return output_delay_ms_; }
  bool initialized() const { return initialized_; }
  bool playing() const { return playing_; }
  bool recording() const { return recording_; }

 private:
  // Make destructor private to ensure that we can only be deleted by Release().
  virtual ~WebRtcAudioDeviceImpl();

  // Methods called on the main render thread ----------------------------------
  // The following methods are tasks posted on the render thread that needs to
  // be executed on that thread.
  void InitOnRenderThread(int32_t* error, base::WaitableEvent* event);

  int ref_count_;

  // Gives access to the message loop of the render thread on which this
  // object is created.
  scoped_refptr<base::MessageLoopProxy> render_loop_;

  // Provides access to the native audio input layer in the browser process.
  scoped_refptr<AudioInputDevice> audio_input_device_;

  // Provides access to the native audio output layer in the browser process.
  scoped_refptr<AudioDevice> audio_output_device_;

  // Weak reference to the audio callback.
  // The webrtc client defines |audio_transport_callback_| by calling
  // RegisterAudioCallback().
  webrtc::AudioTransport* audio_transport_callback_;

  // Cached values of utilized audio parameters. Platform dependent.
  media::AudioParameters input_audio_parameters_;
  media::AudioParameters output_audio_parameters_;

  // Cached value of the current audio delay on the input/capture side.
  int input_delay_ms_;

  // Cached value of the current audio delay on the output/renderer side.
  int output_delay_ms_;

  // Buffers used for temporary storage during capture/render callbacks.
  // Allocated during initialization to save stack.
  scoped_array<int16> input_buffer_;
  scoped_array<int16> output_buffer_;

  webrtc::AudioDeviceModule::ErrorCode last_error_;

  base::TimeTicks last_process_time_;

  // Id of the media session to be started, it tells which device to be used
  // on the input/capture side.
  int session_id_;

  // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|.
  mutable base::Lock lock_;

  int bytes_per_sample_;

  bool initialized_;
  bool playing_;
  bool recording_;

  // Local copy of the current Automatic Gain Control state.
  bool agc_is_enabled_;

  DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
};

#endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_