blob: d1200b2f29dbf8cda407d345bf15bc145f07d8e8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
#define CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
#include "base/callback.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/shared_memory.h"
#include "base/sync_socket.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/public/renderer/media_stream_audio_sink.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
// Forward declarations to avoid pulling in heavy media headers; the full
// definitions are only needed in the .cc file.
namespace media {
class AudioBus;
class AudioFifo;
}  // namespace media
namespace content {
// SpeechRecognitionAudioSink works as an audio sink to the
// WebRtcLocalAudioTrack. It stores the capture data into a FIFO.
// When the FIFO has enough data for resampling, it converts it,
// passes the buffer to the WebSpeechRecognizer via SharedMemory
// and notifies it via SyncSocket followed by incrementing the |buffer_index_|.
// WebSpeechRecognizer increments the shared buffer index to synchronize.
class CONTENT_EXPORT SpeechRecognitionAudioSink
: NON_EXPORTED_BASE(public media::AudioConverter::InputCallback),
NON_EXPORTED_BASE(public MediaStreamAudioSink) {
public:
// Invoked on the main render thread when the audio track is stopped.
typedef base::Callback<void()> OnStoppedCB;
// Socket ownership is transferred to the class via constructor.
// |track| is the source audio track to sink data from.
// |params| describes the audio format expected by speech recognition
// (the output side of the internal resampler).
// |memory| is a handle to shared memory used to pass converted audio
// buses to the consumer process.
// |socket| is used to signal the consumer that a new buffer is ready;
// it is accessed on the capture audio thread.
// |on_stopped_cb| is run when the track enters the ended state.
SpeechRecognitionAudioSink(const blink::WebMediaStreamTrack& track,
const media::AudioParameters& params,
const base::SharedMemoryHandle memory,
scoped_ptr<base::SyncSocket> socket,
const OnStoppedCB& on_stopped_cb);
~SpeechRecognitionAudioSink() override;
// Returns whether the provided track is supported.
static bool IsSupportedTrack(const blink::WebMediaStreamTrack& track);
private:
// content::MediaStreamAudioSink implementation.
void OnReadyStateChanged(
blink::WebMediaStreamSource::ReadyState state) override;
// Called on the capture audio thread with each captured audio bus.
void OnData(const media::AudioBus& audio_bus,
base::TimeTicks estimated_capture_time) override;
// Called when the source audio format changes; reconfigures the
// FIFO/converter for the new input parameters.
void OnSetFormat(const media::AudioParameters& params) override;
// media::AudioConverter::InputCallback implementation.
// Supplies buffered input frames to the converter during resampling.
double ProvideInput(media::AudioBus* audio_bus,
base::TimeDelta buffer_delay) override;
// Returns the pointer to the audio input buffer mapped in the shared memory.
media::AudioInputBuffer* GetAudioInputBuffer() const;
// Number of frames per buffer in FIFO. When the buffer is full we convert and
// consume it on the |output_bus_|. Size of the buffer depends on the
// resampler. Example: for 44.1 to 16.0 conversion, it should be 4100 frames.
int fifo_buffer_size_;
// Used to DCHECK that some methods are called on the main render thread.
base::ThreadChecker main_render_thread_checker_;
// Used to DCHECK that some methods are called on the capture audio thread.
base::ThreadChecker capture_thread_checker_;
// The audio track that this audio sink is connected to.
const blink::WebMediaStreamTrack track_;
// Shared memory used by audio buses on both browser and renderer processes.
base::SharedMemory shared_memory_;
// Socket for synchronization of audio bus reads/writes.
// Created on the renderer client and passed here. Accessed on capture thread.
scoped_ptr<base::SyncSocket> socket_;
// Used as a resampler to deliver appropriate format to speech recognition.
scoped_ptr<media::AudioConverter> audio_converter_;
// FIFO is used for queuing audio frames before we resample.
scoped_ptr<media::AudioFifo> fifo_;
// Audio bus shared with the browser process via |shared_memory_|.
scoped_ptr<media::AudioBus> output_bus_;
// Params of the source audio. Can change when |OnSetFormat()| occurs.
media::AudioParameters input_params_;
// Params used by speech recognition.
const media::AudioParameters output_params_;
// Whether the track has been stopped.
bool track_stopped_;
// Local counter of audio buffers for synchronization.
uint32 buffer_index_;
// Callback for the renderer client. Called when the audio track was stopped.
const OnStoppedCB on_stopped_cb_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSink);
};
} // namespace content
#endif // CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
|