Introduced experimental support for interacting with the Google remote streaming speech recognition webservice (Speech CL2.3).

The support is still very experimental and contains a lot of debugging code to aid development. BUG=116954 TEST=content_unittests:GoogleStreamingRemoteEngineTest Review URL: https://chromiumcodereview.appspot.com/10546020 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@143616 0039d316-1c4b-4281-b951-d872f2087c98
author: primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-22 16:57:14 +0000
committer: primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-22 16:57:14 +0000
commit: c766aa9d160a4da403ff48e62c7f1a5dccdb3421 (patch)
tree: e3deca24d0149086b59a549bf4e87d229f03b88e /content/browser/speech/google_streaming_remote_engine.h
parent: 03a94ccc9e67b37d2871290b73609c615e95f61f (diff)
download: chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.zip
chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.tar.gz
chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.tar.bz2
1 files changed, 165 insertions, 0 deletions
diff --git a/content/browser/speech/google_streaming_remote_engine.h b/content/browser/speech/google_streaming_remote_engine.h
new file mode 100644
index 0000000..fde4957
--- /dev/null
+++ b/content/browser/speech/google_streaming_remote_engine.h
@@ -0,0 +1,165 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
+#define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/memory/ref_counted.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/threading/non_thread_safe.h"
+#include "content/browser/speech/audio_encoder.h"
+#include "content/browser/speech/chunked_byte_buffer.h"
+#include "content/browser/speech/speech_recognition_engine.h"
+#include "content/common/content_export.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "googleurl/src/gurl.h"
+#include "net/url_request/url_fetcher_delegate.h"
+
+namespace content {
+struct SpeechRecognitionError;
+struct SpeechRecognitionResult;
+}
+
+namespace net {
+class URLRequestContextGetter;
+}
+
+namespace speech {
+
+class AudioChunk;
+
+// Implements a SpeechRecognitionEngine supporting continuous recognition by
+// interacting with the Google streaming speech recognition webservice.
+// In more detail, for each session this class establishes two HTTP(S)
+// connections with the webservice, herein called "upstream" and "downstream".
+// Audio chunks are sent on the upstream by means of a chunked HTTP POST upload.
+// Recognition results are retrieved in a full-duplex fashion (i.e. while
+// pushing audio on the upstream) on the downstream by means of a chunked
+// HTTP GET request. Pairing between the two streams is handled through a
+// randomly generated key, unique for each request, which is passed in the
+// &pair= arg to both stream request URLs.
+// In the case of a regular session, the upstream is closed when the audio
+// capture ends (notified through an |AudioChunksEnded| call) and the downstream
+// waits for a corresponding server closure (some late results may still
+// arrive after the upstream has been closed).
+// Both streams are guaranteed to be closed once |EndRecognition| is called.
+class CONTENT_EXPORT GoogleStreamingRemoteEngine
+    : public NON_EXPORTED_BASE(SpeechRecognitionEngine),
+      public net::URLFetcherDelegate,
+      public NON_EXPORTED_BASE(base::NonThreadSafe) {
+ public:
+  explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context);
+  virtual ~GoogleStreamingRemoteEngine();
+
+  // SpeechRecognitionEngine methods.
+  virtual void SetConfig(const SpeechRecognitionEngineConfig& config) OVERRIDE;
+  virtual void StartRecognition() OVERRIDE;
+  virtual void EndRecognition() OVERRIDE;
+  virtual void TakeAudioChunk(const AudioChunk& data) OVERRIDE;
+  virtual void AudioChunksEnded() OVERRIDE;
+  virtual bool IsRecognitionPending() const OVERRIDE;
+  virtual int GetDesiredAudioChunkDurationMs() const OVERRIDE;
+
+  // net::URLFetcherDelegate methods.
+  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
+  virtual void OnURLFetchDownloadProgress(const net::URLFetcher* source,
+                                          int64 current, int64 total) OVERRIDE;
+
+ private:
+  friend class GoogleStreamingRemoteEngineTest;
+
+  // IDs passed to URLFetcher::Create(). Used for testing.
+  static const int kUpstreamUrlFetcherIdForTests;
+  static const int kDownstreamUrlFetcherIdForTests;
+
+  // Response status codes from the speech recognition webservice.
+  static const int kWebserviceStatusNoError;
+  static const int kWebserviceStatusErrorNoMatch;
+
+  // Data types for the internal Finite State Machine (FSM).
+  enum FSMState {
+    STATE_IDLE = 0,
+    STATE_BOTH_STREAMS_CONNECTED,
+    STATE_WAITING_DOWNSTREAM_RESULTS,
+    STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS  // Last valid state.
+  };
+
+  enum FSMEvent {
+    EVENT_END_RECOGNITION = 0,
+    EVENT_START_RECOGNITION,
+    EVENT_AUDIO_CHUNK,
+    EVENT_AUDIO_CHUNKS_ENDED,
+    EVENT_UPSTREAM_ERROR,
+    EVENT_DOWNSTREAM_ERROR,
+    EVENT_DOWNSTREAM_RESPONSE,
+    EVENT_DOWNSTREAM_CLOSED,
+    EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED  // Last valid event.
+  };
+
+  struct FSMEventArgs {
+    explicit FSMEventArgs(FSMEvent event_value);
+    ~FSMEventArgs();
+
+    FSMEvent event;
+
+    // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.
+    scoped_refptr<const AudioChunk> audio_data;
+
+    // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.
+    scoped_ptr<std::vector<uint8> > response;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
+  };
+
+  // Invoked by both upstream and downstream URLFetcher callbacks to handle
+  // new chunk data, connection closure or error notifications.
+  void DispatchHTTPResponse(const net::URLFetcher* source,
+                            bool end_of_response);
+
+  // Entry point for pushing any new external event into the recognizer FSM.
+  void DispatchEvent(const FSMEventArgs& event_args);
+
+  // Defines the behavior of the recognizer FSM, selecting the appropriate
+  // transition according to the current state and event.
+  FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);
+
+  // The methods below handle transitions of the recognizer FSM.
+  FSMState ConnectBothStreams(const FSMEventArgs& event_args);
+  FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);
+  FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);
+  FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);
+  FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);
+  FSMState CloseDownstream(const FSMEventArgs& event_args);
+  FSMState AbortSilently(const FSMEventArgs& event_args);
+  FSMState AbortWithError(const FSMEventArgs& event_args);
+  FSMState Abort(content::SpeechRecognitionErrorCode error);
+  FSMState DoNothing(const FSMEventArgs& event_args);
+  FSMState NotFeasible(const FSMEventArgs& event_args);
+
+  std::string GetAcceptedLanguages() const;
+  std::string GenerateRequestKey() const;
+
+  SpeechRecognitionEngineConfig config_;
+  scoped_ptr<net::URLFetcher> upstream_fetcher_;
+  scoped_ptr<net::URLFetcher> downstream_fetcher_;
+  scoped_refptr<net::URLRequestContextGetter> url_context_;
+  scoped_ptr<AudioEncoder> encoder_;
+  ChunkedByteBuffer chunked_byte_buffer_;
+  size_t previous_response_length_;
+  bool got_last_definitive_result_;
+  bool is_dispatching_event_;
+  FSMState state_;
+
+  DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine);
+};
+
+}  // namespace speech
+
+#endif  // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
author	primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-22 16:57:14 +0000
committer	primiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-22 16:57:14 +0000
commit	c766aa9d160a4da403ff48e62c7f1a5dccdb3421 (patch)
tree	e3deca24d0149086b59a549bf4e87d229f03b88e /content/browser/speech/google_streaming_remote_engine.h
parent	03a94ccc9e67b37d2871290b73609c615e95f61f (diff)
download	chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.zip chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.tar.gz chromium_src-c766aa9d160a4da403ff48e62c7f1a5dccdb3421.tar.bz2