authorprimiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-06-22 16:57:14 +0000
committerprimiano@chromium.org <primiano@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-06-22 16:57:14 +0000
commitc766aa9d160a4da403ff48e62c7f1a5dccdb3421 (patch)
treee3deca24d0149086b59a549bf4e87d229f03b88e
parent03a94ccc9e67b37d2871290b73609c615e95f61f (diff)
Introduced experimental support for interacting with the Google remote streaming speech recognition webservice (Speech CL2.3).

The support is still highly experimental and carries a lot of debugging code to help development.

BUG=116954
TEST=content_unittests:GoogleStreamingRemoteEngineTest
Review URL: https://chromiumcodereview.appspot.com/10546020

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@143616 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--content/browser/speech/chunked_byte_buffer.cc1
-rw-r--r--content/browser/speech/google_streaming_remote_engine.cc586
-rw-r--r--content/browser/speech/google_streaming_remote_engine.h165
-rw-r--r--content/browser/speech/google_streaming_remote_engine_unittest.cc440
-rw-r--r--content/browser/speech/speech_recognition_manager_impl.cc11
-rw-r--r--content/browser/speech/speech_recognizer_impl.cc57
-rw-r--r--content/browser/speech/speech_recognizer_impl.h2
-rw-r--r--content/browser/speech/speech_recognizer_impl_unittest.cc5
-rw-r--r--content/common/speech_recognition_messages.h2
-rw-r--r--content/content_browser.gypi2
-rw-r--r--content/content_tests.gypi2
-rw-r--r--content/public/common/content_switches.cc9
-rw-r--r--content/public/common/content_switches.h2
-rw-r--r--content/public/common/speech_recognition_result.cc3
-rw-r--r--content/public/common/speech_recognition_result.h2
-rw-r--r--content/renderer/speech_recognition_dispatcher.cc48
-rw-r--r--content/renderer/speech_recognition_dispatcher.h3
17 files changed, 1304 insertions, 36 deletions
diff --git a/content/browser/speech/chunked_byte_buffer.cc b/content/browser/speech/chunked_byte_buffer.cc
index 123755a..115fa82 100644
--- a/content/browser/speech/chunked_byte_buffer.cc
+++ b/content/browser/speech/chunked_byte_buffer.cc
@@ -38,7 +38,6 @@ ChunkedByteBuffer::~ChunkedByteBuffer() {
}
void ChunkedByteBuffer::Append(const uint8* start, size_t length) {
- DCHECK(length > 0);
size_t remaining_bytes = length;
const uint8* next_data = start;
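
For reference, ChunkedByteBuffer (touched above) implements the framing used by the downstream channel in this CL: each protobuf message is preceded by a 4-byte big-endian length. Below is a minimal standalone sketch of that framing, using standard types rather than Chromium's; the helper name is hypothetical and for illustration only.

#include <cstddef>
#include <cstdint>
#include <string>

// Pops one length-prefixed chunk from the front of |buffer| into |message|.
// Returns false if |buffer| does not yet hold a complete chunk.
bool PopLengthPrefixedChunk(std::string* buffer, std::string* message) {
  if (buffer->size() < 4)
    return false;  // The 4-byte length prefix is not complete yet.
  const uint8_t* p = reinterpret_cast<const uint8_t*>(buffer->data());
  const uint32_t length = (static_cast<uint32_t>(p[0]) << 24) |
                          (static_cast<uint32_t>(p[1]) << 16) |
                          (static_cast<uint32_t>(p[2]) << 8) |
                          static_cast<uint32_t>(p[3]);
  if (buffer->size() < 4 + static_cast<std::size_t>(length))
    return false;  // The chunk payload is not complete yet.
  message->assign(*buffer, 4, length);
  buffer->erase(0, 4 + static_cast<std::size_t>(length));
  return true;
}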
diff --git a/content/browser/speech/google_streaming_remote_engine.cc b/content/browser/speech/google_streaming_remote_engine.cc
new file mode 100644
index 0000000..34ff853
--- /dev/null
+++ b/content/browser/speech/google_streaming_remote_engine.cc
@@ -0,0 +1,586 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/speech/google_streaming_remote_engine.h"
+
+#include <vector>
+
+#include "base/bind.h"
+#include "base/command_line.h"
+#include "base/rand_util.h"
+#include "base/string_number_conversions.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "base/utf_string_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
+#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/browser/browser_thread.h"
+#include "content/public/common/content_switches.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "net/base/escape.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/url_fetcher.h"
+#include "net/url_request/url_request_context.h"
+#include "net/url_request/url_request_context_getter.h"
+#include "net/url_request/url_request_status.h"
+
+using content::BrowserThread;
+using content::SpeechRecognitionError;
+using content::SpeechRecognitionErrorCode;
+using content::SpeechRecognitionHypothesis;
+using content::SpeechRecognitionResult;
+using net::URLFetcher;
+
+namespace {
+
+// TODO(primiano): This shouldn't be a constant; rather, it should be taken
+// from the maxNBest property (which is not yet implemented in WebKit).
+const int kMaxResults = 5;
+const char kDownstreamUrl[] = "/down?";
+const char kUpstreamUrl[] = "/up?";
+const int kAudioPacketIntervalMs = 100;
+const speech::AudioEncoder::Codec kDefaultAudioCodec =
+ speech::AudioEncoder::CODEC_FLAC;
+
+// TODO(primiano): /////////// Remove this after debug stage. /////////////////
+void DumpResponse(const std::string& response) {
+ bool parse_ok;
+ speech::HttpStreamingResult res;
+ parse_ok = res.ParseFromString(response);
+ DVLOG(1) << "------------";
+ if (!parse_ok) {
+ DVLOG(1) << "Parse failed!";
+ return;
+ }
+ if (res.has_id())
+ DVLOG(1) << "ID\t" << res.id();
+ if (res.has_status())
+ DVLOG(1) << "STATUS\t" << res.status();
+ if (res.has_upstream_connected())
+ DVLOG(1) << "UPCON\t" << res.upstream_connected();
+ if (res.hypotheses_size() > 0)
+ DVLOG(1) << "HYPS\t" << res.hypotheses_size();
+ if (res.has_provisional())
+ DVLOG(1) << "PROV\t" << res.provisional();
+ if (res.has_ephemeral())
+ DVLOG(1) << "EPHM\t" << res.ephemeral();
+ DVLOG(1) << "------------\n";
+}
+
+std::string GetWebserviceBaseURL() {
+ const CommandLine& command_line = *CommandLine::ForCurrentProcess();
+ return command_line.GetSwitchValueASCII(
+ switches::kSpeechRecognitionWebserviceURL);
+}
+
+std::string GetWebserviceKey() {
+ const CommandLine& command_line = *CommandLine::ForCurrentProcess();
+ return command_line.GetSwitchValueASCII(
+ switches::kSpeechRecognitionWebserviceKey);
+}
+
+} // namespace
+
+namespace speech {
+
+const int GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests = 0;
+const int GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests = 1;
+const int GoogleStreamingRemoteEngine::kWebserviceStatusNoError = 0;
+const int GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch = 5;
+
+GoogleStreamingRemoteEngine::GoogleStreamingRemoteEngine(
+ net::URLRequestContextGetter* context)
+ : url_context_(context),
+ encoder_(NULL),
+ previous_response_length_(0),
+ got_last_definitive_result_(false),
+ is_dispatching_event_(false),
+ state_(STATE_IDLE) {
+}
+
+GoogleStreamingRemoteEngine::~GoogleStreamingRemoteEngine() {}
+
+void GoogleStreamingRemoteEngine::SetConfig(
+ const SpeechRecognitionEngineConfig& config) {
+ config_ = config;
+}
+
+void GoogleStreamingRemoteEngine::StartRecognition() {
+ FSMEventArgs event_args(EVENT_START_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+void GoogleStreamingRemoteEngine::EndRecognition() {
+ FSMEventArgs event_args(EVENT_END_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+void GoogleStreamingRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
+ event_args.audio_data = &data;
+ DispatchEvent(event_args);
+}
+
+void GoogleStreamingRemoteEngine::AudioChunksEnded() {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
+ DispatchEvent(event_args);
+}
+
+void GoogleStreamingRemoteEngine::OnURLFetchComplete(const URLFetcher* source) {
+ const bool kResponseComplete = true;
+ DispatchHTTPResponse(source, kResponseComplete);
+}
+
+void GoogleStreamingRemoteEngine::OnURLFetchDownloadProgress(
+ const URLFetcher* source, int64 current, int64 total) {
+ const bool kPartialResponse = false;
+ DispatchHTTPResponse(source, kPartialResponse);
+}
+
+void GoogleStreamingRemoteEngine::DispatchHTTPResponse(const URLFetcher* source,
+ bool end_of_response) {
+ DCHECK(CalledOnValidThread());
+ DCHECK(source);
+ const bool response_is_good = source->GetStatus().is_success() &&
+ source->GetResponseCode() == 200;
+ std::string response;
+ if (response_is_good)
+ source->GetResponseAsString(&response);
+ const size_t current_response_length = response.size();
+
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
+ << "HTTP, code: " << source->GetResponseCode()
+ << " length: " << current_response_length
+ << " eor: " << end_of_response;
+
+ // URLFetcher always provides the entire response buffer, but we are only
+ // interested in the fresh data introduced by the last chunk. Therefore, we
+ // drop the previous content we have already processed.
+ if (current_response_length != 0) {
+ DCHECK_GE(current_response_length, previous_response_length_);
+ response.erase(0, previous_response_length_);
+ previous_response_length_ = current_response_length;
+ }
+
+ if (!response_is_good && source == downstream_fetcher_.get()) {
+ // TODO(primiano): /////////// Remove this after debug stage. /////////////
+ DVLOG(1) << "Downstream error " << source->GetResponseCode();
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+ if (!response_is_good && source == upstream_fetcher_.get()) {
+ // TODO(primiano): /////////// Remove this after debug stage. /////////////
+ DVLOG(1) << "Upstream error " << source->GetResponseCode()
+ << " EOR " << end_of_response;
+ FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+ if (source == upstream_fetcher_.get())
+ return;
+
+ DCHECK(response_is_good && source == downstream_fetcher_.get());
+
+ // The downstream response is organized in chunks, whose size is determined
+ // by a 4-byte prefix, transparently handled by the ChunkedByteBuffer class.
+ // Such chunks are sent by the speech recognition webservice over the HTTP
+ // downstream channel using HTTP chunked transfer (unrelated to our chunks).
+ // This function is called every time an HTTP chunk is received by the
+ // url fetcher. However, there isn't any particular matching between our
+ // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
+ // contain a portion of one chunk or even multiple chunks together.
+ chunked_byte_buffer_.Append(response);
+
+ // A single HTTP chunk can contain more than one data chunk, thus the while.
+ while (chunked_byte_buffer_.HasChunks()) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
+ event_args.response = chunked_byte_buffer_.PopChunk();
+ DCHECK(event_args.response.get());
+ // TODO(primiano): /////////// Remove this after debug stage. /////////////
+ DumpResponse(std::string((char*)&(*event_args.response->begin()),
+ event_args.response->size()));
+ DispatchEvent(event_args);
+ }
+ if (end_of_response) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
+ DispatchEvent(event_args);
+ }
+}
+
+bool GoogleStreamingRemoteEngine::IsRecognitionPending() const {
+ DCHECK(CalledOnValidThread());
+ return state_ != STATE_IDLE;
+}
+
+int GoogleStreamingRemoteEngine::GetDesiredAudioChunkDurationMs() const {
+ return kAudioPacketIntervalMs;
+}
+
+// ----------------------- Core FSM implementation ---------------------------
+
+void GoogleStreamingRemoteEngine::DispatchEvent(
+ const FSMEventArgs& event_args) {
+ DCHECK(CalledOnValidThread());
+ DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
+ DCHECK_LE(state_, STATE_MAX_VALUE);
+
+ // Event dispatching must be sequential, otherwise it will break all the rules
+ // and the assumptions of the finite state automata model.
+ DCHECK(!is_dispatching_event_);
+ is_dispatching_event_ = true;
+
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ //DVLOG(1) << "State " << state_ << ", Event " << event_args.event;
+ state_ = ExecuteTransitionAndGetNextState(event_args);
+
+ is_dispatching_event_ = false;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::ExecuteTransitionAndGetNextState(
+ const FSMEventArgs& event_args) {
+ const FSMEvent event = event_args.event;
+ switch (state_) {
+ case STATE_IDLE:
+ switch (event) {
+ case EVENT_START_RECOGNITION:
+ return ConnectBothStreams(event_args);
+ case EVENT_END_RECOGNITION:
+ // Note: AUDIO_CHUNK and AUDIO_CHUNKS_ENDED events can remain enqueued in
+ // case of abort, so we just silently drop them here.
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
+ case EVENT_DOWNSTREAM_CLOSED:
+ return DoNothing(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_BOTH_STREAMS_CONNECTED:
+ switch (event) {
+ case EVENT_AUDIO_CHUNK:
+ return TransmitAudioUpstream(event_args);
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return CloseUpstreamAndWaitForResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_CLOSED:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_WAITING_DOWNSTREAM_RESULTS:
+ switch (event) {
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_DOWNSTREAM_CLOSED:
+ return RaiseNoMatchErrorIfGotNoResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return NotFeasible(event_args);
+ }
+ break;
+ }
+ return NotFeasible(event_args);
+}
+
+// ----------- Contract for all the FSM evolution functions below -------------
+// - Are guaranteed to be executed on the same thread (IO, except for tests);
+// - Are guaranteed not to be reentrant (with themselves and with each other);
+// - event_args members are guaranteed to be stable during the call.
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::ConnectBothStreams(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(!downstream_fetcher_.get());
+
+ encoder_.reset(AudioEncoder::Create(kDefaultAudioCodec,
+ config_.audio_sample_rate,
+ config_.audio_num_bits_per_sample));
+ DCHECK(encoder_.get());
+ const std::string request_key = GenerateRequestKey();
+
+ // Setup downstream fetcher.
+ std::vector<std::string> downstream_args;
+ downstream_args.push_back("sky=" + GetWebserviceKey());
+ downstream_args.push_back("pair=" + request_key);
+ downstream_args.push_back("maxresults=" + base::IntToString(kMaxResults));
+
+ GURL downstream_url(GetWebserviceBaseURL() + std::string(kDownstreamUrl) +
+ JoinString(downstream_args, '&'));
+ // TODO(primiano): /////////// Remove this after debug stage. /////////////
+ DVLOG(1) << "Opening downstream: " + downstream_url.PathForRequest();
+
+ downstream_fetcher_.reset(URLFetcher::Create(
+ kDownstreamUrlFetcherIdForTests, downstream_url, URLFetcher::GET, this));
+ downstream_fetcher_->SetRequestContext(url_context_);
+ downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ downstream_fetcher_->Start();
+
+ // Setup upstream fetcher.
+ // TODO(primiano): Handle the config_.grammar array once it is implemented by
+ // the speech recognition webservice.
+ std::vector<std::string> upstream_args;
+ upstream_args.push_back("sky=" + GetWebserviceKey());
+ upstream_args.push_back("pair=" + request_key);
+ upstream_args.push_back(
+ "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
+ upstream_args.push_back(
+ config_.filter_profanities ? "pfilter=2" : "pfilter=0");
+ upstream_args.push_back("maxresults=" + base::IntToString(kMaxResults));
+ upstream_args.push_back("client=myapp.mycompany.com");
+ // TODO(primiano): Can we remove this feature that sends audio HW information?
+ if (!config_.hardware_info.empty()) {
+ upstream_args.push_back(
+ "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
+ }
+
+ GURL upstream_url(GetWebserviceBaseURL() + std::string(kUpstreamUrl) +
+ JoinString(upstream_args, '&'));
+
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ DVLOG(1) << "Opening upstream: " + upstream_url.PathForRequest();
+
+ upstream_fetcher_.reset(URLFetcher::Create(
+ kUpstreamUrlFetcherIdForTests, upstream_url, URLFetcher::POST, this));
+ upstream_fetcher_->SetChunkedUpload(encoder_->mime_type());
+ upstream_fetcher_->SetRequestContext(url_context_);
+ upstream_fetcher_->SetReferrer(config_.origin_url);
+ upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ upstream_fetcher_->Start();
+ previous_response_length_ = 0;
+ return STATE_BOTH_STREAMS_CONNECTED;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::TransmitAudioUpstream(
+ const FSMEventArgs& event_args) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(event_args.audio_data.get());
+ const AudioChunk& audio = *(event_args.audio_data);
+
+ DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
+ encoder_->Encode(audio);
+ scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ upstream_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false);
+ return state_;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::ProcessDownstreamResponse(
+ const FSMEventArgs& event_args) {
+ DCHECK(event_args.response.get());
+ bool is_definitive_result = false;
+
+ HttpStreamingResult ws_result;
+ const bool protobuf_parse_successful = ws_result.ParseFromArray(
+ &(*event_args.response->begin()),
+ event_args.response->size());
+ if (!protobuf_parse_successful)
+ return AbortWithError(event_args);
+
+ // TODO(primiano): The code below sounds to me like a hack. Discuss the
+ // possibility of having a simpler and clearer protobuf grammar, in order to
+ // distinguish the different types of results and errors in a civilized way.
+
+ // Skip the upstream connected notification, since we're not interested in it.
+ if (ws_result.has_upstream_connected())
+ return state_;
+
+ if (ws_result.has_status()) {
+ switch (ws_result.status()) {
+ case kWebserviceStatusNoError:
+ is_definitive_result = true;
+ break;
+ case kWebserviceStatusErrorNoMatch:
+ // TODO(primiano): Contact gshires@; in case of no results, instead of
+ // the expected kWebserviceStatusErrorNoMatch status code, we receive a
+ // provisional-like result with neither provisional nor ephemeral strings.
+ return Abort(content::SPEECH_RECOGNITION_ERROR_NO_MATCH);
+ default:
+ VLOG(1) << "Received an unknown status code from the speech recognition"
+ "webservice (" << ws_result.status() << ")";
+ return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK);
+ }
+ }
+
+ SpeechRecognitionResult result;
+ if (is_definitive_result) {
+ got_last_definitive_result_ = true;
+ result.is_provisional = false;
+ for (int i = 0; i < ws_result.hypotheses_size(); ++i) {
+ const HttpStreamingHypothesis& ws_hypothesis = ws_result.hypotheses(i);
+ SpeechRecognitionHypothesis hypothesis;
+ DCHECK(ws_hypothesis.has_confidence());
+ hypothesis.confidence = ws_hypothesis.confidence();
+ DCHECK(ws_hypothesis.has_utterance());
+ hypothesis.utterance = UTF8ToUTF16(ws_hypothesis.utterance());
+ result.hypotheses.push_back(hypothesis);
+ }
+ } else {
+ result.is_provisional = true;
+ string16 transcript;
+ if (ws_result.has_provisional())
+ transcript.append(UTF8ToUTF16(ws_result.provisional()));
+ if (ws_result.has_ephemeral())
+ transcript.append(UTF8ToUTF16(ws_result.ephemeral()));
+ DCHECK(!transcript.empty());
+ result.hypotheses.push_back(SpeechRecognitionHypothesis(transcript, 0.0f));
+ }
+
+ // Propagate only actual (non-empty) results.
+ if (result.hypotheses.size() > 0)
+ delegate()->OnSpeechRecognitionEngineResult(result);
+
+ return state_;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::RaiseNoMatchErrorIfGotNoResults(
+ const FSMEventArgs& event_args) {
+ if (!got_last_definitive_result_) {
+ // Provide an empty result to signal that recognition ended with no
+ // errors, but also with no further results.
+ delegate()->OnSpeechRecognitionEngineResult(SpeechRecognitionResult());
+ }
+ return AbortSilently(event_args);
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::CloseUpstreamAndWaitForResults(
+ const FSMEventArgs&) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(encoder_.get());
+
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ DVLOG(1) << "Closing upstream";
+
+ // The encoder requires a non-empty final buffer, so we encode a packet
+ // of silence in case the encoder has no pending data.
+ std::vector<short> samples(
+ config_.audio_sample_rate * kAudioPacketIntervalMs / 1000);
+ scoped_refptr<AudioChunk> dummy_chunk =
+ new AudioChunk(reinterpret_cast<uint8*>(&samples[0]),
+ samples.size() * sizeof(short),
+ encoder_->bits_per_sample() / 8);
+ encoder_->Encode(*dummy_chunk);
+ encoder_->Flush();
+ scoped_refptr<AudioChunk> encoded_dummy_data =
+ encoder_->GetEncodedDataAndClear();
+ DCHECK(!encoded_dummy_data->IsEmpty());
+ encoder_.reset();
+
+ upstream_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true);
+ got_last_definitive_result_ = false;
+ return STATE_WAITING_DOWNSTREAM_RESULTS;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::CloseDownstream(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(downstream_fetcher_.get());
+
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ DVLOG(1) << "Closing downstream";
+ downstream_fetcher_.reset();
+ return STATE_IDLE;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::AbortSilently(const FSMEventArgs&) {
+ return Abort(content::SPEECH_RECOGNITION_ERROR_NONE);
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::AbortWithError(const FSMEventArgs&) {
+ return Abort(content::SPEECH_RECOGNITION_ERROR_NETWORK);
+}
+
+GoogleStreamingRemoteEngine::FSMState GoogleStreamingRemoteEngine::Abort(
+ SpeechRecognitionErrorCode error_code) {
+ // TODO(primiano): /////////// Remove this after debug stage. ////////////////
+ DVLOG(1) << "Aborting with error " << error_code;
+
+ if (error_code != content::SPEECH_RECOGNITION_ERROR_NONE) {
+ delegate()->OnSpeechRecognitionEngineError(
+ SpeechRecognitionError(error_code));
+ }
+ downstream_fetcher_.reset();
+ upstream_fetcher_.reset();
+ encoder_.reset();
+ return STATE_IDLE;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::DoNothing(const FSMEventArgs&) {
+ return state_;
+}
+
+GoogleStreamingRemoteEngine::FSMState
+GoogleStreamingRemoteEngine::NotFeasible(const FSMEventArgs& event_args) {
+ NOTREACHED() << "Unfeasible event " << event_args.event
+ << " in state " << state_;
+ return state_;
+}
+
+std::string GoogleStreamingRemoteEngine::GetAcceptedLanguages() const {
+ std::string langs = config_.language;
+ if (langs.empty() && url_context_) {
+ // If no language is provided, use the first one from the accepted
+ // languages list. If the list is empty, default to "en-US".
+ // Example of the contents of this list: "es,en-GB;q=0.8", ""
+ net::URLRequestContext* request_context =
+ url_context_->GetURLRequestContext();
+ DCHECK(request_context);
+ std::string accepted_language_list = request_context->accept_language();
+ size_t separator = accepted_language_list.find_first_of(",;");
+ if (separator > std::string::npos)
+ langs = accepted_language_list.substr(0, separator);
+ }
+ if (langs.empty())
+ langs = "en-US";
+ return langs;
+}
+
+// TODO(primiano): Is there any utility in the codebase that already does this?
+std::string GoogleStreamingRemoteEngine::GenerateRequestKey() const {
+ const int64 kKeepLowBytes = GG_LONGLONG(0x00000000FFFFFFFF);
+ const int64 kKeepHighBytes = GG_LONGLONG(0xFFFFFFFF00000000);
+
+ // Keep only the least significant bits of the timestamp, in order to
+ // reduce the probability of collisions.
+ int64 key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
+ (base::RandUint64() & kKeepHighBytes);
+ return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
+}
+
+GoogleStreamingRemoteEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
+ : event(event_value) {
+}
+
+GoogleStreamingRemoteEngine::FSMEventArgs::~FSMEventArgs() {
+}
+
+} // namespace speech
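
Both this engine and SpeechRecognizerImpl (patched further below) are built around the same pattern: every external stimulus becomes an FSM event, funneled through a single non-reentrant dispatcher that computes the next state. The following is a minimal standalone sketch of that pattern; the state and event names are hypothetical, not taken from this CL.

#include <cassert>

enum State { STATE_IDLE, STATE_RUNNING };
enum Event { EVENT_START, EVENT_STOP };

class TinyFsm {
 public:
  TinyFsm() : state_(STATE_IDLE), is_dispatching_(false) {}

  // Single entry point for all events; must never be re-entered.
  void DispatchEvent(Event event) {
    assert(!is_dispatching_);
    is_dispatching_ = true;
    state_ = ExecuteTransitionAndGetNextState(event);
    is_dispatching_ = false;
  }

 private:
  // Selects the next state from the current (state, event) pair.
  State ExecuteTransitionAndGetNextState(Event event) {
    switch (state_) {
      case STATE_IDLE:
        return (event == EVENT_START) ? STATE_RUNNING : STATE_IDLE;
      case STATE_RUNNING:
        return (event == EVENT_STOP) ? STATE_IDLE : STATE_RUNNING;
    }
    return state_;
  }

  State state_;
  bool is_dispatching_;
};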
diff --git a/content/browser/speech/google_streaming_remote_engine.h b/content/browser/speech/google_streaming_remote_engine.h
new file mode 100644
index 0000000..fde4957
--- /dev/null
+++ b/content/browser/speech/google_streaming_remote_engine.h
@@ -0,0 +1,165 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
+#define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/memory/ref_counted.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/threading/non_thread_safe.h"
+#include "content/browser/speech/audio_encoder.h"
+#include "content/browser/speech/chunked_byte_buffer.h"
+#include "content/browser/speech/speech_recognition_engine.h"
+#include "content/common/content_export.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "googleurl/src/gurl.h"
+#include "net/url_request/url_fetcher_delegate.h"
+
+namespace content {
+struct SpeechRecognitionError;
+struct SpeechRecognitionResult;
+}
+
+namespace net {
+class URLRequestContextGetter;
+}
+
+namespace speech {
+
+class AudioChunk;
+
+// Implements a SpeechRecognitionEngine supporting continuous recognition by
+// means of interaction with the Google streaming speech recognition
+// webservice.
+// More precisely, for each session this class establishes two HTTP(S)
+// connections with the webservice, herein called "upstream" and "downstream".
+// Audio chunks are sent on the upstream by means of a chunked HTTP POST
+// upload. Recognition results are retrieved in a full-duplex fashion (i.e.
+// while pushing audio on the upstream) on the downstream by means of a
+// chunked HTTP GET request. Pairing between the two streams is handled
+// through a randomly generated key, unique for each request, which is passed
+// in the &pair= arg to both stream request URLs.
+// In the case of a regular session, the upstream is closed when the audio
+// capture ends (notified through an |AudioChunksEnded| call) and the
+// downstream waits for a corresponding server-side closure (some late results
+// can still arrive after the upstream is closed).
+// Both streams are guaranteed to be closed when the |EndRecognition| call is
+// issued.
+class CONTENT_EXPORT GoogleStreamingRemoteEngine
+ : public NON_EXPORTED_BASE(SpeechRecognitionEngine),
+ public net::URLFetcherDelegate,
+ public NON_EXPORTED_BASE(base::NonThreadSafe) {
+ public:
+ explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context);
+ virtual ~GoogleStreamingRemoteEngine();
+
+ // SpeechRecognitionEngine methods.
+ virtual void SetConfig(const SpeechRecognitionEngineConfig& config) OVERRIDE;
+ virtual void StartRecognition() OVERRIDE;
+ virtual void EndRecognition() OVERRIDE;
+ virtual void TakeAudioChunk(const AudioChunk& data) OVERRIDE;
+ virtual void AudioChunksEnded() OVERRIDE;
+ virtual bool IsRecognitionPending() const OVERRIDE;
+ virtual int GetDesiredAudioChunkDurationMs() const OVERRIDE;
+
+ // net::URLFetcherDelegate methods.
+ virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
+ virtual void OnURLFetchDownloadProgress(const net::URLFetcher* source,
+ int64 current, int64 total) OVERRIDE;
+
+ private:
+ friend class GoogleStreamingRemoteEngineTest;
+
+ // IDs passed to URLFetcher::Create(). Used for testing.
+ static const int kUpstreamUrlFetcherIdForTests;
+ static const int kDownstreamUrlFetcherIdForTests;
+
+ // Response status codes from the speech recognition webservice.
+ static const int kWebserviceStatusNoError;
+ static const int kWebserviceStatusErrorNoMatch;
+
+ // Data types for the internal Finite State Machine (FSM).
+ enum FSMState {
+ STATE_IDLE = 0,
+ STATE_BOTH_STREAMS_CONNECTED,
+ STATE_WAITING_DOWNSTREAM_RESULTS,
+ STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS
+ };
+
+ enum FSMEvent {
+ EVENT_END_RECOGNITION = 0,
+ EVENT_START_RECOGNITION,
+ EVENT_AUDIO_CHUNK,
+ EVENT_AUDIO_CHUNKS_ENDED,
+ EVENT_UPSTREAM_ERROR,
+ EVENT_DOWNSTREAM_ERROR,
+ EVENT_DOWNSTREAM_RESPONSE,
+ EVENT_DOWNSTREAM_CLOSED,
+ EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED
+ };
+
+ struct FSMEventArgs {
+ explicit FSMEventArgs(FSMEvent event_value);
+ ~FSMEventArgs();
+
+ FSMEvent event;
+
+ // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|.
+ scoped_refptr<const AudioChunk> audio_data;
+
+ // In case of EVENT_DOWNSTREAM_RESPONSE, holds the current chunk bytes.
+ scoped_ptr<std::vector<uint8> > response;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
+ };
+
+ // Invoked by both the upstream and downstream URLFetcher callbacks to handle
+ // new chunk data and connection-closed or error notifications.
+ void DispatchHTTPResponse(const net::URLFetcher* source,
+ bool end_of_response);
+
+ // Entry point for pushing any new external event into the recognizer FSM.
+ void DispatchEvent(const FSMEventArgs& event_args);
+
+ // Defines the behavior of the recognizer FSM, selecting the appropriate
+ // transition according to the current state and event.
+ FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args);
+
+ // The methods below handle transitions of the recognizer FSM.
+ FSMState ConnectBothStreams(const FSMEventArgs& event_args);
+ FSMState TransmitAudioUpstream(const FSMEventArgs& event_args);
+ FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args);
+ FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args);
+ FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args);
+ FSMState CloseDownstream(const FSMEventArgs& event_args);
+ FSMState AbortSilently(const FSMEventArgs& event_args);
+ FSMState AbortWithError(const FSMEventArgs& event_args);
+ FSMState Abort(content::SpeechRecognitionErrorCode error);
+ FSMState DoNothing(const FSMEventArgs& event_args);
+ FSMState NotFeasible(const FSMEventArgs& event_args);
+
+ std::string GetAcceptedLanguages() const;
+ std::string GenerateRequestKey() const;
+
+ SpeechRecognitionEngineConfig config_;
+ scoped_ptr<net::URLFetcher> upstream_fetcher_;
+ scoped_ptr<net::URLFetcher> downstream_fetcher_;
+ scoped_refptr<net::URLRequestContextGetter> url_context_;
+ scoped_ptr<AudioEncoder> encoder_;
+ ChunkedByteBuffer chunked_byte_buffer_;
+ size_t previous_response_length_;
+ bool got_last_definitive_result_;
+ bool is_dispatching_event_;
+ FSMState state_;
+
+ DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine);
+};
+
+} // namespace speech
+
+#endif // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_
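
A hedged usage sketch of the engine API declared above, mirroring the call sequence exercised by the unit tests below. RunSession and its parameters are hypothetical helpers, not part of this CL; results and errors are delivered asynchronously on the delegate.

#include "content/browser/speech/google_streaming_remote_engine.h"

namespace speech {

// Drives one recognition session; |delegate| receives results and errors.
void RunSession(GoogleStreamingRemoteEngine* engine,
                SpeechRecognitionEngineDelegate* delegate,
                const AudioChunk& chunk) {
  engine->set_delegate(delegate);
  engine->StartRecognition();     // Opens both the upstream and downstream.
  engine->TakeAudioChunk(chunk);  // Encodes and uploads one audio chunk.
  engine->AudioChunksEnded();     // Flushes the encoder, closes the upstream.
  // ... results arrive on the delegate until the downstream is closed ...
  engine->EndRecognition();       // Guarantees both streams are closed.
}

}  // namespace speech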
diff --git a/content/browser/speech/google_streaming_remote_engine_unittest.cc b/content/browser/speech/google_streaming_remote_engine_unittest.cc
new file mode 100644
index 0000000..9de06b6
--- /dev/null
+++ b/content/browser/speech/google_streaming_remote_engine_unittest.cc
@@ -0,0 +1,440 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <queue>
+
+#include "base/memory/scoped_ptr.h"
+#include "base/message_loop.h"
+#include "base/utf_string_conversions.h"
+#include "content/browser/speech/audio_buffer.h"
+#include "content/browser/speech/google_streaming_remote_engine.h"
+#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "net/url_request/test_url_fetcher_factory.h"
+#include "net/url_request/url_request_context_getter.h"
+#include "net/url_request/url_request_status.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using content::SpeechRecognitionHypothesis;
+using content::SpeechRecognitionResult;
+using net::URLRequestStatus;
+using net::TestURLFetcher;
+using net::TestURLFetcherFactory;
+
+namespace speech {
+
+// Note: the terms upstream and downstream herein refer to the client's
+// (engine_under_test_) viewpoint.
+
+class GoogleStreamingRemoteEngineTest
+ : public SpeechRecognitionEngineDelegate,
+ public testing::Test {
+ public:
+ GoogleStreamingRemoteEngineTest()
+ : last_number_of_upstream_chunks_seen_(0U),
+ error_(content::SPEECH_RECOGNITION_ERROR_NONE) { }
+
+ // Creates a speech recognition request and invokes its URL fetcher delegate
+ // with the given test data.
+ void CreateAndTestRequest(bool success, const std::string& http_response);
+
+ // SpeechRecognitionEngineDelegate methods.
+ virtual void OnSpeechRecognitionEngineResult(
+ const SpeechRecognitionResult& result) OVERRIDE {
+ results_.push(result);
+ }
+ virtual void OnSpeechRecognitionEngineError(
+ const content::SpeechRecognitionError& error) OVERRIDE {
+ error_ = error.code;
+ }
+
+ // testing::Test methods.
+ virtual void SetUp() OVERRIDE;
+ virtual void TearDown() OVERRIDE;
+
+ protected:
+ enum DownstreamError {
+ DOWNSTREAM_ERROR_NONE,
+ DOWNSTREAM_ERROR_HTTP500,
+ DOWNSTREAM_ERROR_NETWORK,
+ DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
+ };
+ static bool ResultsAreEqual(const SpeechRecognitionResult& a,
+ const SpeechRecognitionResult& b);
+ static std::string SerializeProtobufResponse(const HttpStreamingResult& msg);
+ static std::string ToBigEndian32(uint32 value);
+
+ TestURLFetcher* GetUpstreamFetcher();
+ TestURLFetcher* GetDownstreamFetcher();
+ void StartMockRecognition();
+ void EndMockRecognition();
+ void InjectDummyAudioChunk();
+ size_t UpstreamChunksUploadedFromLastCall();
+ void ProvideMockResultDownstream(const SpeechRecognitionResult& result);
+ void ExpectResultReceived(const SpeechRecognitionResult& result);
+ void CloseMockDownstream(DownstreamError error);
+
+ scoped_ptr<GoogleStreamingRemoteEngine> engine_under_test_;
+ TestURLFetcherFactory url_fetcher_factory_;
+ size_t last_number_of_upstream_chunks_seen_;
+ MessageLoop message_loop_;
+ std::string response_buffer_;
+ content::SpeechRecognitionErrorCode error_;
+ std::queue<SpeechRecognitionResult> results_;
+};
+
+TEST_F(GoogleStreamingRemoteEngineTest, SingleDefinitiveResult) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ // Inject some dummy audio chunks and check that a corresponding chunked
+ // upload to the server is performed each time.
+ for (int i = 0; i < 3; ++i) {
+ InjectDummyAudioChunk();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+ }
+
+ // Ensure that a final chunk (encoded silence) is uploaded when chunks end.
+ engine_under_test_->AudioChunksEnded();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Simulate a protobuf message streamed from the server containing a single
+ // result with two hypotheses.
+ SpeechRecognitionResult result;
+ result.is_provisional = false;
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 1"), 0.1F));
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis 2"), 0.2F));
+
+ ProvideMockResultDownstream(result);
+ ExpectResultReceived(result);
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Ensure everything is closed cleanly after the downstream is closed.
+ CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+TEST_F(GoogleStreamingRemoteEngineTest, SeveralStreamingResults) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ for (int i = 0; i < 4; ++i) {
+ InjectDummyAudioChunk();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+
+ SpeechRecognitionResult result;
+ result.is_provisional = (i % 2 == 0); // Alternate result types.
+ float confidence = result.is_provisional ? 0.0F : (i * 0.1F);
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), confidence));
+
+ ProvideMockResultDownstream(result);
+ ExpectResultReceived(result);
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+ }
+
+ // Ensure that a final chunk (encoded silence) is uploaded when chunks end.
+ engine_under_test_->AudioChunksEnded();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Simulate a final definitive result.
+ SpeechRecognitionResult result;
+ result.is_provisional = false;
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 1.0F));
+ ProvideMockResultDownstream(result);
+ ExpectResultReceived(result);
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Ensure everything is closed cleanly after the downstream is closed.
+ CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+TEST_F(GoogleStreamingRemoteEngineTest, NoFinalResultAfterAudioChunksEnded) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ // Simulate one pushed audio chunk.
+ InjectDummyAudioChunk();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+
+ // Simulate the corresponding definitive result.
+ SpeechRecognitionResult result;
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("hypothesis"), 1.0F));
+ ProvideMockResultDownstream(result);
+ ExpectResultReceived(result);
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Simulate a silent downstream closure after |AudioChunksEnded|.
+ engine_under_test_->AudioChunksEnded();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+ CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
+
+ // Expect an empty result, signaling that recognition ended with no
+ // actual results or errors.
+ SpeechRecognitionResult empty_result;
+ ExpectResultReceived(empty_result);
+
+ // Ensure everything is closed cleanly after the downstream is closed.
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NONE, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+TEST_F(GoogleStreamingRemoteEngineTest, NoMatchError) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ for (int i = 0; i < 3; ++i)
+ InjectDummyAudioChunk();
+ engine_under_test_->AudioChunksEnded();
+ ASSERT_EQ(4U, UpstreamChunksUploadedFromLastCall());
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ // Simulate only a provisional result.
+ SpeechRecognitionResult result;
+ result.is_provisional = true;
+ result.hypotheses.push_back(
+ SpeechRecognitionHypothesis(UTF8ToUTF16("The final result"), 0.0F));
+ ProvideMockResultDownstream(result);
+ ExpectResultReceived(result);
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ CloseMockDownstream(DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH);
+
+ // Expect a SPEECH_RECOGNITION_ERROR_NO_MATCH error to be raised.
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NO_MATCH, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+TEST_F(GoogleStreamingRemoteEngineTest, HTTPError) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ InjectDummyAudioChunk();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+
+ // Close the downstream with an HTTP 500 error.
+ CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500);
+
+ // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+TEST_F(GoogleStreamingRemoteEngineTest, NetworkError) {
+ StartMockRecognition();
+ ASSERT_TRUE(GetUpstreamFetcher());
+ ASSERT_EQ(0U, UpstreamChunksUploadedFromLastCall());
+
+ InjectDummyAudioChunk();
+ ASSERT_EQ(1U, UpstreamChunksUploadedFromLastCall());
+
+ // Close the downstream fetcher simulating a network failure.
+ CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK);
+
+ // Expect a SPEECH_RECOGNITION_ERROR_NETWORK error to be raised.
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+ EndMockRecognition();
+ ASSERT_EQ(content::SPEECH_RECOGNITION_ERROR_NETWORK, error_);
+ ASSERT_EQ(0U, results_.size());
+}
+
+void GoogleStreamingRemoteEngineTest::SetUp() {
+ engine_under_test_.reset(
+ new GoogleStreamingRemoteEngine(NULL /*URLRequestContextGetter*/));
+ engine_under_test_->set_delegate(this);
+}
+
+void GoogleStreamingRemoteEngineTest::TearDown() {
+ engine_under_test_.reset();
+}
+
+TestURLFetcher* GoogleStreamingRemoteEngineTest::GetUpstreamFetcher() {
+ return url_fetcher_factory_.GetFetcherByID(
+ GoogleStreamingRemoteEngine::kUpstreamUrlFetcherIdForTests);
+}
+
+TestURLFetcher* GoogleStreamingRemoteEngineTest::GetDownstreamFetcher() {
+ return url_fetcher_factory_.GetFetcherByID(
+ GoogleStreamingRemoteEngine::kDownstreamUrlFetcherIdForTests);
+}
+
+// Starts recognition on the engine, ensuring that both stream fetchers are
+// created.
+void GoogleStreamingRemoteEngineTest::StartMockRecognition() {
+ DCHECK(engine_under_test_.get());
+
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+
+ engine_under_test_->StartRecognition();
+ ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
+
+ TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
+ ASSERT_TRUE(upstream_fetcher);
+ upstream_fetcher->set_url(upstream_fetcher->GetOriginalURL());
+
+ TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
+ ASSERT_TRUE(downstream_fetcher);
+ downstream_fetcher->set_url(downstream_fetcher->GetOriginalURL());
+}
+
+void GoogleStreamingRemoteEngineTest::EndMockRecognition() {
+ DCHECK(engine_under_test_.get());
+ engine_under_test_->EndRecognition();
+ ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
+
+ // TODO(primiano): In order to be very pedantic we should check that both
+ // the upstream and downstream URL fetchers have been disposed at this time.
+ // Unfortunately it seems that there is no direct way to detect (in tests)
+ // if a url_fetcher has been freed or not, since they are not automatically
+ // de-registered from the TestURLFetcherFactory on destruction.
+ // ASSERT_FALSE(GetUpstreamFetcher());
+ // ASSERT_FALSE(GetDownstreamFetcher());
+}
+
+void GoogleStreamingRemoteEngineTest::InjectDummyAudioChunk() {
+ unsigned char dummy_audio_buffer_data[2] = {'\0', '\0'};
+ scoped_refptr<AudioChunk> dummy_audio_chunk(
+ new AudioChunk(&dummy_audio_buffer_data[0],
+ sizeof(dummy_audio_buffer_data),
+ 2 /* bytes per sample */));
+ DCHECK(engine_under_test_.get());
+ engine_under_test_->TakeAudioChunk(*dummy_audio_chunk);
+}
+
+size_t GoogleStreamingRemoteEngineTest::UpstreamChunksUploadedFromLastCall() {
+ TestURLFetcher* upstream_fetcher = GetUpstreamFetcher();
+ DCHECK(upstream_fetcher);
+ const size_t number_of_chunks = upstream_fetcher->upload_chunks().size();
+ DCHECK_GE(number_of_chunks, last_number_of_upstream_chunks_seen_);
+ const size_t new_chunks = number_of_chunks -
+ last_number_of_upstream_chunks_seen_;
+ last_number_of_upstream_chunks_seen_ = number_of_chunks;
+ return new_chunks;
+}
+
+void GoogleStreamingRemoteEngineTest::ProvideMockResultDownstream(
+ const SpeechRecognitionResult& result) {
+ TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
+
+ ASSERT_TRUE(downstream_fetcher);
+ downstream_fetcher->set_status(URLRequestStatus(/* default=SUCCESS */));
+ downstream_fetcher->set_response_code(200);
+
+ HttpStreamingResult response;
+ if (result.is_provisional) {
+ DCHECK_EQ(result.hypotheses.size(), 1U);
+ const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[0];
+ response.set_provisional(UTF16ToUTF8(hypothesis.utterance));
+ } else {
+ response.set_status(GoogleStreamingRemoteEngine::kWebserviceStatusNoError);
+ for (size_t i = 0; i < result.hypotheses.size(); ++i) {
+ const SpeechRecognitionHypothesis& hypothesis = result.hypotheses[i];
+ HttpStreamingHypothesis* ws_hypothesis = response.add_hypotheses();
+ ws_hypothesis->set_confidence(hypothesis.confidence);
+ ws_hypothesis->set_utterance(UTF16ToUTF8(hypothesis.utterance));
+ }
+ }
+
+ std::string response_string = SerializeProtobufResponse(response);
+ response_buffer_.append(response_string);
+ downstream_fetcher->SetResponseString(response_buffer_);
+ downstream_fetcher->delegate()->OnURLFetchDownloadProgress(
+ downstream_fetcher,
+ response_buffer_.size(),
+ -1 /* total response length not used */);
+}
+
+void GoogleStreamingRemoteEngineTest::CloseMockDownstream(
+ DownstreamError error) {
+ TestURLFetcher* downstream_fetcher = GetDownstreamFetcher();
+ ASSERT_TRUE(downstream_fetcher);
+
+ const URLRequestStatus::Status fetcher_status =
+ (error == DOWNSTREAM_ERROR_NETWORK) ? URLRequestStatus::FAILED :
+ URLRequestStatus::SUCCESS;
+ downstream_fetcher->set_status(URLRequestStatus(fetcher_status, 0));
+ downstream_fetcher->set_response_code(
+ (error == DOWNSTREAM_ERROR_HTTP500) ? 500 : 200);
+
+ if (error == DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH) {
+ HttpStreamingResult response;
+ response.set_status(
+ GoogleStreamingRemoteEngine::kWebserviceStatusErrorNoMatch);
+ response_buffer_.append(SerializeProtobufResponse(response));
+ }
+ downstream_fetcher->SetResponseString(response_buffer_);
+ downstream_fetcher->delegate()->OnURLFetchComplete(downstream_fetcher);
+}
+
+void GoogleStreamingRemoteEngineTest::ExpectResultReceived(
+ const SpeechRecognitionResult& result) {
+ ASSERT_GE(results_.size(), 1U);
+ ASSERT_TRUE(ResultsAreEqual(result, results_.front()));
+ results_.pop();
+}
+
+bool GoogleStreamingRemoteEngineTest::ResultsAreEqual(
+ const SpeechRecognitionResult& a, const SpeechRecognitionResult& b) {
+ if (a.is_provisional != b.is_provisional ||
+ a.hypotheses.size() != b.hypotheses.size()) {
+ return false;
+ }
+ for (size_t i = 0; i < a.hypotheses.size(); ++i) {
+ const SpeechRecognitionHypothesis& hyp_a = a.hypotheses[i];
+ const SpeechRecognitionHypothesis& hyp_b = b.hypotheses[i];
+ if (hyp_a.utterance != hyp_b.utterance ||
+ hyp_a.confidence != hyp_b.confidence) {
+ return false;
+ }
+ }
+ return true;
+}
+
+std::string GoogleStreamingRemoteEngineTest::SerializeProtobufResponse(
+ const HttpStreamingResult& msg) {
+ std::string response_string;
+ msg.SerializeToString(&response_string);
+
+ // Prepend a 4-byte big-endian length prefix to the protobuf message, as
+ // required by the Google streaming recognition webservice protocol.
+ response_string.insert(0, ToBigEndian32(response_string.size()));
+ return response_string;
+}
+
+std::string GoogleStreamingRemoteEngineTest::ToBigEndian32(uint32 value) {
+ char raw_data[4];
+ raw_data[0] = static_cast<uint8>((value >> 24) & 0xFF);
+ raw_data[1] = static_cast<uint8>((value >> 16) & 0xFF);
+ raw_data[2] = static_cast<uint8>((value >> 8) & 0xFF);
+ raw_data[3] = static_cast<uint8>(value & 0xFF);
+ return std::string(raw_data, sizeof(raw_data));
+}
+
+} // namespace speech
diff --git a/content/browser/speech/speech_recognition_manager_impl.cc b/content/browser/speech/speech_recognition_manager_impl.cc
index 47b5420..65351c1 100644
--- a/content/browser/speech/speech_recognition_manager_impl.cc
+++ b/content/browser/speech/speech_recognition_manager_impl.cc
@@ -7,6 +7,7 @@
#include "base/bind.h"
#include "content/browser/browser_main_loop.h"
#include "content/browser/speech/google_one_shot_remote_engine.h"
+#include "content/browser/speech/google_streaming_remote_engine.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/browser/speech/speech_recognizer_impl.h"
#include "content/public/browser/browser_thread.h"
@@ -101,12 +102,20 @@ int SpeechRecognitionManagerImpl::CreateSession(
remote_engine_config.hardware_info = hardware_info;
remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
- SpeechRecognitionEngine* google_remote_engine =
+ SpeechRecognitionEngine* google_remote_engine;
+ if (config.is_one_shot) {
+ google_remote_engine =
new GoogleOneShotRemoteEngine(config.url_request_context_getter);
+ } else {
+ google_remote_engine =
+ new GoogleStreamingRemoteEngine(config.url_request_context_getter);
+ }
+
google_remote_engine->SetConfig(remote_engine_config);
session.recognizer = new SpeechRecognizerImpl(this,
session_id,
+ config.is_one_shot,
google_remote_engine);
return session_id;
}
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 18c6c9f..1141d9e 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -80,6 +80,7 @@ COMPILE_ASSERT(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
SpeechRecognizerImpl::SpeechRecognizerImpl(
SpeechRecognitionEventListener* listener,
int session_id,
+ bool is_single_shot,
SpeechRecognitionEngine* engine)
: listener_(listener),
testing_audio_manager_(NULL),
@@ -87,6 +88,7 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
endpointer_(kAudioSampleRate),
session_id_(session_id),
is_dispatching_event_(false),
+ is_single_shot_(is_single_shot),
state_(STATE_IDLE) {
DCHECK(listener_ != NULL);
DCHECK(recognition_engine_ != NULL);
@@ -197,7 +199,7 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
}
// ----------------------- Core FSM implementation ---------------------------
-// TODO(primiano) After the changes in the media package (r129173), this class
+// TODO(primiano): After the changes in the media package (r129173), this class
// slightly violates the SpeechRecognitionEventListener interface contract. In
// particular, it is not true anymore that this class can be freed after the
// OnRecognitionEnd event, since the audio_controller_.Close() asynchronous
@@ -240,7 +242,7 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (state_) {
case STATE_IDLE:
switch (event) {
- // TODO(primiano) restore UNREACHABLE_CONDITION on EVENT_ABORT and
+ // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and
// EVENT_STOP_CAPTURE below once speech input extensions are fixed.
case EVENT_ABORT:
return DoNothing(event_args);
@@ -349,7 +351,7 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
// - The class won't be freed in the meanwhile due to callbacks;
// - IsCapturingAudio() returns true if and only if audio_controller_ != NULL.
-// TODO(primiano) the audio pipeline is currently serial. However, the
+// TODO(primiano): the audio pipeline is currently serial. However, the
// clipper->endpointer->vumeter chain and the sr_engine could be parallelized.
// We should profile the execution to see whether it would be worthwhile or not.
void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
@@ -464,9 +466,10 @@ SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) {
- if (endpointer_.speech_input_complete()) {
+ // End-of-speech detection is performed only in one-shot mode.
+ // TODO(primiano): What about introducing a longer timeout for continuous rec?
+ if (is_single_shot_ && endpointer_.speech_input_complete())
return StopCaptureAndWaitForResult(event_args);
- }
return STATE_RECOGNIZING;
}
@@ -532,21 +535,43 @@ SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
return STATE_IDLE;
}
-SpeechRecognizerImpl::FSMState
-SpeechRecognizerImpl::ProcessIntermediateResult(const FSMEventArgs&) {
- // This is in preparation for future speech recognition functions.
- NOTREACHED();
+SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult(
+ const FSMEventArgs& event_args) {
+ // Provisional results can occur only in continuous (non-one-shot) mode.
+ // If this DCHECK fires, it means that a continuous speech recognition
+ // engine is being used for a one-shot recognition.
+ DCHECK_EQ(false, is_single_shot_);
+ const SpeechRecognitionResult& result = event_args.engine_result;
+ listener_->OnRecognitionResult(session_id_, result);
return state_;
}
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
const SpeechRecognitionResult& result = event_args.engine_result;
- DVLOG(1) << "Got valid result";
- recognition_engine_->EndRecognition();
- listener_->OnRecognitionResult(session_id_, result);
- listener_->OnRecognitionEnd(session_id_);
- return STATE_IDLE;
+ if (result.is_provisional) {
+ DCHECK(!is_single_shot_);
+ listener_->OnRecognitionResult(session_id_, result);
+ // We don't end the recognition if a provisional result is received in
+ // STATE_WAITING_FINAL_RESULT. A definitive result will come next and will
+ // end the recognition.
+ return state_;
+ } else {
+ recognition_engine_->EndRecognition();
+ // We could receive an empty result (which we won't propagate further)
+ // in the following (continuous) scenario:
+ // 1. The caller starts pushing audio and receives some results;
+ // 2. A |StopAudioCapture| is issued later;
+ // 3. The final audio frames captured in the interval ]1,2] do not lead to
+ // any result (nor any error);
+ // 4. The speech recognition engine, therefore, emits an empty result to
+ // notify that recognition ended with no error, but also with no
+ // further results.
+ if (result.hypotheses.size() > 0)
+ listener_->OnRecognitionResult(session_id_, result);
+ listener_->OnRecognitionEnd(session_id_);
+ return STATE_IDLE;
+ }
}
SpeechRecognizerImpl::FSMState
@@ -563,7 +588,7 @@ SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) {
void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() {
DCHECK(IsCapturingAudio());
- DVLOG(1) << "SpeechRecognizerImpl stopping audio capture.";
+ DVLOG(1) << "SpeechRecognizerImpl closing audio controller.";
// Issues a Close on the audio controller, passing an empty callback. The only
// purpose of such callback is to keep the audio controller refcounted until
// Close has completed (in the audio thread) and automatically destroy it
@@ -581,7 +606,7 @@ void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
bool clip_detected) {
// Calculate the input volume to display in the UI, smoothing towards the
// new level.
- // TODO(primiano) Do we really need all this floating point arith here?
+ // TODO(primiano): Do we really need all this floating point arith here?
// Perhaps it might be quite expensive on mobile.
float level = (rms - kAudioMeterMinDb) /
(kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 8179e2743..89e41e6 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -45,6 +45,7 @@ class CONTENT_EXPORT SpeechRecognizerImpl
SpeechRecognizerImpl(
content::SpeechRecognitionEventListener* listener,
int session_id,
+ bool is_single_shot,
SpeechRecognitionEngine* engine);
void StartRecognition();
@@ -153,6 +154,7 @@ class CONTENT_EXPORT SpeechRecognizerImpl
int num_samples_recorded_;
float audio_level_;
bool is_dispatching_event_;
+ bool is_single_shot_;
FSMState state_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
diff --git a/content/browser/speech/speech_recognizer_impl_unittest.cc b/content/browser/speech/speech_recognizer_impl_unittest.cc
index e73a894b..4a487c3 100644
--- a/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -117,7 +117,10 @@ class SpeechRecognizerImplTest : public content::SpeechRecognitionEventListener,
config.filter_profanities = false;
sr_engine->SetConfig(config);
- recognizer_ = new SpeechRecognizerImpl(this, 1, sr_engine);
+ const int kTestingSessionId = 1;
+ const bool kOneShotMode = true;
+ recognizer_ = new SpeechRecognizerImpl(
+ this, kTestingSessionId, kOneShotMode, sr_engine);
recognizer_->SetAudioManagerForTesting(audio_manager_.get());
int audio_packet_length_bytes =
diff --git a/content/common/speech_recognition_messages.h b/content/common/speech_recognition_messages.h
index 864b442..d004b0a 100644
--- a/content/common/speech_recognition_messages.h
+++ b/content/common/speech_recognition_messages.h
@@ -29,7 +29,7 @@ IPC_STRUCT_TRAITS_BEGIN(content::SpeechRecognitionHypothesis)
IPC_STRUCT_TRAITS_END()
IPC_STRUCT_TRAITS_BEGIN(content::SpeechRecognitionResult)
- IPC_STRUCT_TRAITS_MEMBER(provisional)
+ IPC_STRUCT_TRAITS_MEMBER(is_provisional)
IPC_STRUCT_TRAITS_MEMBER(hypotheses)
IPC_STRUCT_TRAITS_END()
diff --git a/content/content_browser.gypi b/content/content_browser.gypi
index e5489bd..4a56f74 100644
--- a/content/content_browser.gypi
+++ b/content/content_browser.gypi
@@ -669,6 +669,8 @@
'browser/speech/endpointer/energy_endpointer_params.h',
'browser/speech/google_one_shot_remote_engine.cc',
'browser/speech/google_one_shot_remote_engine.h',
+ 'browser/speech/google_streaming_remote_engine.cc',
+ 'browser/speech/google_streaming_remote_engine.h',
'browser/speech/input_tag_speech_dispatcher_host.cc',
'browser/speech/input_tag_speech_dispatcher_host.h',
'browser/speech/speech_recognition_dispatcher_host.cc',
diff --git a/content/content_tests.gypi b/content/content_tests.gypi
index 3694b30..2ff3620f 100644
--- a/content/content_tests.gypi
+++ b/content/content_tests.gypi
@@ -192,6 +192,7 @@
'content_plugin',
'content_renderer',
'test_support_content',
+ 'browser/speech/proto/speech_proto.gyp:speech_proto',
'content_resources.gyp:content_resources',
'../base/base.gyp:test_support_base',
'../base/third_party/dynamic_annotations/dynamic_annotations.gyp:dynamic_annotations',
@@ -281,6 +282,7 @@
'browser/speech/chunked_byte_buffer_unittest.cc',
'browser/speech/endpointer/endpointer_unittest.cc',
'browser/speech/google_one_shot_remote_engine_unittest.cc',
+ 'browser/speech/google_streaming_remote_engine_unittest.cc',
'browser/speech/speech_recognizer_impl_unittest.cc',
'browser/ssl/ssl_host_state_unittest.cc',
'browser/system_message_window_win_unittest.cc',
diff --git a/content/public/common/content_switches.cc b/content/public/common/content_switches.cc
index b0dadc7..f2a4034 100644
--- a/content/public/common/content_switches.cc
+++ b/content/public/common/content_switches.cc
@@ -201,6 +201,15 @@ const char kDisableSpeechInput[] = "disable-speech-input";
// Enables scripted speech api.
const char kEnableScriptedSpeech[] = "enable-scripted-speech";
+// TODO(primiano): Remove the two switches below when the URL becomes public.
+// Specifies the webservice URL for continuous speech recognition.
+const char kSpeechRecognitionWebserviceURL[] =
+ "speech-service";
+
+// Specifies the request key for the continuous speech recognition webservice.
+const char kSpeechRecognitionWebserviceKey[] =
+ "speech-service-key";
+
// Disables animation on the compositor thread.
const char kDisableThreadedAnimation[] = "disable-threaded-animation";
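For reference, a hedged sketch of how the new switches would be consumed; the CommandLine accessors are from base/command_line.h, and the helper name is illustrative (the expected consumer is GoogleStreamingRemoteEngine):

    #include <string>

    #include "base/command_line.h"
    #include "content/public/common/content_switches.h"

    // Returns the experimental webservice URL, or an empty string if the
    // switch is absent (the production URL is not public yet).
    std::string GetStreamingWebserviceURL() {
      const CommandLine& cmdline = *CommandLine::ForCurrentProcess();
      if (!cmdline.HasSwitch(switches::kSpeechRecognitionWebserviceURL))
        return std::string();
      return cmdline.GetSwitchValueASCII(
          switches::kSpeechRecognitionWebserviceURL);
    }

Invocation would then be along the lines of --speech-service=<url> --speech-service-key=<key> on the browser command line.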
diff --git a/content/public/common/content_switches.h b/content/public/common/content_switches.h
index a876146..2c8ab43 100644
--- a/content/public/common/content_switches.h
+++ b/content/public/common/content_switches.h
@@ -74,6 +74,8 @@ extern const char kDisableSharedWorkers[];
extern const char kDisableSiteSpecificQuirks[];
CONTENT_EXPORT extern const char kDisableSpeechInput[];
CONTENT_EXPORT extern const char kEnableScriptedSpeech[];
+extern const char kSpeechRecognitionWebserviceURL[];
+extern const char kSpeechRecognitionWebserviceKey[];
CONTENT_EXPORT extern const char kDisableThreadedAnimation[];
CONTENT_EXPORT extern const char kDisableWebAudio[];
extern const char kDisableWebSecurity[];
diff --git a/content/public/common/speech_recognition_result.cc b/content/public/common/speech_recognition_result.cc
index 9a89bab..af05fc7 100644
--- a/content/public/common/speech_recognition_result.cc
+++ b/content/public/common/speech_recognition_result.cc
@@ -7,10 +7,11 @@
namespace content {
SpeechRecognitionResult::SpeechRecognitionResult()
- : provisional(false) {
+ : is_provisional(false) {
}
SpeechRecognitionResult::~SpeechRecognitionResult() {
}
} // namespace content
+
diff --git a/content/public/common/speech_recognition_result.h b/content/public/common/speech_recognition_result.h
index f227e0d..1a912fd 100644
--- a/content/public/common/speech_recognition_result.h
+++ b/content/public/common/speech_recognition_result.h
@@ -32,7 +32,7 @@ typedef std::vector<SpeechRecognitionHypothesis>
struct CONTENT_EXPORT SpeechRecognitionResult {
SpeechRecognitionHypothesisArray hypotheses;
- bool provisional;
+ bool is_provisional;
SpeechRecognitionResult();
~SpeechRecognitionResult();
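A minimal sketch of populating the renamed struct; the hypothesis fields (utterance, confidence) are the ones read back in speech_recognition_dispatcher.cc below, and ASCIIToUTF16 comes from base/utf_string_conversions.h:

    content::SpeechRecognitionResult result;
    result.is_provisional = true;  // Interim result; a final one will follow.

    content::SpeechRecognitionHypothesis hypothesis;
    hypothesis.utterance = ASCIIToUTF16("hello world");
    hypothesis.confidence = 0.87;
    result.hypotheses.push_back(hypothesis);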
diff --git a/content/renderer/speech_recognition_dispatcher.cc b/content/renderer/speech_recognition_dispatcher.cc
index 210a66f..43ca409 100644
--- a/content/renderer/speech_recognition_dispatcher.cc
+++ b/content/renderer/speech_recognition_dispatcher.cc
@@ -56,10 +56,14 @@ void SpeechRecognitionDispatcher::start(
const WebSpeechRecognitionHandle& handle,
const WebSpeechRecognitionParams& params,
WebSpeechRecognizerClient* recognizer_client) {
- //TODO(primiano) What to do if a start is issued to an already started object?
DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
recognizer_client_ = recognizer_client;
+ // TODO(primiano): Should return false in order to communicate the failure
+ // and let WebKit throw an exception (need a hans@ patch to do that).
+ if (HandleExists(handle))
+ return;
+
SpeechRecognitionHostMsg_StartRequest_Params msg_params;
for (size_t i = 0; i < params.grammars().size(); ++i) {
const WebSpeechGrammar& grammar = params.grammars()[i];
@@ -71,23 +75,29 @@ void SpeechRecognitionDispatcher::start(
msg_params.is_one_shot = !params.continuous();
msg_params.origin_url = params.origin().toString().utf8();
msg_params.render_view_id = routing_id();
- msg_params.request_id = GetIDForHandle(handle);
+ msg_params.request_id = GetOrCreateIDForHandle(handle);
+ // The handle mapping will be removed in |OnRecognitionEnd|.
Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}
void SpeechRecognitionDispatcher::stop(
const WebSpeechRecognitionHandle& handle,
WebSpeechRecognizerClient* recognizer_client) {
- DCHECK(recognizer_client_ == recognizer_client);
- Send(new SpeechRecognitionHostMsg_StopCaptureRequest(routing_id(),
- GetIDForHandle(handle)));
+ // Ignore a |stop| issued without a matching |start|.
+ if (recognizer_client_ != recognizer_client || !HandleExists(handle))
+ return;
+ Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
+ routing_id(), GetOrCreateIDForHandle(handle)));
}
void SpeechRecognitionDispatcher::abort(
const WebSpeechRecognitionHandle& handle,
WebSpeechRecognizerClient* recognizer_client) {
- Send(new SpeechRecognitionHostMsg_AbortRequest(routing_id(),
- GetIDForHandle(handle)));
+ // Ignore an |abort| issued without a matching |start|.
+ if (recognizer_client_ != recognizer_client || !HandleExists(handle))
+ return;
+ Send(new SpeechRecognitionHostMsg_AbortRequest(
+ routing_id(), GetOrCreateIDForHandle(handle)));
}
void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
@@ -116,8 +126,8 @@ void SpeechRecognitionDispatcher::OnErrorOccurred(
recognizer_client_->didReceiveNoMatch(GetHandleFromID(request_id),
WebSpeechRecognitionResult());
} else {
- // TODO(primiano) speech_recognition_error.h must be updated with the new
- // API specs soon.
+ // TODO(primiano): speech_recognition_error.h must be updated to match the
+ // new enums defined in the API specs (thus removing the code below).
WebSpeechRecognizerClient::ErrorCode wk_error_code;
switch (error.code) {
case content::SPEECH_RECOGNITION_ERROR_ABORTED:
@@ -140,7 +150,7 @@ void SpeechRecognitionDispatcher::OnErrorOccurred(
wk_error_code = WebSpeechRecognizerClient::OtherError;
}
recognizer_client_->didReceiveError(GetHandleFromID(request_id),
- WebString(), // TODO(primiano) message?
+ WebString(), // TODO(primiano): message?
wk_error_code);
}
}
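For context, the full interim mapping around the case shown above would plausibly read as below. This is a sketch only: apart from OtherError, the WebSpeechRecognizerClient enum values are assumed here rather than shown by this diff.

    WebSpeechRecognizerClient::ErrorCode wk_error_code;
    switch (error.code) {
      case content::SPEECH_RECOGNITION_ERROR_ABORTED:
        wk_error_code = WebSpeechRecognizerClient::AbortedError;
        break;
      case content::SPEECH_RECOGNITION_ERROR_AUDIO:
        wk_error_code = WebSpeechRecognizerClient::AudioCaptureError;
        break;
      case content::SPEECH_RECOGNITION_ERROR_NETWORK:
        wk_error_code = WebSpeechRecognizerClient::NetworkError;
        break;
      default:
        wk_error_code = WebSpeechRecognizerClient::OtherError;  // Catch-all.
        break;
    }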
@@ -160,8 +170,8 @@ void SpeechRecognitionDispatcher::OnResultRetrieved(
transcripts[i] = result.hypotheses[i].utterance;
confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
}
- webkit_result.assign(transcripts, confidences, !result.provisional);
- // TODO(primiano) Handle history, currently empty.
+ webkit_result.assign(transcripts, confidences, !result.is_provisional);
+ // TODO(primiano): Handle history, currently empty.
WebVector<WebSpeechRecognitionResult> empty_history;
recognizer_client_->didReceiveResult(GetHandleFromID(request_id),
webkit_result,
@@ -169,7 +179,8 @@ void SpeechRecognitionDispatcher::OnResultRetrieved(
empty_history);
}
-int SpeechRecognitionDispatcher::GetIDForHandle(
+
+int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
const WebSpeechRecognitionHandle& handle) {
// Search first for an existing mapping.
for (HandleMap::iterator iter = handle_map_.begin();
@@ -185,6 +196,17 @@ int SpeechRecognitionDispatcher::GetIDForHandle(
return new_id;
}
+bool SpeechRecognitionDispatcher::HandleExists(
+ const WebSpeechRecognitionHandle& handle) {
+ for (HandleMap::iterator iter = handle_map_.begin();
+ iter != handle_map_.end();
+ ++iter) {
+ if (iter->second.equals(handle))
+ return true;
+ }
+ return false;
+}
+
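Both HandleExists and GetOrCreateIDForHandle scan the map linearly because the key is the request ID, not the handle. A sketch of the assumed mapping shape (the actual typedef lives in speech_recognition_dispatcher.h):

    // Assumed shape, consistent with the lookups above: request_id -> handle.
    // Handle lookups are O(n), which is fine for the handful of concurrent
    // sessions a page realistically creates.
    typedef std::map<int, WebKit::WebSpeechRecognitionHandle> HandleMap;
    HandleMap handle_map_;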
const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
int request_id) {
HandleMap::iterator iter = handle_map_.find(request_id);
diff --git a/content/renderer/speech_recognition_dispatcher.h b/content/renderer/speech_recognition_dispatcher.h
index 3e3b141..bd643ede 100644
--- a/content/renderer/speech_recognition_dispatcher.h
+++ b/content/renderer/speech_recognition_dispatcher.h
@@ -54,7 +54,8 @@ class SpeechRecognitionDispatcher : public content::RenderViewObserver,
void OnResultRetrieved(int request_id,
const content::SpeechRecognitionResult& result);
- int GetIDForHandle(const WebKit::WebSpeechRecognitionHandle& handle);
+ int GetOrCreateIDForHandle(const WebKit::WebSpeechRecognitionHandle& handle);
+ bool HandleExists(const WebKit::WebSpeechRecognitionHandle& handle);
const WebKit::WebSpeechRecognitionHandle& GetHandleFromID(int handle_id);
// The WebKit client class that we use to send events back to the JS world.