Add support for native TTS to provide multiple voices.

This change refactors the text-to-speech system so that the "platform native" text-to-speech implementation can provide multiple voices, rather than just the system default voice. This refactoring also further reduces the coupling between the main TTS system in chrome/browser/speech and the extension API in chrome/browser/speech/extension_api - the goal is to later move all of the non-extension code to content. This change also implements multiple voices on Mac OS X. Windows support will be in a subsequent changelist. BUG=88059 NOTRY=true Review URL: https://chromiumcodereview.appspot.com/14657013 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@199615 0039d316-1c4b-4281-b951-d872f2087c98
author: dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-05-11 18:51:43 +0000
committer: dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-05-11 18:51:43 +0000
commit: 56f6f9dc5522435534cebcf0a1a9a239dbda2bea (patch)
tree: ba37024edb6d2d6f06273f6fdfef976f00507719 /chrome/browser/speech/tts_controller.cc
parent: 71395b17a1da1100fe57e8ad141c2ea5da338416 (diff)
download: chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.zip
chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.tar.gz
chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.tar.bz2
1 files changed, 120 insertions, 66 deletions
diff --git a/chrome/browser/speech/tts_controller.cc b/chrome/browser/speech/tts_controller.cc
index f6bff52..d7fb66e 100644
--- a/chrome/browser/speech/tts_controller.cc
+++ b/chrome/browser/speech/tts_controller.cc
@@ -13,18 +13,24 @@
 #include "chrome/browser/profiles/profile.h"
 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
-#include "chrome/browser/speech/extension_api/tts_extension_api_constants.h"
 #include "chrome/browser/speech/tts_platform.h"
 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
 #include "chrome/common/extensions/extension.h"
 
-namespace constants = tts_extension_api_constants;
-
 namespace {
 // A value to be used to indicate that there is no char index available.
 const int kInvalidCharIndex = -1;
-}  // namespace
 
+// Given a language/region code of the form 'fr-FR', returns just the basic
+// language portion, e.g. 'fr'.
+std::string TrimLanguageCode(std::string lang) {
+  if (lang.size() >= 5 && lang[2] == '-')
+    return lang.substr(0, 2);
+  else
+    return lang;
+}
+
+}  // namespace
 
 bool IsFinalTtsEventType(TtsEventType event_type) {
   return (event_type == TTS_EVENT_END ||
@@ -33,7 +39,6 @@ bool IsFinalTtsEventType(TtsEventType event_type) {
           event_type == TTS_EVENT_ERROR);
 }
 
-
 //
 // UtteranceContinuousParameters
 //
@@ -50,7 +55,9 @@ UtteranceContinuousParameters::UtteranceContinuousParameters()
 //
 
 
-VoiceData::VoiceData() {}
+VoiceData::VoiceData()
+    : gender(TTS_GENDER_NONE),
+      native(false) {}
 
 VoiceData::~VoiceData() {}
 
@@ -133,50 +140,59 @@ void TtsController::SpeakOrEnqueue(Utterance* utterance) {
 }
 
 void TtsController::SpeakNow(Utterance* utterance) {
-  const extensions::Extension* extension;
-  size_t voice_index;
-  if (GetMatchingExtensionVoice(utterance, &extension, &voice_index)) {
+  // Get all available voices and try to find a matching voice.
+  std::vector<VoiceData> voices;
+  GetVoices(utterance->profile(), &voices);
+  int index = GetMatchingVoice(utterance, voices);
+
+  // Select the matching voice, but if none was found, initialize an
+  // empty VoiceData with native = true, which will give the native
+  // speech synthesizer a chance to try to synthesize the utterance
+  // anyway.
+  VoiceData voice;
+  if (index >= 0 && index < static_cast<int>(voices.size()))
+    voice = voices[index];
+  else
+    voice.native = true;
+
+  if (!voice.native) {
+    DCHECK(!voice.extension_id.empty());
     current_utterance_ = utterance;
-    utterance->set_extension_id(extension->id());
-
-    ExtensionTtsEngineSpeak(utterance, extension, voice_index);
-
-    const std::vector<extensions::TtsVoice>* tts_voices =
-        extensions::TtsVoice::GetTtsVoices(extension);
-    std::set<std::string> event_types;
-    if (tts_voices)
-      event_types = tts_voices->at(voice_index).event_types;
+    utterance->set_extension_id(voice.extension_id);
+    ExtensionTtsEngineSpeak(utterance, voice);
     bool sends_end_event =
-        (event_types.find(constants::kEventTypeEnd) != event_types.end());
+        voice.events.find(TTS_EVENT_END) != voice.events.end();
     if (!sends_end_event) {
       utterance->Finish();
       delete utterance;
       current_utterance_ = NULL;
       SpeakNextUtterance();
     }
-    return;
-  }
-
-  GetPlatformImpl()->clear_error();
-  bool success = GetPlatformImpl()->Speak(
-      utterance->id(),
-      utterance->text(),
-      utterance->lang(),
-      utterance->continuous_parameters());
-
-  if (!success &&
-      GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
-    utterance_queue_.push(utterance);
-    return;
-  }
+  } else {
+    GetPlatformImpl()->clear_error();
+    bool success = GetPlatformImpl()->Speak(
+        utterance->id(),
+        utterance->text(),
+        utterance->lang(),
+        voice,
+        utterance->continuous_parameters());
+
+    // If the native voice wasn't able to process this speech, see if
+    // the browser has built-in TTS that isn't loaded yet.
+    if (!success &&
+        GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
+      utterance_queue_.push(utterance);
+      return;
+    }
 
-  if (!success) {
-    utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
-                          GetPlatformImpl()->error());
-    delete utterance;
-    return;
+    if (!success) {
+      utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
+                            GetPlatformImpl()->error());
+      delete utterance;
+      return;
+    }
+    current_utterance_ = utterance;
   }
-  current_utterance_ = utterance;
 }
 
 void TtsController::Stop() {
@@ -214,33 +230,12 @@ void TtsController::OnTtsEvent(int utterance_id,
 
 void TtsController::GetVoices(Profile* profile,
                               std::vector<VoiceData>* out_voices) {
-  TtsPlatformImpl* platform_impl = GetPlatformImpl();
-  if (platform_impl && platform_impl->PlatformImplAvailable()) {
-    out_voices->push_back(VoiceData());
-    VoiceData& voice = out_voices->back();
-    voice.name = constants::kNativeVoiceName;
-    voice.gender = platform_impl->gender();
-
-    // All platforms must send end events, and cancelled and interrupted
-    // events are generated from the controller.
-    DCHECK(platform_impl->SendsEvent(TTS_EVENT_END));
-    voice.events.push_back(constants::kEventTypeEnd);
-    voice.events.push_back(constants::kEventTypeCancelled);
-    voice.events.push_back(constants::kEventTypeInterrupted);
-
-    if (platform_impl->SendsEvent(TTS_EVENT_START))
-      voice.events.push_back(constants::kEventTypeStart);
-    if (platform_impl->SendsEvent(TTS_EVENT_WORD))
-      voice.events.push_back(constants::kEventTypeWord);
-    if (platform_impl->SendsEvent(TTS_EVENT_SENTENCE))
-      voice.events.push_back(constants::kEventTypeSentence);
-    if (platform_impl->SendsEvent(TTS_EVENT_MARKER))
-      voice.events.push_back(constants::kEventTypeMarker);
-    if (platform_impl->SendsEvent(TTS_EVENT_ERROR))
-      voice.events.push_back(constants::kEventTypeError);
-  }
+  if (profile)
+    GetExtensionVoices(profile, out_voices);
 
-  GetExtensionVoices(profile, out_voices);
+  TtsPlatformImpl* platform_impl = GetPlatformImpl();
+  if (platform_impl && platform_impl->PlatformImplAvailable())
+    platform_impl->GetVoices(out_voices);
 }
 
 bool TtsController::IsSpeaking() {
@@ -299,3 +294,62 @@ TtsPlatformImpl* TtsController::GetPlatformImpl() {
     platform_impl_ = TtsPlatformImpl::GetInstance();
   return platform_impl_;
 }
+
+int TtsController::GetMatchingVoice(
+    const Utterance* utterance, std::vector<VoiceData>& voices) {
+  // Make two passes: the first time, do strict language matching
+  // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
+  // language matching ('fr-FR' matches 'fr' and 'fr-CA')
+  for (int pass = 0; pass < 2; ++pass) {
+    for (size_t i = 0; i < voices.size(); ++i) {
+      const VoiceData& voice = voices[i];
+
+      if (!utterance->extension_id().empty() &&
+          utterance->extension_id() != voice.extension_id) {
+        continue;
+      }
+
+      if (!voice.name.empty() &&
+          !utterance->voice_name().empty() &&
+          voice.name != utterance->voice_name()) {
+        continue;
+      }
+      if (!voice.lang.empty() && !utterance->lang().empty()) {
+        std::string voice_lang = voice.lang;
+        std::string utterance_lang = utterance->lang();
+        if (pass == 1) {
+          voice_lang = TrimLanguageCode(voice_lang);
+          utterance_lang = TrimLanguageCode(utterance_lang);
+        }
+        if (voice_lang != utterance_lang) {
+          continue;
+        }
+      }
+      if (voice.gender != TTS_GENDER_NONE &&
+          utterance->gender() != TTS_GENDER_NONE &&
+          voice.gender != utterance->gender()) {
+        continue;
+      }
+
+      if (utterance->required_event_types().size() > 0) {
+        bool has_all_required_event_types = true;
+        for (std::set<TtsEventType>::const_iterator iter =
+                 utterance->required_event_types().begin();
+             iter != utterance->required_event_types().end();
+             ++iter) {
+          if (voice.events.find(*iter) == voice.events.end()) {
+            has_all_required_event_types = false;
+            break;
+          }
+        }
+        if (!has_all_required_event_types)
+          continue;
+      }
+
+      return static_cast<int>(i);
+    }
+  }
+
+  return -1;
+}
+
author	dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-05-11 18:51:43 +0000
committer	dmazzoni@chromium.org <dmazzoni@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-05-11 18:51:43 +0000
commit	56f6f9dc5522435534cebcf0a1a9a239dbda2bea (patch)
tree	ba37024edb6d2d6f06273f6fdfef976f00507719 /chrome/browser/speech/tts_controller.cc
parent	71395b17a1da1100fe57e8ad141c2ea5da338416 (diff)
download	chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.zip chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.tar.gz chromium_src-56f6f9dc5522435534cebcf0a1a9a239dbda2bea.tar.bz2