// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/speech/chrome_speech_recognition_manager_delegate.h"

#include <set>
#include <string>

#include "base/bind.h"
#include "base/synchronization/lock.h"
#include "base/threading/thread_restrictions.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/extensions/extension_service.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/profiles/profile_manager.h"
#include "chrome/browser/speech/chrome_speech_recognition_preferences.h"
#include "chrome/browser/speech/speech_recognition_tray_icon_controller.h"
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/browser/view_type_utils.h"
#include "chrome/common/pref_names.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/notification_registrar.h"
#include "content/public/browser/notification_source.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/resource_context.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "grit/generated_resources.h"
#include "net/url_request/url_request_context_getter.h"
#include "ui/base/l10n/l10n_util.h"

#if defined(OS_WIN)
#include "chrome/installer/util/wmi.h"
#endif

using content::BrowserThread;
using content::SpeechRecognitionManager;
using content::WebContents;

namespace {

const char kExtensionPrefix[] = "chrome-extension://";

bool RequiresBubble(int session_id) {
  return SpeechRecognitionManager::GetInstance()->
      GetSessionContext(session_id).requested_by_page_element;
}

bool RequiresTrayIcon(int session_id) {
  return !RequiresBubble(session_id);
}

}  // namespace

namespace speech {

// Asynchronously fetches the PC and audio hardware/driver info if
// the user has opted into UMA. This information is sent with speech input
// requests to the server for identifying and improving quality issues with
// specific device configurations.
class ChromeSpeechRecognitionManagerDelegate::OptionalRequestInfo
    : public base::RefCountedThreadSafe<OptionalRequestInfo> {
 public:
  OptionalRequestInfo() : can_report_metrics_(false) {
  }

  void Refresh() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    // UMA opt-in can be checked only from the UI thread, so switch to that.
    BrowserThread::PostTask(
        BrowserThread::UI, FROM_HERE,
        base::Bind(&OptionalRequestInfo::CheckUMAAndGetHardwareInfo, this));
  }

  void CheckUMAAndGetHardwareInfo() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    if (g_browser_process->local_state()->GetBoolean(
        prefs::kMetricsReportingEnabled)) {
      // Access potentially slow OS calls from the FILE thread.
      BrowserThread::PostTask(
          BrowserThread::FILE, FROM_HERE,
          base::Bind(&OptionalRequestInfo::GetHardwareInfo, this));
    }
  }

  void GetHardwareInfo() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    base::AutoLock lock(lock_);
    can_report_metrics_ = true;
    string16 device_model =
        SpeechRecognitionManager::GetInstance()->GetAudioInputDeviceModel();
#if defined(OS_WIN)
    value_ = UTF16ToUTF8(
        installer::WMIComputerSystem::GetModel() + L"|" + device_model);
#else  // defined(OS_WIN)
    value_ = UTF16ToUTF8(device_model);
#endif  // defined(OS_WIN)
  }

  std::string value() {
    base::AutoLock lock(lock_);
    return value_;
  }

  bool can_report_metrics() {
    base::AutoLock lock(lock_);
    return can_report_metrics_;
  }

 private:
  friend class base::RefCountedThreadSafe<OptionalRequestInfo>;

  ~OptionalRequestInfo() {}

  base::Lock lock_;
  std::string value_;
  bool can_report_metrics_;

  DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
};

// Simple utility to get notified when a WebContents (a tab or an extension's
// background page) is closed or crashes. Both the callback site and the
// callback thread are passed by the caller in the constructor.
// There is no restriction on the constructor, however this class must be
// destroyed on the UI thread, due to the NotificationRegistrar dependency.
class ChromeSpeechRecognitionManagerDelegate::TabWatcher
    : public base::RefCountedThreadSafe<TabWatcher>,
      public content::NotificationObserver {
 public:
  typedef base::Callback<void(int render_process_id, int render_view_id)>
      TabClosedCallback;

  TabWatcher(TabClosedCallback tab_closed_callback,
             BrowserThread::ID callback_thread)
      : tab_closed_callback_(tab_closed_callback),
        callback_thread_(callback_thread) {
  }

  // Starts monitoring the WebContents corresponding to the given
  // |render_process_id|, |render_view_id| pair, invoking
  // |tab_closed_callback_| if it is closed/unloaded.
  void Watch(int render_process_id, int render_view_id) {
    if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
      BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(
          &TabWatcher::Watch, this, render_process_id, render_view_id));
      return;
    }

    WebContents* web_contents = tab_util::GetWebContentsByID(render_process_id,
                                                             render_view_id);
    // Sessions initiated by speech input extension APIs will end up with a
    // NULL WebContents here, but they are properly managed by the
    // chrome::SpeechInputExtensionManager. However, sessions initiated within
    // an extension using the (new) speech JS APIs are properly handled here.
    // TODO(primiano) turn this line into a DCHECK once speech input extension
    // API is deprecated.
    if (!web_contents)
      return;

    // Avoid multiple registrations on |registrar_| for the same
    // |web_contents|.
    if (registered_web_contents_.find(web_contents) !=
        registered_web_contents_.end()) {
      return;
    }
    registered_web_contents_.insert(web_contents);

    // Lazily initialize the registrar.
    if (!registrar_.get())
      registrar_.reset(new content::NotificationRegistrar());

    registrar_->Add(this,
                    content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
                    content::Source<WebContents>(web_contents));
  }

  // content::NotificationObserver implementation.
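  // Called when a watched WebContents is closed or crashes: unregisters it
  // and notifies |tab_closed_callback_| on |callback_thread_|.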
  virtual void Observe(int type,
                       const content::NotificationSource& source,
                       const content::NotificationDetails& details) OVERRIDE {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    DCHECK_EQ(content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED, type);

    WebContents* web_contents = content::Source<WebContents>(source).ptr();
    int render_process_id = web_contents->GetRenderProcessHost()->GetID();
    int render_view_id = web_contents->GetRenderViewHost()->GetRoutingID();

    registrar_->Remove(this,
                       content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
                       content::Source<WebContents>(web_contents));
    registered_web_contents_.erase(web_contents);

    BrowserThread::PostTask(callback_thread_, FROM_HERE, base::Bind(
        tab_closed_callback_, render_process_id, render_view_id));
  }

 private:
  friend class base::RefCountedThreadSafe<TabWatcher>;

  virtual ~TabWatcher() {
    // Must be destroyed on the UI thread, since |registrar_| is not
    // thread-safe.
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  }

  // Lazily initialized and used on the UI thread to handle web contents
  // notifications (tab closing).
  scoped_ptr<content::NotificationRegistrar> registrar_;

  // Keeps track of which WebContents have been registered, in order to avoid
  // double registrations on |registrar_|.
  std::set<content::WebContents*> registered_web_contents_;

  // Callback used to notify, on the thread specified by |callback_thread_|,
  // the closure of a registered tab.
  TabClosedCallback tab_closed_callback_;
  content::BrowserThread::ID callback_thread_;

  DISALLOW_COPY_AND_ASSIGN(TabWatcher);
};

ChromeSpeechRecognitionManagerDelegate
::ChromeSpeechRecognitionManagerDelegate() {
}

ChromeSpeechRecognitionManagerDelegate
::~ChromeSpeechRecognitionManagerDelegate() {
  if (tray_icon_controller_.get())
    tray_icon_controller_->Hide();
  if (bubble_controller_.get())
    bubble_controller_->CloseBubble();
}

void ChromeSpeechRecognitionManagerDelegate::InfoBubbleButtonClicked(
    int session_id, SpeechRecognitionBubble::Button button) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Note, the session might have been destroyed, therefore avoid calls to the
  // manager which imply its existence (e.g., GetSessionContext()).

  if (button == SpeechRecognitionBubble::BUTTON_CANCEL) {
    GetBubbleController()->CloseBubble();
    last_session_config_.reset();

    // We can safely call AbortSession even if the session has already ended:
    // the manager's public methods are reliable and will handle it properly.
    SpeechRecognitionManager::GetInstance()->AbortSession(session_id);
  } else if (button == SpeechRecognitionBubble::BUTTON_TRY_AGAIN) {
    GetBubbleController()->CloseBubble();
    RestartLastSession();
  } else {
    NOTREACHED();
  }
}

void ChromeSpeechRecognitionManagerDelegate::InfoBubbleFocusChanged(
    int session_id) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // This check is needed since on some systems (MacOS), in rare cases, if the
  // user clicks repeatedly and fast on the input element, the FocusChanged
  // event (corresponding to the old session that should be aborted) can be
  // received after a new session (corresponding to the 2nd click) is started.
  if (GetBubbleController()->GetActiveSessionID() != session_id)
    return;

  // Note, the session might have been destroyed, therefore avoid calls to the
  // manager which imply its existence (e.g., GetSessionContext()).
  GetBubbleController()->CloseBubble();
  last_session_config_.reset();

  // Clicking outside the bubble means we should abort.
  SpeechRecognitionManager::GetInstance()->AbortSession(session_id);
}

void ChromeSpeechRecognitionManagerDelegate::RestartLastSession() {
  DCHECK(last_session_config_.get());
  SpeechRecognitionManager* manager = SpeechRecognitionManager::GetInstance();
  const int new_session_id = manager->CreateSession(*last_session_config_);
  DCHECK_NE(SpeechRecognitionManager::kSessionIDInvalid, new_session_id);
  last_session_config_.reset();
  manager->StartSession(new_session_id);
}

void ChromeSpeechRecognitionManagerDelegate::TabClosedCallback(
    int render_process_id, int render_view_id) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  SpeechRecognitionManager* manager = SpeechRecognitionManager::GetInstance();
  // |manager| becomes NULL if a browser shutdown happens between the post of
  // this task (from the UI thread) and this call (on the IO thread). In this
  // case we just return.
  if (!manager)
    return;

  manager->AbortAllSessionsForRenderView(render_process_id, render_view_id);

  if (bubble_controller_.get() &&
      bubble_controller_->IsShowingBubbleForRenderView(render_process_id,
                                                       render_view_id)) {
    bubble_controller_->CloseBubble();
  }
}

void ChromeSpeechRecognitionManagerDelegate::OnRecognitionStart(
    int session_id) {
  const content::SpeechRecognitionSessionContext& context =
      SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
  if (RequiresBubble(session_id)) {
    // Copy the configuration of the session (for the "try again" button).
    last_session_config_.reset(new content::SpeechRecognitionSessionConfig(
        SpeechRecognitionManager::GetInstance()->GetSessionConfig(
            session_id)));

    // Create and show the bubble.
    GetBubbleController()->CreateBubble(session_id,
                                        context.render_process_id,
                                        context.render_view_id,
                                        context.element_rect);
  }

  // Register a callback to automatically abort the session on tab closure.
  // |tab_watcher_| is lazily instantiated on the first call.
  if (!tab_watcher_.get()) {
    tab_watcher_ = new TabWatcher(
        base::Bind(&ChromeSpeechRecognitionManagerDelegate::TabClosedCallback,
                   base::Unretained(this)),
        BrowserThread::IO);
  }
  tab_watcher_->Watch(context.render_process_id, context.render_view_id);
}

void ChromeSpeechRecognitionManagerDelegate::OnAudioStart(int session_id) {
  if (RequiresBubble(session_id)) {
    DCHECK_EQ(session_id, GetBubbleController()->GetActiveSessionID());
    GetBubbleController()->SetBubbleRecordingMode();
  } else if (RequiresTrayIcon(session_id)) {
    // We post the action to the UI thread for sessions requiring a tray icon,
    // since ChromeSpeechRecognitionPreferences (which requires the UI thread)
    // is involved in determining whether a security alert balloon is required.
    const content::SpeechRecognitionSessionContext& context =
        SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(
        &ChromeSpeechRecognitionManagerDelegate::ShowTrayIconOnUIThread,
        context.context_name,
        context.render_process_id,
        scoped_refptr<SpeechRecognitionTrayIconController>(
            GetTrayIconController())));
  }
}

void ChromeSpeechRecognitionManagerDelegate::OnEnvironmentEstimationComplete(
    int session_id) {
}

void ChromeSpeechRecognitionManagerDelegate::OnSoundStart(int session_id) {
}

void ChromeSpeechRecognitionManagerDelegate::OnSoundEnd(int session_id) {
}

void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {
  // OnAudioEnd can also be raised after an abort, when the bubble has already
  // been closed.
  if (GetBubbleController()->GetActiveSessionID() == session_id) {
    DCHECK(RequiresBubble(session_id));
    GetBubbleController()->SetBubbleRecognizingMode();
  } else if (RequiresTrayIcon(session_id)) {
    GetTrayIconController()->Hide();
  }
}

void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResult(
    int session_id, const content::SpeechRecognitionResult& result) {
  // The bubble will be closed upon the OnEnd event, which will follow soon.
}

void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError(
    int session_id, const content::SpeechRecognitionError& error) {
  // An error can be dispatched when the bubble is not visible anymore.
  if (GetBubbleController()->GetActiveSessionID() != session_id)
    return;
  DCHECK(RequiresBubble(session_id));

  int error_message_id = 0;
  switch (error.code) {
    case content::SPEECH_RECOGNITION_ERROR_AUDIO:
      switch (error.details) {
        case content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC:
          error_message_id = IDS_SPEECH_INPUT_NO_MIC;
          break;
        case content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE:
          error_message_id = IDS_SPEECH_INPUT_MIC_IN_USE;
          break;
        default:
          error_message_id = IDS_SPEECH_INPUT_MIC_ERROR;
          break;
      }
      break;
    case content::SPEECH_RECOGNITION_ERROR_ABORTED:
      error_message_id = IDS_SPEECH_INPUT_ABORTED;
      break;
    case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      error_message_id = IDS_SPEECH_INPUT_NO_SPEECH;
      break;
    case content::SPEECH_RECOGNITION_ERROR_NO_MATCH:
      error_message_id = IDS_SPEECH_INPUT_NO_RESULTS;
      break;
    case content::SPEECH_RECOGNITION_ERROR_NETWORK:
      error_message_id = IDS_SPEECH_INPUT_NET_ERROR;
      break;
    default:
      NOTREACHED() << "unknown error " << error.code;
      return;
  }
  GetBubbleController()->SetBubbleMessage(
      l10n_util::GetStringUTF16(error_message_id));
}

void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
    int session_id, float volume, float noise_volume) {
  if (GetBubbleController()->GetActiveSessionID() == session_id) {
    DCHECK(RequiresBubble(session_id));
    GetBubbleController()->SetBubbleInputVolume(volume, noise_volume);
  } else if (RequiresTrayIcon(session_id)) {
    GetTrayIconController()->SetVUMeterVolume(volume);
  }
}

void ChromeSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) {
  // The only case in which OnRecognitionEnd should not close the bubble is
  // when we are showing an error. In this case the bubble will be closed by
  // the |InfoBubbleFocusChanged| method, when the user clicks either the
  // "Cancel" button or outside of the bubble.
  if (GetBubbleController()->GetActiveSessionID() == session_id &&
      !GetBubbleController()->IsShowingMessage()) {
    DCHECK(RequiresBubble(session_id));
    GetBubbleController()->CloseBubble();
  }
}

void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
    bool* can_report_metrics, std::string* hardware_info) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (!optional_request_info_.get()) {
    optional_request_info_ = new OptionalRequestInfo();
    // Since hardware info is optional with speech input requests, we start an
    // asynchronous fetch here and move on with recording audio. This first
    // speech input request will send an empty string for hardware info, and
    // subsequent requests may have the hardware info available if the fetch
    // completed before them. This way we don't end up stalling the user with
    // a long wait and disk seeks when they click on a UI element and start
    // speaking.
    optional_request_info_->Refresh();
  }
  *can_report_metrics = optional_request_info_->can_report_metrics();
  *hardware_info = optional_request_info_->value();
}

void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
    int session_id,
    base::Callback<void(bool ask_user, bool is_allowed)> callback) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  const content::SpeechRecognitionSessionContext& context =
      SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);

  // Make sure that initiators (extensions/web pages) properly set the
  // |render_process_id| field, which is needed later to retrieve the
  // ChromeSpeechRecognitionPreferences associated with their profile.
  DCHECK_NE(context.render_process_id, 0);

  // Check that the render view type is appropriate, and whether or not we
  // need to request permission from the user.
  BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
                          base::Bind(&CheckRenderViewType,
                                     callback,
                                     context.render_process_id,
                                     context.render_view_id,
                                     RequiresTrayIcon(session_id)));
}

content::SpeechRecognitionEventListener*
ChromeSpeechRecognitionManagerDelegate::GetEventListener() {
  return this;
}

void ChromeSpeechRecognitionManagerDelegate::ShowTrayIconOnUIThread(
    const std::string& context_name,
    int render_process_id,
    scoped_refptr<SpeechRecognitionTrayIconController> tray_icon_controller) {
  content::RenderProcessHost* render_process_host =
      content::RenderProcessHost::FromID(render_process_id);
  DCHECK(render_process_host);

  content::BrowserContext* browser_context =
      render_process_host->GetBrowserContext();
  Profile* profile = Profile::FromBrowserContext(browser_context);
  scoped_refptr<ChromeSpeechRecognitionPreferences> pref =
      ChromeSpeechRecognitionPreferences::GetForProfile(profile);
  bool show_notification = pref->ShouldShowSecurityNotification(context_name);
  if (show_notification)
    pref->SetHasShownSecurityNotification(context_name);

  // Speech recognitions initiated by JS APIs within an extension (so NOT by
  // the extension API) will come with a context_name like
  // "chrome-extension://id" (that is, their origin as injected by WebKit).
  // In such cases we try to look up the extension name, in order to show a
  // more user-friendly balloon.
  string16 initiator_name = UTF8ToUTF16(context_name);
  if (context_name.find(kExtensionPrefix) == 0) {
    const std::string extension_id =
        context_name.substr(sizeof(kExtensionPrefix) - 1);
    const extensions::Extension* extension =
        profile->GetExtensionService()->GetExtensionById(extension_id, true);
    DCHECK(extension);
    initiator_name = UTF8ToUTF16(extension->name());
  }

  tray_icon_controller->Show(initiator_name, show_notification);
}

void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
    base::Callback<void(bool ask_user, bool is_allowed)> callback,
    int render_process_id,
    int render_view_id,
    bool js_api) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  const content::RenderViewHost* render_view_host =
      content::RenderViewHost::FromID(render_process_id, render_view_id);

  bool allowed = false;
  bool ask_permission = false;

  if (!render_view_host) {
    if (!js_api) {
      // If there is no render view, we cannot show the speech bubble, so this
      // is not allowed.
      allowed = false;
      ask_permission = false;
    } else {
      // This happens for extensions. The manifest should be checked for
      // permission.
      allowed = true;
      ask_permission = false;
    }
    BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                            base::Bind(callback, ask_permission, allowed));
    return;
  }

  WebContents* web_contents = WebContents::FromRenderViewHost(render_view_host);
  chrome::ViewType view_type = chrome::GetViewType(web_contents);

  if (view_type == chrome::VIEW_TYPE_TAB_CONTENTS) {
    // If it is a tab, we can show the speech input bubble or ask for
    // permission.
    allowed = true;
    if (js_api)
      ask_permission = true;
  }

  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(callback, ask_permission, allowed));
}

SpeechRecognitionBubbleController*
ChromeSpeechRecognitionManagerDelegate::GetBubbleController() {
  if (!bubble_controller_.get())
    bubble_controller_ = new SpeechRecognitionBubbleController(this);
  return bubble_controller_.get();
}

SpeechRecognitionTrayIconController*
ChromeSpeechRecognitionManagerDelegate::GetTrayIconController() {
  if (!tray_icon_controller_.get())
    tray_icon_controller_ = new SpeechRecognitionTrayIconController();
  return tray_icon_controller_.get();
}

}  // namespace speech