summaryrefslogtreecommitdiffstats
path: root/content/browser/speech/speech_recognition_engine.h
blob: 73ba26ec7e50236b0caae1619a6eb83f3cb887e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_

#include <string>

#include "base/basictypes.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_grammar.h"
#include "content/public/common/speech_recognition_result.h"

namespace content {

// Forward declarations: this header only refers to these types by const
// reference, so their full definitions are not required here.
class AudioChunk;
struct SpeechRecognitionError;

// This interface models the basic contract that a speech recognition engine,
// either working locally or relying on a remote web-service, must obey.
// The expected call sequence for exported methods is:
// StartRecognition      Mandatory at beginning of SR.
//   TakeAudioChunk      For every audio chunk pushed.
//   AudioChunksEnded    Finalize the audio stream (omitted in case of errors).
// EndRecognition        Mandatory at end of SR (even on errors).
// No delegate callbacks are allowed before StartRecognition or after
// EndRecognition. If a recognition was started, the caller can free the
// SpeechRecognitionEngine only after calling EndRecognition.
class SpeechRecognitionEngine {
 public:
  // Interface for receiving callbacks from this object.
  class Delegate {
   public:
    // Called whenever a result is retrieved. It might be issued several times,
    // (e.g., in the case of continuous speech recognition engine
    // implementations).
    virtual void OnSpeechRecognitionEngineResults(
        const SpeechRecognitionResults& results) = 0;

    // Called to report |error| to the delegate.
    virtual void OnSpeechRecognitionEngineError(
        const SpeechRecognitionError& error) = 0;

   protected:
    // Protected so that a delegate cannot be deleted through this interface.
    virtual ~Delegate() {}
  };

  // Remote engine configuration. The constructor and destructor are only
  // declared here; their definitions live outside this header.
  struct CONTENT_EXPORT Config {
    Config();
    ~Config();

    std::string language;
    SpeechRecognitionGrammarArray grammars;
    bool filter_profanities;
    bool continuous;
    bool interim_results;
    uint32 max_hypotheses;
    std::string hardware_info;
    std::string origin_url;
    int audio_sample_rate;
    int audio_num_bits_per_sample;
  };

  // Starts with no delegate attached, so that delegate() returns NULL (rather
  // than an indeterminate pointer) until set_delegate() is called.
  SpeechRecognitionEngine() : delegate_(NULL) {}
  virtual ~SpeechRecognitionEngine() {}

  // Set/change the recognition engine configuration. It is not allowed to call
  // this function while a recognition is ongoing.
  virtual void SetConfig(const Config& config) = 0;

  // Called when the speech recognition begins, before any TakeAudioChunk call.
  virtual void StartRecognition() = 0;

  // End any recognition activity and don't make any further callback.
  // Must always be called to close the corresponding StartRecognition call,
  // even in case of errors.
  // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
  virtual void EndRecognition() = 0;

  // Push a chunk of uncompressed audio data, where the chunk length agrees with
  // GetDesiredAudioChunkDurationMs().
  virtual void TakeAudioChunk(const AudioChunk& data) = 0;

  // Notifies the engine that audio capture has completed and no more chunks
  // will be pushed. The engine, however, can still provide further results
  // using the audio chunks collected so far.
  virtual void AudioChunksEnded() = 0;

  // Checks whether recognition of pushed audio data is pending.
  virtual bool IsRecognitionPending() const = 0;

  // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
  virtual int GetDesiredAudioChunkDurationMs() const = 0;

  // set_delegate detached from constructor for lazy dependency injection.
  // The pointer is stored as-is; this class never deletes it.
  void set_delegate(Delegate* delegate) { delegate_ = delegate; }

 protected:
  // Returns the delegate last passed to set_delegate(), or NULL if none has
  // been set yet.
  Delegate* delegate() const { return delegate_; }

 private:
  // Stored as a raw pointer; NULL until set_delegate() is called.
  Delegate* delegate_;
};

// These typedefs work around an issue with certain versions of Visual Studio,
// which get confused between multiple Delegate classes and report a C2500
// error.
typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;

}  // namespace content

#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_