// chrome/common/extensions/api/tts.json

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

[
  {
    "namespace": "tts",
    "types": [
      {
        // Schema for the status object handed to utterance event listeners;
        // both speak()'s options.onEvent and the namespace-level onEvent
        // event reference it through "$ref": "TtsEvent".
        "id": "TtsEvent",
        "description": "An event from the TTS engine to communicate the status of an utterance.",
        "type": "object",
        "properties": {
          // "type" is the only property that is not marked optional.
          "type": {
            "description": "The type can be 'start' as soon as speech has started, 'word' when a word boundary is reached, 'sentence' when a sentence boundary is reached, 'marker' when an SSML mark element is reached, 'end' when the end of the utterance is reached, 'interrupted' when the utterance is stopped or interrupted before reaching the end, 'cancelled' when it's removed from the queue before ever being synthesized, or 'error' when any other error occurs.",
            "type": "string",
            "enum": [
              "start",
              "end",
              "word",
              "sentence",
              "marker",
              "interrupted",
              "cancelled",
              "error"
            ]
          },
          "charIndex": {
            "description": "The index of the current character in the utterance.",
            "type": "number",
            "optional": true
          },
          "errorMessage": {
            "description": "The error description, if the event type is 'error'.",
            "type": "string",
            "optional": true
          },
          // The two properties below carry "nodoc": true (presumably this
          // hides them from generated documentation -- confirm with the
          // schema tooling).
          "srcId": {
            "description": "An ID unique to the calling function's context so that events can get routed back to the correct tts.speak call.",
            "type": "number",
            "optional": true,
            "nodoc": true
          },
          "isFinalEvent": {
            "description": "True if this is the final event that will be sent to this handler.",
            "type": "boolean",
            "optional": true,
            "nodoc": true
          }
        }
      },
      {
        // Describes a single synthesis voice; getVoices() hands an array of
        // these to its callback. Every property here is marked optional.
        "id": "TtsVoice",
        "description": "A description of a voice available for speech synthesis.",
        "type": "object",
        "properties": {
          "voiceName": {
            "description": "The name of the voice.",
            "type": "string",
            "optional": true
          },
          "lang": {
            "description": "The language that this voice supports, in the form <em>language</em>-<em>region</em>. Examples: 'en', 'en-US', 'en-GB', 'zh-CN'.",
            "type": "string",
            "optional": true
          },
          "gender": {
            "description": "This voice's gender.",
            "type": "string",
            "optional": true,
            "enum": [
              "male",
              "female"
            ]
          },
          "extensionId": {
            "description": "The ID of the extension providing this voice.",
            "type": "string",
            "optional": true
          },
          "eventTypes": {
            "description": "All of the callback event types that this voice is capable of sending.",
            "type": "array",
            "optional": true,
            "items": {
              "type": "string"
            }
          }
        }
      }
    ],
    "functions": [
      {
        // Entry point for synthesis. Parameters, in order: the utterance
        // text, an optional options object, and an optional completion
        // callback that takes no arguments.
        "name": "speak",
        "description": "Speaks text using a text-to-speech engine.",
        "type": "function",
        "parameters": [
          {
            "name": "utterance",
            "description": "The text to speak, either plain text or a complete, well-formed SSML document. Speech engines that do not support SSML will strip away the tags and speak the text. The maximum length of the text is 32,768 characters.",
            "type": "string"
          },
          {
            "name": "options",
            "description": "The speech options.",
            "type": "object",
            "optional": true,
            "properties": {
              // Every option below is individually optional.
              "enqueue": {
                "description": "If true, enqueues this utterance if TTS is already in progress. If false (the default), interrupts any current speech and flushes the speech queue before speaking this new utterance.",
                "type": "boolean",
                "optional": true
              },
              "voiceName": {
                "description": "The name of the voice to use for synthesis. If empty, uses any available voice.",
                "type": "string",
                "optional": true
              },
              "extensionId": {
                "description": "The extension ID of the speech engine to use, if known.",
                "type": "string",
                "optional": true
              },
              "lang": {
                "description": "The language to be used for synthesis, in the form <em>language</em>-<em>region</em>. Examples: 'en', 'en-US', 'en-GB', 'zh-CN'.",
                "type": "string",
                "optional": true
              },
              "gender": {
                "description": "Gender of voice for synthesized speech.",
                "type": "string",
                "optional": true,
                "enum": [
                  "male",
                  "female"
                ]
              },
              // rate, pitch and volume each declare numeric bounds through
              // "minimum"/"maximum"; the bounds match the ranges quoted in
              // their descriptions.
              "rate": {
                "description": "Speaking rate relative to the default rate for this voice. 1.0 is the default rate, normally around 180 to 220 words per minute. 2.0 is twice as fast, and 0.5 is half as fast. Values below 0.1 or above 10.0 are strictly disallowed, but many voices will constrain the minimum and maximum rates further&mdash;for example a particular voice may not actually speak faster than 3 times normal even if you specify a value larger than 3.0.",
                "type": "number",
                "optional": true,
                "minimum": 0.1,
                "maximum": 10
              },
              "pitch": {
                "description": "Speaking pitch between 0 and 2 inclusive, with 0 being lowest and 2 being highest. 1.0 corresponds to a voice's default pitch.",
                "type": "number",
                "optional": true,
                "minimum": 0,
                "maximum": 2
              },
              "volume": {
                "description": "Speaking volume between 0 and 1 inclusive, with 0 being lowest and 1 being highest, with a default of 1.0.",
                "type": "number",
                "optional": true,
                "minimum": 0,
                "maximum": 1
              },
              "requiredEventTypes": {
                "description": "The TTS event types the voice must support.",
                "type": "array",
                "optional": true,
                "items": {
                  "type": "string"
                }
              },
              "desiredEventTypes": {
                "description": "The TTS event types that you are interested in listening to. If missing, all event types may be sent.",
                "type": "array",
                "optional": true,
                "items": {
                  "type": "string"
                }
              },
              // Per-utterance listener; receives a single TtsEvent argument.
              "onEvent": {
                "description": "This function is called with events that occur in the process of speaking the utterance.",
                "type": "function",
                "optional": true,
                "parameters": [
                  {
                    "name": "event",
                    "description": "The update event from the text-to-speech engine indicating the status of this utterance.",
                    "$ref": "TtsEvent"
                  }
                ]
              }
            }
          },
          {
            // The description points at chrome.runtime.lastError, which
            // supersedes the deprecated chrome.extension.lastError alias.
            "name": "callback",
            "description": "Called right away, before speech finishes. Check chrome.runtime.lastError to make sure there were no errors. Use options.onEvent to get more detailed feedback.",
            "type": "function",
            "optional": true,
            "parameters": []
          }
        ]
      },
      {
        // Declares no parameters at all, so there is no completion callback.
        "name": "stop",
        "description": "Stops any current speech.",
        "type": "function",
        "parameters": []
      },
      {
        // The boolean result is delivered only through the callback.
        // NOTE(review): the callback is marked optional even though it is the
        // sole way to receive the answer -- confirm that this is intended.
        "name": "isSpeaking",
        "description": "Checks if the engine is currently speaking.",
        "type": "function",
        "parameters": [
          {
            "name": "callback",
            "type": "function",
            "optional": true,
            "parameters": [
              {
                "name": "speaking",
                "description": "True if speaking, false otherwise.",
                "type": "boolean"
              }
            ]
          }
        ]
      },
      {
        // The voice list is delivered only through the callback, as an array
        // whose items reference the TtsVoice type.
        // NOTE(review): the callback is marked optional even though it is the
        // sole way to receive the list -- confirm that this is intended.
        "name": "getVoices",
        "description": "Gets an array of all available voices.",
        "type": "function",
        "parameters": [
          {
            "name": "callback",
            "type": "function",
            "optional": true,
            "parameters": [
              {
                "name": "voices",
                "description": "Array of $ref:TtsVoice objects representing the available voices for speech synthesis.",
                "type": "array",
                "items": {
                  "$ref": "TtsVoice"
                }
              }
            ]
          }
        ]
      }
    ],
    "events": [
      {
        // Namespace-level event carrying a single TtsEvent argument. It is
        // flagged "nodoc": true (presumably hidden from generated
        // documentation -- confirm with the schema tooling).
        "name": "onEvent",
        "description": "Used to pass events back to the function calling speak().",
        "type": "function",
        "nodoc": true,
        "parameters": [
          {
            "name": "event",
            "description": "The event from the text-to-speech engine indicating the status of this utterance.",
            "$ref": "TtsEvent"
          }
        ]
      }
    ]
  }
]