media/cast/test/utility/audio_utility.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <cmath>
#include <vector>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/time/time.h"
#include "media/base/audio_bus.h"
#include "media/cast/test/utility/audio_utility.h"

namespace media {
namespace cast {

// The constant pi, used to turn sample indices into sine/cosine phases in
// EncodeTimestamp() and DecodeOneFrequency() below.
const double Pi = 3.14159265358979323846;

// Stores |num_channels|, |sample_rate| and |volume| for later use by
// NextAudioBus(), and constructs |source_| from |num_channels|,
// |sine_wave_frequency| and |sample_rate|.
//
// CHECKs that |num_channels| and |sample_rate| are strictly positive and that
// |volume| lies within [0.0, 1.0].
TestAudioBusFactory::TestAudioBusFactory(int num_channels,
                                         int sample_rate,
                                         float sine_wave_frequency,
                                         float volume)
    : num_channels_(num_channels),
      sample_rate_(sample_rate),
      volume_(volume),
      source_(num_channels, sine_wave_frequency, sample_rate) {
  // Note: these checks run after the initializer list above, i.e. after
  // |source_| has already been constructed from the raw arguments.
  CHECK_LT(0, num_channels);
  CHECK_LT(0, sample_rate);
  CHECK_LE(0.0f, volume_);
  CHECK_LE(volume_, 1.0f);
}

// The body is empty: no explicit cleanup is performed here.
TestAudioBusFactory::~TestAudioBusFactory() {}

// Creates a new AudioBus covering |duration| of audio at |sample_rate_|, fills
// it from |source_|, scales it by |volume_| and hands ownership of it to the
// caller.
scoped_ptr<AudioBus> TestAudioBusFactory::NextAudioBus(
    const base::TimeDelta& duration) {
  // Convert |duration| into a whole number of sample frames; any fractional
  // frame is dropped by the integer conversion.
  const base::TimeDelta one_second = base::TimeDelta::FromSeconds(1);
  const int frame_count =
      static_cast<int>((sample_rate_ * duration) / one_second);

  scoped_ptr<AudioBus> audio_bus(
      AudioBus::Create(num_channels_, frame_count));
  AudioBus* const raw_bus = audio_bus.get();
  source_.OnMoreData(raw_bus, 0);
  raw_bus->Scale(volume_);
  return audio_bus.Pass();
}

// Returns how many times the signal in |samples| (|length| entries) changes
// sign, ignoring low-level wobble around zero.
//
// A sample only counts when its magnitude reaches |kAmplitudeThreshold|, and
// a crossing is registered when such a sample lies on the opposite side of
// zero from the most recently accepted one.
int CountZeroCrossings(const float* samples, int length) {
  const float kAmplitudeThreshold = 0.03f;  // 3% of max amplitude.

  // Step 1: consume samples until one is loud enough to serve as the initial
  // reference (or the input runs out).
  int pos = 0;
  float reference = 0.0f;
  while (pos < length && fabsf(reference) < kAmplitudeThreshold) {
    reference = samples[pos];
    ++pos;
  }

  // Step 2: walk the remainder, counting each loud sample that sits on the
  // other side of zero from the reference and adopting it as the new
  // reference.
  int crossings = 0;
  while (pos < length) {
    const float current = samples[pos];
    ++pos;
    const bool loud_enough = fabsf(current) >= kAmplitudeThreshold;
    const bool opposite_side = (reference < 0) != (current < 0);
    if (loud_enough && opposite_side) {
      ++crossings;
      reference = current;
    }
  }
  return crossings;
}

// EncodeTimestamp stores a 16-bit number as frequencies in a sample.
// Our internal code tends to work on 10ms chunks of data, and to
// make sure the decoding always works, the encoded value must be
// decodable from 5ms of sample data. Assuming a sampling rate of
// 48kHz, this turns out to be 240 samples.
// Each bit of the timestamp is stored as a frequency, where the
// frequency is bit_number * 200 Hz. We also add a 'sense' tone to
// the output, this tone is 17 * 200 = 3400Hz, and when we decode,
// we can use this tone to make sure that we aren't decoding bogus data.
// Also, we use this tone to scale our expectations in case something
// changed the volume of the audio.
//
// Normally, we will encode 480 samples (10ms) of data, but when we
// read it we will scan 240 samples at a time until something that
// can be decoded is found.
//
// The intention is to use these routines to encode the frame number
// that goes with each chunk of audio, so if our frame rate is
// 30Hz, we would encode 48000/30 = 1600 samples of "1", then
// 1600 samples of "2", etc. When we decode this, it is possible
// that we get a chunk of data that is spanning two frame numbers,
// so we gray-code the numbers. Since adjacent gray-coded numbers
// will only differ in one bit, we should never get numbers out
// of sequence when decoding, at least not by more than one.

// Spacing of the per-bit tones, in Hz: bit i is carried by a tone at
// kBaseFrequency * (i + 1).
const double kBaseFrequency = 200.0;
// Sample rate, in Hz, assumed by both the encoder and the decoder.
const int kSamplingFrequency = 48000;
// Number of bits in an encoded timestamp.
const size_t kNumBits = 16;
// Number of samples examined per decode attempt: one full period of the base
// frequency (48000 / 200 = 240).
const size_t kSamplesToAnalyze =
    static_cast<size_t>(kSamplingFrequency / kBaseFrequency);
// Frequency of the 'sense' tone, one slot above the highest bit tone
// (17 * 200 = 3400 Hz).
const double kSenseFrequency = (kNumBits + 1) * kBaseFrequency;
// Minimum measured strength of the sense tone for a window to be decoded.
const double kMinSense = 1.5;

// Fills |samples| with |length| samples that encode |timestamp| as a mix of
// sine tones.
//
// |timestamp| is gray-coded first; every set bit i of the gray code adds a
// tone at kBaseFrequency * (i + 1), and a 'sense' tone at kSenseFrequency is
// always added. |sample_offset| is added to the sample index when computing
// the phase of each tone.
//
// Returns false, leaving |samples| untouched, when |length| is shorter than
// kSamplesToAnalyze; returns true otherwise.
bool EncodeTimestamp(uint16 timestamp,
                     size_t sample_offset,
                     size_t length,
                     float* samples) {
  // Anything shorter than one analysis window could never be decoded.
  if (length < kSamplesToAnalyze) {
    return false;
  }
  // Gray-code the number so that consecutive timestamps differ in exactly one
  // tone.
  timestamp = (timestamp >> 1) ^ timestamp;

  // Collect the frequencies of all tones that have to be present. At most
  // one tone per bit plus the sense tone, so reserve that up front.
  std::vector<double> frequencies;
  frequencies.reserve(kNumBits + 1);
  for (size_t bit = 0; bit < kNumBits; ++bit) {
    if ((timestamp >> bit) & 1) {
      frequencies.push_back(kBaseFrequency * (bit + 1));
    }
  }
  // Carrier sense frequency.
  frequencies.push_back(kSenseFrequency);

  for (size_t i = 0; i < length; ++i) {
    double mix_of_components = 0.0;
    for (size_t f = 0; f < frequencies.size(); ++f) {
      mix_of_components += sin((i + sample_offset) * Pi * 2.0 * frequencies[f] /
                                   kSamplingFrequency);
    }
    // Divide by the largest possible number of tones (not the number actually
    // present) so every tone has the same amplitude regardless of the
    // timestamp and the sum stays within [-1, 1].
    mix_of_components /= kNumBits + 1;
    DCHECK_LE(fabs(mix_of_components), 1.0);
    // Samples are stored as float; make the narrowing explicit.
    samples[i] = static_cast<float>(mix_of_components);
  }
  return true;
}

namespace {
// We use a slow DCT here since this code is only used for testing.
// While an FFT would probably be faster, it wouldn't be a LOT
// faster since we only analyze 17 out of 120 frequencies.
// With an FFT we would verify that none of the higher frequencies
// contain a lot of energy, which would be useful in detecting
// bogus data.
double DecodeOneFrequency(const float* samples,
                          size_t length,
                          double frequency) {
  double sin_sum = 0.0;
  double cos_sum = 0.0;
  for (size_t i = 0; i < length; i++) {
    sin_sum += samples[i] * sin(i * Pi * 2 * frequency / kSamplingFrequency);
    cos_sum += samples[i] * cos(i * Pi * 2 * frequency / kSamplingFrequency);
  }
  return sqrt(sin_sum * sin_sum + cos_sum * cos_sum);
}
}  // namespace

// When decoding, we first check for sense frequency, then we decode
// each of the bits. Each frequency must have a strength that is similar to
// the sense frequency or to zero, or the decoding fails. If it fails, we
// move head by 60 samples and try again until we run out of samples.
bool DecodeTimestamp(const float* samples, size_t length, uint16* timestamp) {
  for (size_t start = 0;
       start + kSamplesToAnalyze <= length;
       start += kSamplesToAnalyze / 4) {
    double sense = DecodeOneFrequency(&samples[start],
                                      kSamplesToAnalyze,
                                      kSenseFrequency);
    if (sense < kMinSense) continue;
    bool success = true;
    uint16 gray_coded = 0;
    for (size_t bit = 0; success && bit < kNumBits; bit++) {
      double signal_strength = DecodeOneFrequency(
          &samples[start],
          kSamplesToAnalyze,
          kBaseFrequency * (bit + 1));
      if (signal_strength < sense / 4) {
        // Zero bit, no action
      } else if (signal_strength > sense * 0.75 &&
                 signal_strength < sense * 1.25) {
        // One bit
        gray_coded |= 1 << bit;
      } else {
        success = false;
      }
    }
    if (success) {
      // Convert from gray-coded number to binary.
      uint16 mask;
      for (mask = gray_coded >> 1; mask != 0; mask = mask >> 1) {
        gray_coded = gray_coded ^ mask;
      }
      *timestamp = gray_coded;
      return true;
    }
  }
  return false;
}

}  // namespace cast
}  // namespace media