diff options
author | Glenn Kasten <gkasten@google.com> | 2012-05-02 10:34:47 -0700 |
---|---|---|
committer | Glenn Kasten <gkasten@google.com> | 2012-05-10 13:39:16 -0700 |
commit | 42d45cfd0c3d62357a6549c62f535e4d4fe08d91 (patch) | |
tree | 40667398d6db38a70fa2729e6d574a01b1ebefd1 | |
parent | c150ca7dda844891fa684f6898da7f7e0c40329d (diff) | |
download | frameworks_av-42d45cfd0c3d62357a6549c62f535e4d4fe08d91.zip frameworks_av-42d45cfd0c3d62357a6549c62f535e4d4fe08d91.tar.gz frameworks_av-42d45cfd0c3d62357a6549c62f535e4d4fe08d91.tar.bz2 |
Update fast mixer statistics
Compute statistics on fast mixer elapsed time and CPU load per cycle using a
simple moving average rather than by fixed blocks. This has a couple advantages:
- remove burstiness of CPU usage due to former floating-point calculations in fast mixer
- gives us flexibility in how to report (e.g. could report over just the last 1 second)
Disadvantage is increased RAM, and since the samples are not updated
atomically, it is possible to have an error in the statistics. This
should not be much of an issue due to the relatively large number of samples.
Also add CPU raw ns and adjusted MHz statistics.
Change-Id: Iaa2cd13f18250c3162aff40409b3694b769d9505
-rw-r--r-- | services/audioflinger/FastMixer.cpp | 130 | ||||
-rw-r--r-- | services/audioflinger/FastMixer.h | 20 |
2 files changed, 128 insertions, 22 deletions
diff --git a/services/audioflinger/FastMixer.cpp b/services/audioflinger/FastMixer.cpp index bf264be..d499f7a 100644 --- a/services/audioflinger/FastMixer.cpp +++ b/services/audioflinger/FastMixer.cpp @@ -23,6 +23,7 @@ #include <system/audio.h> #ifdef FAST_MIXER_STATISTICS #include <cpustats/CentralTendencyStatistics.h> +#include <cpustats/ThreadCpuUsage.h> #endif #include "AudioMixer.h" #include "FastMixer.h" @@ -65,8 +66,11 @@ bool FastMixer::threadLoop() FastMixerDumpState dummyDumpState, *dumpState = &dummyDumpState; bool ignoreNextOverrun = true; // used to ignore initial overrun and first after an underrun #ifdef FAST_MIXER_STATISTICS - CentralTendencyStatistics cts; // cycle times in seconds - static const unsigned kMaxSamples = 1000; + struct timespec oldLoad = {0, 0}; // previous value of clock_gettime(CLOCK_THREAD_CPUTIME_ID) + bool oldLoadValid = false; // whether oldLoad is valid + uint32_t bounds = 0; + bool full = false; // whether we have collected at least kSamplingN samples + ThreadCpuUsage tcu; // for reading the current CPU clock frequency in kHz #endif unsigned coldGen = 0; // last observed mColdGen bool isWarm = false; // true means ready to mix, false means wait for warmup before mixing @@ -116,6 +120,7 @@ bool FastMixer::threadLoop() preIdle = *current; current = &preIdle; oldTsValid = false; + oldLoadValid = false; ignoreNextOverrun = true; } previous = current; @@ -151,6 +156,8 @@ bool FastMixer::threadLoop() warmupCycles = 0; sleepNs = -1; coldGen = current->mColdGen; + bounds = 0; + full = false; } else { sleepNs = FAST_HOT_IDLE_NS; } @@ -451,15 +458,54 @@ bool FastMixer::threadLoop() ignoreNextOverrun = false; } #ifdef FAST_MIXER_STATISTICS - // long-term statistics - cts.sample(sec + nsec * 1e-9); - if (cts.n() >= kMaxSamples) { - dumpState->mMean = cts.mean(); - dumpState->mMinimum = cts.minimum(); - dumpState->mMaximum = cts.maximum(); - dumpState->mStddev = cts.stddev(); - cts.reset(); + // advance the FIFO queue bounds + size_t i = bounds & (FastMixerDumpState::kSamplingN - 1); + bounds = (bounds + 1) & 0xFFFF; + if (full) { + bounds += 0x10000; + } else if (!(bounds & (FastMixerDumpState::kSamplingN - 1))) { + full = true; } + // compute the delta value of clock_gettime(CLOCK_MONOTONIC) + uint32_t monotonicNs = nsec; + if (sec > 0 && sec < 4) { + monotonicNs += sec * 1000000000; + } + // compute the raw CPU load = delta value of clock_gettime(CLOCK_THREAD_CPUTIME_ID) + uint32_t loadNs = 0; + struct timespec newLoad; + rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &newLoad); + if (rc == 0) { + if (oldLoadValid) { + sec = newLoad.tv_sec - oldLoad.tv_sec; + nsec = newLoad.tv_nsec - oldLoad.tv_nsec; + if (nsec < 0) { + --sec; + nsec += 1000000000; + } + loadNs = nsec; + if (sec > 0 && sec < 4) { + loadNs += sec * 1000000000; + } + } else { + // first time through the loop + oldLoadValid = true; + } + oldLoad = newLoad; + } + // get the absolute value of CPU clock frequency in kHz + int cpuNum = sched_getcpu(); + uint32_t kHz = tcu.getCpukHz(cpuNum); + kHz = (kHz & ~0xF) | (cpuNum & 0xF); + // save values in FIFO queues for dumpsys + // these stores #1, #2, #3 are not atomic with respect to each other, + // or with respect to store #4 below + dumpState->mMonotonicNs[i] = monotonicNs; + dumpState->mLoadNs[i] = loadNs; + dumpState->mCpukHz[i] = kHz; + // this store #4 is not atomic with respect to stores #1, #2, #3 above, but + // the newest open and oldest closed halves are atomic with respect to each other + dumpState->mBounds = bounds; #endif } else { // first time through the loop @@ -474,6 +520,7 @@ bool FastMixer::threadLoop() sleepNs = periodNs; } + } // for (;;) // never return 'true'; Thread::_threadLoop() locks mutex which can result in priority inversion @@ -484,11 +531,16 @@ FastMixerDumpState::FastMixerDumpState() : mNumTracks(0), mWriteErrors(0), mUnderruns(0), mOverruns(0), mSampleRate(0), mFrameCount(0), /* mMeasuredWarmupTs({0, 0}), */ mWarmupCycles(0) #ifdef FAST_MIXER_STATISTICS - , mMean(0.0), mMinimum(0.0), mMaximum(0.0), mStddev(0.0) + , mBounds(0) #endif { mMeasuredWarmupTs.tv_sec = 0; mMeasuredWarmupTs.tv_nsec = 0; + // sample arrays aren't accessed atomically with respect to the bounds, + // so clearing reduces chance for dumpsys to read random uninitialized samples + memset(&mMonotonicNs, 0, sizeof(mMonotonicNs)); + memset(&mLoadNs, 0, sizeof(mLoadNs)); + memset(&mCpukHz, 0, sizeof(mCpukHz)); } FastMixerDumpState::~FastMixerDumpState() @@ -525,17 +577,63 @@ void FastMixerDumpState::dump(int fd) snprintf(string, COMMAND_MAX, "%d", mCommand); break; } - double mMeasuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) + + double measuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) + (mMeasuredWarmupTs.tv_nsec / 1000000.0); + double mixPeriodSec = (double) mFrameCount / (double) mSampleRate; fdprintf(fd, "FastMixer command=%s writeSequence=%u framesWritten=%u\n" " numTracks=%u writeErrors=%u underruns=%u overruns=%u\n" - " sampleRate=%u frameCount=%u measuredWarmup=%.3g ms, warmupCycles=%u\n", + " sampleRate=%u frameCount=%u measuredWarmup=%.3g ms, warmupCycles=%u\n" + " mixPeriod=%.2f ms\n", string, mWriteSequence, mFramesWritten, mNumTracks, mWriteErrors, mUnderruns, mOverruns, - mSampleRate, mFrameCount, mMeasuredWarmupMs, mWarmupCycles); + mSampleRate, mFrameCount, measuredWarmupMs, mWarmupCycles, + mixPeriodSec * 1e3); #ifdef FAST_MIXER_STATISTICS - fdprintf(fd, " cycle time in ms: mean=%.1f min=%.1f max=%.1f stddev=%.1f\n", - mMean*1e3, mMinimum*1e3, mMaximum*1e3, mStddev*1e3); + // find the interval of valid samples + uint32_t bounds = mBounds; + uint32_t newestOpen = bounds & 0xFFFF; + uint32_t oldestClosed = bounds >> 16; + uint32_t n = (newestOpen - oldestClosed) & 0xFFFF; + if (n > kSamplingN) { + ALOGE("too many samples %u", n); + n = kSamplingN; + } + // statistics for monotonic (wall clock) time, thread raw CPU load in time, CPU clock frequency, + // and adjusted CPU load in MHz normalized for CPU clock frequency + CentralTendencyStatistics wall, loadNs, kHz, loadMHz; + // only compute adjusted CPU load in Hz if current CPU number and CPU clock frequency are stable + bool valid = false; + uint32_t previousCpukHz = 0; + // loop over all the samples + for (; n > 0; --n) { + size_t i = oldestClosed++ & (kSamplingN - 1); + uint32_t wallNs = mMonotonicNs[i]; + wall.sample(wallNs); + uint32_t sampleLoadNs = mLoadNs[i]; + uint32_t sampleCpukHz = mCpukHz[i]; + loadNs.sample(sampleLoadNs); + kHz.sample(sampleCpukHz & ~0xF); + if (sampleCpukHz == previousCpukHz) { + double megacycles = (double) sampleLoadNs * (double) sampleCpukHz; + double adjMHz = megacycles / mixPeriodSec; // _not_ wallNs * 1e9 + loadMHz.sample(adjMHz); + } + previousCpukHz = sampleCpukHz; + } + fdprintf(fd, "Simple moving statistics over last %.1f seconds:\n", wall.n() * mixPeriodSec); + fdprintf(fd, " wall clock time in ms per mix cycle:\n" + " mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", + wall.mean()*1e-6, wall.minimum()*1e-6, wall.maximum()*1e-6, wall.stddev()*1e-6); + fdprintf(fd, " raw CPU load in us per mix cycle:\n" + " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", + loadNs.mean()*1e-3, loadNs.minimum()*1e-3, loadNs.maximum()*1e-3, + loadNs.stddev()*1e-3); + fdprintf(fd, " CPU clock frequency in MHz:\n" + " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", + kHz.mean()*1e-3, kHz.minimum()*1e-3, kHz.maximum()*1e-3, kHz.stddev()*1e-3); + fdprintf(fd, " adjusted CPU load in MHz (i.e. normalized for CPU clock frequency):\n" + " mean=%.1f min=%.1f max=%.1f stddev=%.1f\n", + loadMHz.mean(), loadMHz.minimum(), loadMHz.maximum(), loadMHz.stddev()); #endif } diff --git a/services/audioflinger/FastMixer.h b/services/audioflinger/FastMixer.h index a6dd310..e2ed553 100644 --- a/services/audioflinger/FastMixer.h +++ b/services/audioflinger/FastMixer.h @@ -64,7 +64,7 @@ struct FastMixerDumpState { FastMixerDumpState(); /*virtual*/ ~FastMixerDumpState(); - void dump(int fd); + void dump(int fd); // should only be called on a stable copy, not the original FastMixerState::Command mCommand; // current command uint32_t mWriteSequence; // incremented before and after each write() @@ -78,12 +78,20 @@ struct FastMixerDumpState { struct timespec mMeasuredWarmupTs; // measured warmup time uint32_t mWarmupCycles; // number of loop cycles required to warmup FastTrackDump mTracks[FastMixerState::kMaxFastTracks]; + #ifdef FAST_MIXER_STATISTICS - // cycle times in seconds - float mMean; - float mMinimum; - float mMaximum; - float mStddev; + // Recently collected samples of per-cycle monotonic time, thread CPU time, and CPU frequency. + // kSamplingN is the size of the sampling frame, and must be a power of 2 <= 0x8000. + static const uint32_t kSamplingN = 0x1000; + // The bounds define the interval of valid samples, and are represented as follows: + // newest open (excluded) endpoint = lower 16 bits of bounds, modulo N + // oldest closed (included) endpoint = upper 16 bits of bounds, modulo N + // Number of valid samples is newest - oldest. + uint32_t mBounds; // bounds for mMonotonicNs, mThreadCpuNs, and mCpukHz + // The elements in the *Ns arrays are in units of nanoseconds <= 3999999999. + uint32_t mMonotonicNs[kSamplingN]; // delta monotonic (wall clock) time + uint32_t mLoadNs[kSamplingN]; // delta CPU load in time + uint32_t mCpukHz[kSamplingN]; // absolute CPU clock frequency in kHz, bits 0-3 are CPU# #endif }; |