diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/Android.mk | 15 | ||||
-rw-r--r-- | bench/BenchGpuTimer_gl.cpp | 181 | ||||
-rw-r--r-- | bench/BenchGpuTimer_gl.h | 33 | ||||
-rw-r--r-- | bench/BenchGpuTimer_none.cpp | 14 | ||||
-rw-r--r-- | bench/BenchGpuTimer_none.h | 12 | ||||
-rw-r--r-- | bench/BenchSysTimer_c.cpp | 20 | ||||
-rw-r--r-- | bench/BenchSysTimer_c.h | 19 | ||||
-rw-r--r-- | bench/BenchSysTimer_mach.cpp | 69 | ||||
-rw-r--r-- | bench/BenchSysTimer_mach.h | 19 | ||||
-rw-r--r-- | bench/BenchSysTimer_posix.cpp | 50 | ||||
-rw-r--r-- | bench/BenchSysTimer_posix.h | 19 | ||||
-rw-r--r-- | bench/BenchSysTimer_windows.cpp | 55 | ||||
-rw-r--r-- | bench/BenchSysTimer_windows.h | 19 | ||||
-rw-r--r-- | bench/BenchTimer.cpp | 48 | ||||
-rw-r--r-- | bench/BenchTimer.h | 27 | ||||
-rw-r--r-- | bench/MatrixBench.cpp | 93 | ||||
-rw-r--r-- | bench/ScalarBench.cpp | 97 | ||||
-rw-r--r-- | bench/benchmain.cpp | 46 |
18 files changed, 819 insertions, 17 deletions
diff --git a/bench/Android.mk b/bench/Android.mk index 71523af..a0fe86c 100644 --- a/bench/Android.mk +++ b/bench/Android.mk @@ -3,17 +3,20 @@ LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ - BitmapBench.cpp \ + BenchGpuTimer_none.cpp \ + BenchSysTimer_posix.cpp \ + BenchTimer.cpp \ + BitmapBench.cpp \ DecodeBench.cpp \ FPSBench.cpp \ GradientBench.cpp \ MatrixBench.cpp \ PathBench.cpp \ - RectBench.cpp \ - RepeatTileBench.cpp \ - TextBench.cpp \ - SkBenchmark.cpp \ - benchmain.cpp + RectBench.cpp \ + RepeatTileBench.cpp \ + TextBench.cpp \ + SkBenchmark.cpp \ + benchmain.cpp # additional optional class for this tool LOCAL_SRC_FILES += \ diff --git a/bench/BenchGpuTimer_gl.cpp b/bench/BenchGpuTimer_gl.cpp new file mode 100644 index 0000000..ec2145d --- /dev/null +++ b/bench/BenchGpuTimer_gl.cpp @@ -0,0 +1,181 @@ +#include "BenchGpuTimer_gl.h" +#include <string.h> + +//GL +#define BENCH_GL_FUNCTION_TYPE +#if defined(SK_MESA) + #include <GL/osmesa.h> + #define SK_BENCH_CONTEXT_CHECK (NULL != OSMesaGetCurrentContext()) + + #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \ + OSMesaGetProcAddress("gl" #F); + #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\ + OSMesaGetProcAddress("gl" #F #S); + +#elif defined(SK_BUILD_FOR_WIN32) + #define WIN32_LEAN_AND_MEAN 1 + #include <Windows.h> + #include <GL/GL.h> + #define SK_BENCH_CONTEXT_CHECK (NULL != wglGetCurrentContext()) + + #undef BENCH_GL_FUNCTION_TYPE + #define BENCH_GL_FUNCTION_TYPE __stdcall + + #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \ + wglGetProcAddress("gl" #F); + #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\ + wglGetProcAddress("gl" #F #S); + +#elif defined(SK_BUILD_FOR_MAC) + #include <OpenGL/gl.h> + #include <OpenGL/CGLCurrent.h> + #define SK_BENCH_CONTEXT_CHECK (NULL != CGLGetCurrentContext()) + +#elif defined(SK_BUILD_FOR_UNIX) + #include <GL/gl.h> + #include <GL/glx.h> + #define SK_BENCH_CONTEXT_CHECK (NULL != glXGetCurrentContext()) + + #define SK_GL_GET_PROC(F) gBenchGL.f ## F = (BenchGL ## F ## Proc) \ + glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F)); + #define SK_GL_GET_PROC_SUFFIX(F, S) gBenchGL.f ## F = (BenchGL##F##Proc)\ + glXGetProcAddressARB(reinterpret_cast<const GLubyte*>("gl" #F #S)); +#else + #error unsupported platform +#endif + +#define BenchGL_TIME_ELAPSED 0x88BF +#define BenchGL_QUERY_RESULT 0x8866 +#define BenchGL_QUERY_RESULT_AVAILABLE 0x8867 + +#if defined(SK_BUILD_FOR_WIN32) +typedef UINT64 BenchGLuint64; +#else +#include <stdint.h> +typedef uint64_t BenchGLuint64; +#endif + +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGenQueriesProc) (GLsizei n, GLuint *ids); +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLBeginQueryProc) (GLenum target, GLuint id); +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLEndQueryProc) (GLenum target); +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLDeleteQueriesProc) (GLsizei n, const GLuint *ids); +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectivProc) (GLuint id, GLenum pname, GLint *params); +typedef void (BENCH_GL_FUNCTION_TYPE *BenchGLGetQueryObjectui64vProc) (GLuint id, GLenum pname, BenchGLuint64 *params); + +struct BenchGLInterface { + bool fHasTimer; + BenchGLGenQueriesProc fGenQueries; + BenchGLBeginQueryProc fBeginQuery; + BenchGLEndQueryProc fEndQuery; + BenchGLDeleteQueriesProc fDeleteQueries; + BenchGLGetQueryObjectivProc fGetQueryObjectiv; + BenchGLGetQueryObjectui64vProc fGetQueryObjectui64v; +}; + +static bool BenchGLCheckExtension(const char* ext, + const char* extensionString) { + int extLength = strlen(ext); + + while (true) { + int n = strcspn(extensionString, " "); + if (n == extLength && 0 == strncmp(ext, extensionString, n)) { + return true; + } + if (0 == extensionString[n]) { + return false; + } + extensionString += n+1; + } + + return false; +} + +static BenchGLInterface gBenchGL; +static bool gBenchGLInterfaceInit = false; + +static void BenchGLSetDefaultGLInterface() { + gBenchGL.fHasTimer = false; + if (gBenchGLInterfaceInit || !SK_BENCH_CONTEXT_CHECK) return; + + const char* glExts = + reinterpret_cast<const char*>(glGetString(GL_EXTENSIONS)); + const GLboolean ext = + BenchGLCheckExtension("GL_EXT_timer_query", glExts); + const GLboolean arb = + BenchGLCheckExtension("GL_ARB_timer_query", glExts); + if (ext || arb) { +#if defined(SK_BUILD_FOR_MAC) + #if GL_EXT_timer_query || GL_ARB_timer_query + gBenchGL.fHasTimer = true; + gBenchGL.fGenQueries = glGenQueries; + gBenchGL.fBeginQuery = glBeginQuery; + gBenchGL.fEndQuery = glEndQuery; + gBenchGL.fDeleteQueries = glDeleteQueries; + gBenchGL.fGetQueryObjectiv = glGetQueryObjectiv; + #endif + #if GL_ARB_timer_query + gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64v; + #elif GL_EXT_timer_query + gBenchGL.fGetQueryObjectui64v = glGetQueryObjectui64vEXT; + #endif +#else + gBenchGL.fHasTimer = true; + SK_GL_GET_PROC(GenQueries) + SK_GL_GET_PROC(BeginQuery) + SK_GL_GET_PROC(EndQuery) + SK_GL_GET_PROC(DeleteQueries) + + SK_GL_GET_PROC(GetQueryObjectiv) + if (arb) { + SK_GL_GET_PROC(GetQueryObjectui64v) + } else { + SK_GL_GET_PROC_SUFFIX(GetQueryObjectui64v, EXT) + } +#endif + } + gBenchGLInterfaceInit = true; +} + +BenchGpuTimer::BenchGpuTimer() { + BenchGLSetDefaultGLInterface(); + if (gBenchGL.fHasTimer) { + gBenchGL.fGenQueries(1, &this->fQuery); + } +} + +BenchGpuTimer::~BenchGpuTimer() { + if (gBenchGL.fHasTimer) { + gBenchGL.fDeleteQueries(1, &this->fQuery); + } +} + +void BenchGpuTimer::startGpu() { + if (!gBenchGL.fHasTimer) return; + + this->fStarted = true; + gBenchGL.fBeginQuery(BenchGL_TIME_ELAPSED, this->fQuery); +} + +/** + * It is important to stop the cpu clocks first, + * as this will cpu wait for the gpu to finish. + */ +double BenchGpuTimer::endGpu() { + if (!gBenchGL.fHasTimer) return 0; + + this->fStarted = false; + gBenchGL.fEndQuery(BenchGL_TIME_ELAPSED); + + GLint available = 0; + while (!available) { + gBenchGL.fGetQueryObjectiv(this->fQuery + , BenchGL_QUERY_RESULT_AVAILABLE + , &available); + } + BenchGLuint64 totalGPUTimeElapsed = 0; + gBenchGL.fGetQueryObjectui64v(this->fQuery + , BenchGL_QUERY_RESULT + , &totalGPUTimeElapsed); + + return totalGPUTimeElapsed / 1000000.0; +} diff --git a/bench/BenchGpuTimer_gl.h b/bench/BenchGpuTimer_gl.h new file mode 100644 index 0000000..ac23482 --- /dev/null +++ b/bench/BenchGpuTimer_gl.h @@ -0,0 +1,33 @@ +#ifndef SkBenchGpuTimer_DEFINED +#define SkBenchGpuTimer_DEFINED + +#if defined(SK_MESA) + #include <GL/osmesa.h> + +#elif defined(SK_BUILD_FOR_WIN32) + #define WIN32_LEAN_AND_MEAN 1 + #include <Windows.h> + #include <GL/GL.h> + +#elif defined(SK_BUILD_FOR_MAC) + #include <OpenGL/gl.h> + +#elif defined(SK_BUILD_FOR_UNIX) + #include <GL/gl.h> + +#else + #error unsupported platform +#endif + +class BenchGpuTimer { +public: + BenchGpuTimer(); + ~BenchGpuTimer(); + void startGpu(); + double endGpu(); +private: + GLuint fQuery; + int fStarted; +}; + +#endif diff --git a/bench/BenchGpuTimer_none.cpp b/bench/BenchGpuTimer_none.cpp new file mode 100644 index 0000000..0dba6d7 --- /dev/null +++ b/bench/BenchGpuTimer_none.cpp @@ -0,0 +1,14 @@ +#include "BenchGpuTimer_none.h" + +BenchGpuTimer::BenchGpuTimer() { +} + +BenchGpuTimer::~BenchGpuTimer() { +} + +void BenchGpuTimer::startGpu() { +} + +double BenchGpuTimer::endGpu() { + return -1.0; +} diff --git a/bench/BenchGpuTimer_none.h b/bench/BenchGpuTimer_none.h new file mode 100644 index 0000000..7069ca4 --- /dev/null +++ b/bench/BenchGpuTimer_none.h @@ -0,0 +1,12 @@ +#ifndef SkBenchGpuTimer_DEFINED +#define SkBenchGpuTimer_DEFINED + +class BenchGpuTimer { +public: + BenchGpuTimer(); + ~BenchGpuTimer(); + void startGpu(); + double endGpu(); +}; + +#endif diff --git a/bench/BenchSysTimer_c.cpp b/bench/BenchSysTimer_c.cpp new file mode 100644 index 0000000..fc0850b --- /dev/null +++ b/bench/BenchSysTimer_c.cpp @@ -0,0 +1,20 @@ +#include "BenchSysTimer_c.h" + +//Time +#include <time.h> + +void BenchSysTimer::startWall() { + this->fStartWall = time(); +} +void BenchSysTimer::startCpu() { + this->fStartCpu = clock(); +} + +double BenchSysTimer::endCpu() { + clock_t end_cpu = clock(); + this->fCpu = (end_cpu - this->fStartCpu) * CLOCKS_PER_SEC / 1000.0; +} +double BenchSysTimer::endWall() { + time_t end_wall = time(); + this->fWall = difftime(end_wall, this->fstartWall) / 1000.0; +} diff --git a/bench/BenchSysTimer_c.h b/bench/BenchSysTimer_c.h new file mode 100644 index 0000000..c598f30 --- /dev/null +++ b/bench/BenchSysTimer_c.h @@ -0,0 +1,19 @@ +#ifndef SkBenchSysTimer_DEFINED +#define SkBenchSysTimer_DEFINED + +//Time +#include <time.h> +#warning standard clocks + +class BenchSysTimer { +public: + void startWall(); + void startCpu(); + double endCpu(); + double endWall(); +private: + clock_t start_cpu; + time_t fStartWall; +}; + +#endif diff --git a/bench/BenchSysTimer_mach.cpp b/bench/BenchSysTimer_mach.cpp new file mode 100644 index 0000000..b23897c --- /dev/null +++ b/bench/BenchSysTimer_mach.cpp @@ -0,0 +1,69 @@ +#include "BenchSysTimer_mach.h" + +//Time +#include <mach/mach.h> +#include <mach/mach_time.h> + +static time_value_t macCpuTime() { + mach_port_t task = mach_task_self(); + if (task == MACH_PORT_NULL) { + time_value_t none = {0, 0}; + return none; + } + + task_thread_times_info thread_info_data; + mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT; + if (KERN_SUCCESS != task_info(task, + TASK_THREAD_TIMES_INFO, + reinterpret_cast<task_info_t>(&thread_info_data), + &thread_info_count)) + { + time_value_t none = {0, 0}; + return none; + } + + time_value_add(&thread_info_data.user_time, &thread_info_data.system_time) + return thread_info_data.user_time; +} + +static double intervalInMSec(const time_value_t start_clock + , const time_value_t end_clock) +{ + double duration_clock; + if ((end_clock.microseconds - start_clock.microseconds) < 0) { + duration_clock = (end_clock.seconds - start_clock.seconds-1)*1000; + duration_clock += (1000000 + + end_clock.microseconds + - start_clock.microseconds) / 1000.0; + } else { + duration_clock = (end_clock.seconds - start_clock.seconds)*1000; + duration_clock += (end_clock.microseconds - start_clock.microseconds) + / 1000.0; + } + return duration_clock; +} + +void BenchSysTimer::startWall() { + this->fStartWall = mach_absolute_time(); +} +void BenchSysTimer::startCpu() { + this->fStartCpu = macCpuTime(); +} + +double BenchSysTimer::endCpu() { + time_value_t end_cpu = macCpuTime(); + return intervalInMSec(this->fStartCpu, end_cpu); +} +double BenchSysTimer::endWall() { + uint64_t end_wall = mach_absolute_time(); + + uint64_t elapsed = end_wall - this->fStartWall; + mach_timebase_info_data_t sTimebaseInfo; + if (KERN_SUCCESS != mach_timebase_info(&sTimebaseInfo)) { + return 0; + } else { + uint64_t elapsedNano = elapsed * sTimebaseInfo.numer + / sTimebaseInfo.denom; + return elapsedNano / 1000000; + } +} diff --git a/bench/BenchSysTimer_mach.h b/bench/BenchSysTimer_mach.h new file mode 100644 index 0000000..da4fff0 --- /dev/null +++ b/bench/BenchSysTimer_mach.h @@ -0,0 +1,19 @@ +#ifndef SkBenchSysTimer_DEFINED +#define SkBenchSysTimer_DEFINED + +//Time +#include <mach/mach.h> +#include <mach/mach_time.h> + +class BenchSysTimer { +public: + void startWall(); + void startCpu(); + double endCpu(); + double endWall(); +private: + time_value_t fStartCpu; + uint64_t fStartWall; +}; + +#endif diff --git a/bench/BenchSysTimer_posix.cpp b/bench/BenchSysTimer_posix.cpp new file mode 100644 index 0000000..5d28f40 --- /dev/null +++ b/bench/BenchSysTimer_posix.cpp @@ -0,0 +1,50 @@ +#include "BenchSysTimer_posix.h" + +//Time +#include <time.h> + +static double intervalInMSec(const timespec start_clock + , const timespec end_clock) +{ + double duration_clock; + if ((end_clock.tv_nsec - start_clock.tv_nsec) < 0) { + duration_clock = (end_clock.tv_sec - start_clock.tv_sec-1)*1000; + duration_clock += (1000000000 + end_clock.tv_nsec - start_clock.tv_nsec) + / 1000000.0; + } else { + duration_clock = (end_clock.tv_sec - start_clock.tv_sec)*1000; + duration_clock += (end_clock.tv_nsec - start_clock.tv_nsec) / 1000000.0; + } + return duration_clock; +} + +void BenchSysTimer::startWall() { + if (-1 == clock_gettime(CLOCK_MONOTONIC, &this->fWall)) { + timespec none = {0, 0}; + this->fWall = none; + } +} +void BenchSysTimer::startCpu() { + if (-1 == clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &this->fCpu)) { + timespec none = {0, 0}; + this->fCpu = none; + } +} + +double BenchSysTimer::endCpu() { + timespec end_cpu; + if (-1 == clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_cpu)) { + timespec none = {0, 0}; + end_cpu = none; + } + return intervalInMSec(this->fCpu, end_cpu); +} + +double BenchSysTimer::endWall() { + timespec end_wall; + if (-1 == clock_gettime(CLOCK_MONOTONIC, &end_wall)) { + timespec none = {0, 0}; + end_wall = none; + } + return intervalInMSec(this->fWall, end_wall); +} diff --git a/bench/BenchSysTimer_posix.h b/bench/BenchSysTimer_posix.h new file mode 100644 index 0000000..09dfb0e --- /dev/null +++ b/bench/BenchSysTimer_posix.h @@ -0,0 +1,19 @@ +#ifndef SkBenchSysTimer_DEFINED +#define SkBenchSysTimer_DEFINED + +//Time +#include <time.h> + +class BenchSysTimer { +public: + void startWall(); + void startCpu(); + double endCpu(); + double endWall(); +private: + timespec fCpu; + timespec fWall; +}; + +#endif + diff --git a/bench/BenchSysTimer_windows.cpp b/bench/BenchSysTimer_windows.cpp new file mode 100644 index 0000000..923754c --- /dev/null +++ b/bench/BenchSysTimer_windows.cpp @@ -0,0 +1,55 @@ +#include "BenchSysTimer_windows.h" + +//Time +#define WIN32_LEAN_AND_MEAN 1 +#include <Windows.h> + +static ULONGLONG winCpuTime() { + FILETIME createTime; + FILETIME exitTime; + FILETIME usrTime; + FILETIME sysTime; + if (0 == GetProcessTimes(GetCurrentProcess() + , &createTime, &exitTime + , &sysTime, &usrTime)) + { + return 0; + } + ULARGE_INTEGER start_cpu_sys; + ULARGE_INTEGER start_cpu_usr; + start_cpu_sys.LowPart = sysTime.dwLowDateTime; + start_cpu_sys.HighPart = sysTime.dwHighDateTime; + start_cpu_usr.LowPart = usrTime.dwLowDateTime; + start_cpu_usr.HighPart = usrTime.dwHighDateTime; + return start_cpu_sys.QuadPart + start_cpu_usr.QuadPart; +} + +void BenchSysTimer::startWall() { + if (0 == ::QueryPerformanceCounter(&this->fStartWall)) { + this->fStartWall.QuadPart = 0; + } +} +void BenchSysTimer::startCpu() { + this->fStartCpu = winCpuTime(); +} + +double BenchSysTimer::endCpu() { + ULONGLONG end_cpu = winCpuTime(); + return (end_cpu - this->fStartCpu) / 10000; +} +double BenchSysTimer::endWall() { + LARGE_INTEGER end_wall; + if (0 == ::QueryPerformanceCounter(&end_wall)) { + end_wall.QuadPart = 0; + } + + LARGE_INTEGER ticks_elapsed; + ticks_elapsed.QuadPart = end_wall.QuadPart - this->fStartWall.QuadPart; + + LARGE_INTEGER frequency; + if (0 == ::QueryPerformanceFrequency(&frequency)) { + return 0; + } else { + return (double)ticks_elapsed.QuadPart / frequency.QuadPart * 1000; + } +} diff --git a/bench/BenchSysTimer_windows.h b/bench/BenchSysTimer_windows.h new file mode 100644 index 0000000..72a3fb2 --- /dev/null +++ b/bench/BenchSysTimer_windows.h @@ -0,0 +1,19 @@ +#ifndef SkBenchSysTimer_DEFINED +#define SkBenchSysTimer_DEFINED + +//Time +#define WIN32_LEAN_AND_MEAN 1 +#include <Windows.h> + +struct BenchSysTimer { +public: + void startWall(); + void startCpu(); + double endCpu(); + double endWall(); +private: + ULONGLONG fStartCpu; + LARGE_INTEGER fStartWall; +}; + +#endif diff --git a/bench/BenchTimer.cpp b/bench/BenchTimer.cpp new file mode 100644 index 0000000..e7b0068 --- /dev/null +++ b/bench/BenchTimer.cpp @@ -0,0 +1,48 @@ +#include "BenchTimer.h" +#if defined(SK_BUILD_FOR_WIN32) + #include "BenchSysTimer_windows.h" +#elif defined(SK_BUILD_FOR_MAC) + #include "BenchSysTimer_mach.h" +#elif defined(SK_BUILD_FOR_UNIX) + #include "BenchSysTimer_posix.h" +#else + #include "BenchSysTimer_c.h" +#endif + +#if defined(SK_MESA) || \ + defined(SK_BUILD_FOR_WIN32) || \ + defined(SK_BUILD_FOR_MAC) || \ + defined(SK_BUILD_FOR_UNIX) + #include "BenchGpuTimer_gl.h" + +#else + #include "BenchGpuTimer_none.h" +#endif + +BenchTimer::BenchTimer() + : fCpu(-1.0) + , fWall(-1.0) + , fGpu(-1.0) +{ + this->fSysTimer = new BenchSysTimer(); + this->fGpuTimer = new BenchGpuTimer(); +} + +BenchTimer::~BenchTimer() { + delete this->fSysTimer; + delete this->fGpuTimer; +} + +void BenchTimer::start() { + this->fSysTimer->startWall(); + this->fGpuTimer->startGpu(); + this->fSysTimer->startCpu(); +} + +void BenchTimer::end() { + this->fCpu = this->fSysTimer->endCpu(); + //It is important to stop the cpu clocks first, + //as the following will cpu wait for the gpu to finish. + this->fGpu = this->fGpuTimer->endGpu(); + this->fWall = this->fSysTimer->endWall(); +} diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h new file mode 100644 index 0000000..eae82d5 --- /dev/null +++ b/bench/BenchTimer.h @@ -0,0 +1,27 @@ +#ifndef SkBenchTimer_DEFINED +#define SkBenchTimer_DEFINED + +class BenchSysTimer; +class BenchGpuTimer; + +/** + * SysTimers and GpuTimers are implemented orthogonally. + * This class combines a SysTimer and a GpuTimer into one single, + * platform specific, Timer with a simple interface. + */ +class BenchTimer { +public: + BenchTimer(); + ~BenchTimer(); + void start(); + void end(); + double fCpu; + double fWall; + double fGpu; + +private: + BenchSysTimer *fSysTimer; + BenchGpuTimer *fGpuTimer; +}; + +#endif diff --git a/bench/MatrixBench.cpp b/bench/MatrixBench.cpp index d963bc7..dce0358 100644 --- a/bench/MatrixBench.cpp +++ b/bench/MatrixBench.cpp @@ -67,11 +67,10 @@ private: class ScaleMatrixBench : public MatrixBench { public: ScaleMatrixBench(void* param) : INHERITED(param, "scale") { - + fSX = fSY = SkFloatToScalar(1.5f); fM0.reset(); fM1.setScale(fSX, fSY); fM2.setTranslate(fSX, fSY); - fSX = fSY = SkFloatToScalar(1.5f); } protected: virtual void performTest() { @@ -215,6 +214,89 @@ private: typedef MatrixBench INHERITED; }; +#ifdef SK_SCALAR_IS_FLOAT +class ScaleTransMixedMatrixBench : public MatrixBench { + public: + ScaleTransMixedMatrixBench(void* p) : INHERITED(p, "scaletrans_mixed"), fCount (16) { + fMatrix.setAll(fRandom.nextS(), fRandom.nextS(), fRandom.nextS(), + fRandom.nextS(), fRandom.nextS(), fRandom.nextS(), + fRandom.nextS(), fRandom.nextS(), fRandom.nextS()); + int i; + for (i = 0; i < fCount; i++) { + fSrc[i].fX = fRandom.nextS(); + fSrc[i].fY = fRandom.nextS(); + fDst[i].fX = fRandom.nextS(); + fDst[i].fY = fRandom.nextS(); + } + } + protected: + virtual void performTest() { + SkPoint* dst = fDst; + const SkPoint* src = fSrc; + int count = fCount; + float mx = fMatrix[SkMatrix::kMScaleX]; + float my = fMatrix[SkMatrix::kMScaleY]; + float tx = fMatrix[SkMatrix::kMTransX]; + float ty = fMatrix[SkMatrix::kMTransY]; + do { + dst->fY = SkScalarMulAdd(src->fY, my, ty); + dst->fX = SkScalarMulAdd(src->fX, mx, tx); + src += 1; + dst += 1; + } while (--count); + } + private: + SkMatrix fMatrix; + SkPoint fSrc [16]; + SkPoint fDst [16]; + int fCount; + SkRandom fRandom; + typedef MatrixBench INHERITED; +}; + + +class ScaleTransDoubleMatrixBench : public MatrixBench { + public: + ScaleTransDoubleMatrixBench(void* p) : INHERITED(p, "scaletrans_double"), fCount (16) { + init9(fMatrix); + int i; + for (i = 0; i < fCount; i++) { + fSrc[i].fX = fRandom.nextS(); + fSrc[i].fY = fRandom.nextS(); + fDst[i].fX = fRandom.nextS(); + fDst[i].fY = fRandom.nextS(); + } + } + protected: + virtual void performTest() { + SkPoint* dst = fDst; + const SkPoint* src = fSrc; + int count = fCount; + // As doubles, on Z600 Linux systems this is 2.5x as expensive as mixed mode + float mx = fMatrix[SkMatrix::kMScaleX]; + float my = fMatrix[SkMatrix::kMScaleY]; + float tx = fMatrix[SkMatrix::kMTransX]; + float ty = fMatrix[SkMatrix::kMTransY]; + do { + dst->fY = src->fY * my + ty; + dst->fX = src->fX * mx + tx; + src += 1; + dst += 1; + } while (--count); + } + private: + double fMatrix [9]; + SkPoint fSrc [16]; + SkPoint fDst [16]; + int fCount; + SkRandom fRandom; + typedef MatrixBench INHERITED; +}; +#endif + + + + static SkBenchmark* M0(void* p) { return new EqualsMatrixBench(p); } static SkBenchmark* M1(void* p) { return new ScaleMatrixBench(p); } @@ -227,3 +309,10 @@ static BenchRegistry gReg1(M1); static BenchRegistry gReg2(M2); static BenchRegistry gReg3(M3); static BenchRegistry gReg4(M4); + +#ifdef SK_SCALAR_IS_FLOAT +static SkBenchmark* FlM0(void* p) { return new ScaleTransMixedMatrixBench(p); } +static SkBenchmark* FlM1(void* p) { return new ScaleTransDoubleMatrixBench(p); } +static BenchRegistry gFlReg5(FlM0); +static BenchRegistry gFlReg6(FlM1); +#endif diff --git a/bench/ScalarBench.cpp b/bench/ScalarBench.cpp new file mode 100644 index 0000000..29fe5c4 --- /dev/null +++ b/bench/ScalarBench.cpp @@ -0,0 +1,97 @@ +#include "SkBenchmark.h" +#include "SkFloatBits.h" +#include "SkRandom.h" +#include "SkString.h" + +class ScalarBench : public SkBenchmark { + SkString fName; + enum { N = 100000 }; +public: + ScalarBench(void* param, const char name[]) : INHERITED(param) { + fName.printf("scalar_%s", name); + } + + virtual void performTest() = 0; + +protected: + virtual int mulLoopCount() const { return 1; } + + virtual const char* onGetName() { + return fName.c_str(); + } + + virtual void onDraw(SkCanvas* canvas) { + int n = N * this->mulLoopCount(); + for (int i = 0; i < n; i++) { + this->performTest(); + } + } + +private: + typedef SkBenchmark INHERITED; +}; + +// we want to stop the compiler from eliminating code that it thinks is a no-op +// so we have a non-static global we increment, hoping that will convince the +// compiler to execute everything +int gScalarBench_NonStaticGlobal; + +#define always_do(pred) \ + do { \ + if (pred) { \ + ++gScalarBench_NonStaticGlobal; \ + } \ + } while (0) + +// having unknown values in our arrays can throw off the timing a lot, perhaps +// handling NaN values is a lot slower. Anyway, this guy is just meant to put +// reasonable values in our arrays. +template <typename T> void init9(T array[9]) { + SkRandom rand; + for (int i = 0; i < 9; i++) { + array[i] = rand.nextSScalar1(); + } +} + +class FloatComparisonBench : public ScalarBench { +public: + FloatComparisonBench(void* param) : INHERITED(param, "compare_float") { + init9(fArray); + } +protected: + virtual int mulLoopCount() const { return 4; } + virtual void performTest() { + always_do(fArray[6] != 0.0f || fArray[7] != 0.0f || fArray[8] != 1.0f); + always_do(fArray[2] != 0.0f || fArray[5] != 0.0f); + } +private: + float fArray[9]; + typedef ScalarBench INHERITED; +}; + +class ForcedIntComparisonBench : public ScalarBench { +public: + ForcedIntComparisonBench(void* param) + : INHERITED(param, "compare_forced_int") { + init9(fArray); + } +protected: + virtual int mulLoopCount() const { return 4; } + virtual void performTest() { + always_do(SkScalarAs2sCompliment(fArray[6]) | + SkScalarAs2sCompliment(fArray[7]) | + (SkScalarAs2sCompliment(fArray[8]) - kPersp1Int)); + always_do(SkScalarAs2sCompliment(fArray[2]) | + SkScalarAs2sCompliment(fArray[5])); + } +private: + static const int32_t kPersp1Int = 0x3f800000; + SkScalar fArray[9]; + typedef ScalarBench INHERITED; +}; + +static SkBenchmark* S0(void* p) { return new FloatComparisonBench(p); } +static SkBenchmark* S1(void* p) { return new ForcedIntComparisonBench(p); } + +static BenchRegistry gReg0(S0); +static BenchRegistry gReg1(S1); diff --git a/bench/benchmain.cpp b/bench/benchmain.cpp index 066573a..34f8a1a 100644 --- a/bench/benchmain.cpp +++ b/bench/benchmain.cpp @@ -5,12 +5,12 @@ #include "SkNWayCanvas.h" #include "SkPicture.h" #include "SkString.h" -#include "SkTime.h" #include "GrContext.h" #include "SkGpuDevice.h" #include "SkEGLContext.h" #include "SkBenchmark.h" +#include "BenchTimer.h" #ifdef ANDROID static void log_error(const char msg[]) { SkDebugf("%s", msg); } @@ -212,6 +212,9 @@ int main (int argc, char * const argv[]) { bool forceAA = true; bool forceFilter = false; SkTriState::State forceDither = SkTriState::kDefault; + bool timerWall = false; + bool timerCpu = true; + bool timerGpu = true; bool doScale = false; bool doRotate = false; bool doClip = false; @@ -246,6 +249,23 @@ int main (int argc, char * const argv[]) { log_error("missing arg for -repeat\n"); return -1; } + } else if (strcmp(*argv, "-timers") == 0) { + argv++; + if (argv < stop) { + timerWall = false; + timerCpu = false; + timerGpu = false; + for (char* t = *argv; *t; ++t) { + switch (*t) { + case 'w': timerWall = true; break; + case 'c': timerCpu = true; break; + case 'g': timerGpu = true; break; + } + } + } else { + log_error("missing arg for -timers\n"); + return -1; + } } else if (!strcmp(*argv, "-rotate")) { doRotate = true; } else if (!strcmp(*argv, "-scale")) { @@ -346,6 +366,8 @@ int main (int argc, char * const argv[]) { context = GrContext::CreateGLShaderContext(); } + BenchTimer timer = BenchTimer(); + Iter iter(&defineDict); SkBenchmark* bench; while ((bench = iter.next()) != NULL) { @@ -399,30 +421,36 @@ int main (int argc, char * const argv[]) { performRotate(&canvas, dim.fX, dim.fY); } + bool gpu = kGPU_Backend == backend && context; //warm up caches if needed if (repeatDraw > 1) { SkAutoCanvasRestore acr(&canvas, true); bench->draw(&canvas); - if (kGPU_Backend == backend && context) { + if (gpu) { context->flush(); glFinish(); } } - SkMSec now = SkTime::GetMSecs(); + timer.start(); for (int i = 0; i < repeatDraw; i++) { SkAutoCanvasRestore acr(&canvas, true); bench->draw(&canvas); } - if (kGPU_Backend == backend && context) { - context->flush(); - glFinish(); - } + timer.end(); if (repeatDraw > 1) { - double duration = SkTime::GetMSecs() - now; SkString str; - str.printf(" %4s: msecs = %5.2f", configName, duration / repeatDraw); + str.printf(" %4s:", configName); + if (timerWall) { + str.appendf(" msecs = %6.2f", timer.fWall / repeatDraw); + } + if (timerCpu) { + str.appendf(" cmsecs = %6.2f", timer.fCpu / repeatDraw); + } + if (timerGpu && gpu && timer.fGpu > 0) { + str.appendf(" gmsecs = %6.2f", timer.fGpu / repeatDraw); + } log_progress(str); } if (outDir.size() > 0) { |