summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--native_client_sdk/src/examples/demo/life_simd/example.dsc11
-rw-r--r--native_client_sdk/src/examples/demo/life_simd/example.js51
-rw-r--r--native_client_sdk/src/examples/demo/life_simd/index.html12
-rw-r--r--native_client_sdk/src/examples/demo/life_simd/life.c457
-rw-r--r--native_client_sdk/src/examples/demo/life_simd/life.cc524
5 files changed, 594 insertions, 461 deletions
diff --git a/native_client_sdk/src/examples/demo/life_simd/example.dsc b/native_client_sdk/src/examples/demo/life_simd/example.dsc
index a27736e..fb1f343 100644
--- a/native_client_sdk/src/examples/demo/life_simd/example.dsc
+++ b/native_client_sdk/src/examples/demo/life_simd/example.dsc
@@ -2,17 +2,20 @@
'TOOLS': ['pnacl'],
'TARGETS': [
{
- 'NAME' : 'life_simd',
+ 'NAME' : 'life',
'TYPE' : 'main',
'SOURCES' : [
- 'life.c',
+ 'life.cc',
],
'DEPS': ['ppapi_simple', 'nacl_io'],
- 'LIBS': ['ppapi_simple', 'nacl_io', 'ppapi_cpp', 'ppapi', 'pthread']
+ 'LIBS': ['ppapi_simple', 'nacl_io', 'sdk_util', 'ppapi_cpp', 'ppapi', 'pthread']
}
],
+ 'DATA': [
+ 'example.js'
+ ],
'DEST': 'examples/demo',
'NAME': 'life_simd',
- 'TITLE': "Conway's Life (SIMD version)",
+ 'TITLE': "Conway's Life",
'GROUP': 'Demo'
}
diff --git a/native_client_sdk/src/examples/demo/life_simd/example.js b/native_client_sdk/src/examples/demo/life_simd/example.js
new file mode 100644
index 0000000..81426b9
--- /dev/null
+++ b/native_client_sdk/src/examples/demo/life_simd/example.js
@@ -0,0 +1,51 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+function moduleDidLoad() {  // Empty: this example does no work in this hook.
+}
+
+
+// Hook up click handlers for the page controls. Three of them forward a
+// message to the NaCl module via postMessage(); 'large' resizes 'nacl_module'.
+function attachListeners() {
+ document.getElementById('benchmark').addEventListener('click',
+ function() {
+ common.naclModule.postMessage({'message' : 'run_benchmark'});  // Handled in Life::HandleEvent.
+ common.updateStatus('BENCHMARKING... (please wait)');
+ });
+ document.getElementById('simd').addEventListener('click',
+ function() {
+ var simd = document.getElementById('simd');
+ common.naclModule.postMessage({'message' : 'set_simd',
+ 'value' : simd.checked});  // Boolean: current checkbox state.
+ });
+ document.getElementById('multithread').addEventListener('click',
+ function() {
+ var multithread = document.getElementById('multithread');
+ common.naclModule.postMessage({'message' : 'set_threading',
+ 'value' : multithread.checked});  // Boolean: current checkbox state.
+ });
+ document.getElementById('large').addEventListener('click',
+ function() {
+ var large = document.getElementById('large');
+ var nacl = document.getElementById('nacl_module');
+ nacl.setAttribute('width', large.checked ? 1280 : 640);  // Large field is 1280x1024,
+ nacl.setAttribute('height', large.checked ? 1024 : 640);  // otherwise 640x640.
+ });
+}
+
+
+// Handle a message coming from the NaCl module. Only 'benchmark_result' is acted on.
+function handleMessage(message_event) {
+ if (message_event.data.message == 'benchmark_result') {
+ // The payload's 'value' field is the benchmark duration in seconds.
+ var result = message_event.data.value;
+ console.log('Benchmark result:' + result);
+ result = (Math.round(result * 1000) / 1000).toFixed(3);  // Three decimal places.
+ document.getElementById('result').textContent =
+ 'Result: ' + result + ' seconds';
+ common.updateStatus('SUCCESS');
+ }
+}
+
diff --git a/native_client_sdk/src/examples/demo/life_simd/index.html b/native_client_sdk/src/examples/demo/life_simd/index.html
index 5d354fd..b1f2e02 100644
--- a/native_client_sdk/src/examples/demo/life_simd/index.html
+++ b/native_client_sdk/src/examples/demo/life_simd/index.html
@@ -10,12 +10,24 @@ found in the LICENSE file.
<meta http-equiv="Expires" content="-1">
<title>{{title}}</title>
<script type="text/javascript" src="common.js"></script>
+ <script type="text/javascript" src="example.js"></script>
</head>
<body data-width="640" data-height="640" {{attrs}}>
<h1>{{title}}</h1>
<h2>Status: <code id="statusField">NO-STATUS</code></h2>
<!-- The NaCl plugin will be embedded inside the element with id "listener".
See common.js.-->
+ <div>
+ Conway's game of life is a cellular automaton by British mathematician John
+ Horton Conway. Use the touch screen or mouse pointer to interact with the
+ simulation.
+ <br>
+ <input type="checkbox" id="simd" checked >Use SIMD<br>
+ <input type="checkbox" id="multithread" checked >Use multiple threads<br>
+ <input type="checkbox" id="large" >Use large field<br>
+ <input type="submit" id="benchmark" value="Run Benchmark">
+ <label id="result" name="result"> </label>
+ </div>
<div id="listener"></div>
</body>
</html>
diff --git a/native_client_sdk/src/examples/demo/life_simd/life.c b/native_client_sdk/src/examples/demo/life_simd/life.c
deleted file mode 100644
index e928a4a..0000000
--- a/native_client_sdk/src/examples/demo/life_simd/life.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/* Copyright 2014 The Chromium Authors. All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "ppapi/c/pp_resource.h"
-#include "ppapi/c/ppb_core.h"
-#include "ppapi/c/ppb_fullscreen.h"
-#include "ppapi/c/ppb_graphics_2d.h"
-#include "ppapi/c/ppb_image_data.h"
-#include "ppapi/c/ppb_input_event.h"
-#include "ppapi/c/ppb_instance.h"
-#include "ppapi/c/ppb_view.h"
-
-#include "ppapi_simple/ps_event.h"
-#include "ppapi_simple/ps_main.h"
-
-PPB_Core* g_pCore;
-PPB_Fullscreen* g_pFullscreen;
-PPB_Graphics2D* g_pGraphics2D;
-PPB_ImageData* g_pImageData;
-PPB_Instance* g_pInstance;
-PPB_View* g_pView;
-PPB_InputEvent* g_pInputEvent;
-PPB_KeyboardInputEvent* g_pKeyboardInput;
-PPB_MouseInputEvent* g_pMouseInput;
-PPB_TouchInputEvent* g_pTouchInput;
-
-struct {
- PP_Resource ctx;
- struct PP_Size size;
- int bound;
- uint8_t* cell_in;
- uint8_t* cell_out;
- int32_t cell_stride;
-} g_Context;
-
-
-const unsigned int kInitialRandSeed = 0xC0DE533D;
-const int kCellAlignment = 0x10;
-
-#define INLINE inline __attribute__((always_inline))
-
-/* BGRA helper macro, for constructing a pixel for a BGRA buffer. */
-#define MakeBGRA(b, g, r, a) \
- (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
-
-/* 128 bit vector types */
-typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
-
-/* Helper function to broadcast x across 16 element vector. */
-INLINE u8x16_t broadcast(uint8_t x) {
- u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
- return r;
-}
-
-
-/*
- * Convert a count value into a live (green) or dead color value.
- */
-const uint32_t kNeighborColors[] = {
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
- MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
- MakeBGRA(0x00, 0xFF, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
- MakeBGRA(0x00, 0x00, 0x00, 0xFF),
-};
-
-/*
- * These represent the new health value of a cell based on its neighboring
- * values. The health is binary: either alive or dead.
- */
-const uint8_t kIsAlive[] = {
- 0, 0, 0, 0, 0, 1, 1, 1, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-void UpdateContext(uint32_t width, uint32_t height) {
- int stride = (width + kCellAlignment - 1) & ~kCellAlignment;
- if (width != g_Context.size.width || height != g_Context.size.height) {
-
- size_t size = stride * height;
- size_t index;
-
- free(g_Context.cell_in);
- free(g_Context.cell_out);
-
- /* Create a new context */
- void* in_buffer = NULL;
- void* out_buffer = NULL;
- /* alloc buffers aligned on 16 bytes */
- posix_memalign(&in_buffer, kCellAlignment, size);
- posix_memalign(&out_buffer, kCellAlignment, size);
- g_Context.cell_in = (uint8_t*) in_buffer;
- g_Context.cell_out = (uint8_t*) out_buffer;
-
- memset(g_Context.cell_out, 0, size);
- for (index = 0; index < size; index++) {
- g_Context.cell_in[index] = rand() & 1;
- }
- }
-
- /* Recreate the graphics context on a view change */
- g_pCore->ReleaseResource(g_Context.ctx);
- g_Context.size.width = width;
- g_Context.size.height = height;
- g_Context.cell_stride = stride;
- g_Context.ctx =
- g_pGraphics2D->Create(PSGetInstanceId(), &g_Context.size, PP_TRUE);
- g_Context.bound =
- g_pInstance->BindGraphics(PSGetInstanceId(), g_Context.ctx);
-}
-
-void DrawCell(int32_t x, int32_t y) {
- int32_t width = g_Context.size.width;
- int32_t height = g_Context.size.height;
- int32_t stride = g_Context.cell_stride;
-
- if (!g_Context.cell_in) return;
-
- if (x > 0 && x < width - 1 && y > 0 && y < height - 1) {
- g_Context.cell_in[x - 1 + y * stride] = 1;
- g_Context.cell_in[x + 1 + y * stride] = 1;
- g_Context.cell_in[x + (y - 1) * stride] = 1;
- g_Context.cell_in[x + (y + 1) * stride] = 1;
- }
-}
-
-void ProcessTouchEvent(PSEvent* event) {
- uint32_t count = g_pTouchInput->GetTouchCount(event->as_resource,
- PP_TOUCHLIST_TYPE_TOUCHES);
- uint32_t i, j;
- for (i = 0; i < count; i++) {
- struct PP_TouchPoint touch = g_pTouchInput->GetTouchByIndex(
- event->as_resource, PP_TOUCHLIST_TYPE_TOUCHES, i);
- int radius = (int)touch.radius.x;
- int x = (int)touch.position.x;
- int y = (int)touch.position.y;
- /* num = 1/100th the area of touch point */
- int num = (int)(M_PI * radius * radius / 100.0f);
- for (j = 0; j < num; j++) {
- int dx = rand() % (radius * 2) - radius;
- int dy = rand() % (radius * 2) - radius;
- /* only plot random cells within the touch area */
- if (dx * dx + dy * dy <= radius * radius)
- DrawCell(x + dx, y + dy);
- }
- }
-}
-
-void ProcessEvent(PSEvent* event) {
- switch(event->type) {
- /* If the view updates, build a new Graphics 2D Context */
- case PSE_INSTANCE_DIDCHANGEVIEW: {
- struct PP_Rect rect;
-
- g_pView->GetRect(event->as_resource, &rect);
- UpdateContext(rect.size.width, rect.size.height);
- break;
- }
-
- case PSE_INSTANCE_HANDLEINPUT: {
- PP_InputEvent_Type type = g_pInputEvent->GetType(event->as_resource);
- PP_InputEvent_Modifier modifiers =
- g_pInputEvent->GetModifiers(event->as_resource);
-
- switch(type) {
- case PP_INPUTEVENT_TYPE_MOUSEDOWN:
- case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
- struct PP_Point location =
- g_pMouseInput->GetPosition(event->as_resource);
- /* If the button is down, draw */
- if (modifiers & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
- DrawCell(location.x, location.y);
- }
- break;
- }
-
- case PP_INPUTEVENT_TYPE_TOUCHSTART:
- case PP_INPUTEVENT_TYPE_TOUCHMOVE:
- ProcessTouchEvent(event);
- break;
-
- case PP_INPUTEVENT_TYPE_KEYDOWN: {
- PP_Bool fullscreen = g_pFullscreen->IsFullscreen(PSGetInstanceId());
- g_pFullscreen->SetFullscreen(PSGetInstanceId(),
- fullscreen ? PP_FALSE : PP_TRUE);
- break;
- }
-
- default:
- break;
- }
- /* case PSE_INSTANCE_HANDLEINPUT */
- break;
- }
-
- default:
- break;
- }
-}
-
-
-void Stir() {
- int32_t width = g_Context.size.width;
- int32_t height = g_Context.size.height;
- int32_t stride = g_Context.cell_stride;
- int32_t i;
- if (g_Context.cell_in == NULL || g_Context.cell_out == NULL)
- return;
-
- for (i = 0; i < width; ++i) {
- g_Context.cell_in[i] = rand() & 1;
- g_Context.cell_in[i + (height - 1) * stride] = rand() & 1;
- }
- for (i = 0; i < height; ++i) {
- g_Context.cell_in[i * stride] = rand() & 1;
- g_Context.cell_in[i * stride + (width - 1)] = rand() & 1;
- }
-}
-
-
-void Render() {
- struct PP_Size* psize = &g_Context.size;
- PP_ImageDataFormat format = PP_IMAGEDATAFORMAT_BGRA_PREMUL;
-
- /*
- * Create a buffer to draw into. Since we are waiting until the next flush
- * chrome has an opportunity to cache this buffer see ppb_graphics_2d.h.
- */
- PP_Resource image =
- g_pImageData->Create(PSGetInstanceId(), format, psize, PP_FALSE);
- uint8_t* pixels = g_pImageData->Map(image);
-
- struct PP_ImageDataDesc desc;
- uint8_t* cell_temp;
- uint32_t x, y;
-
- /* If we somehow have not allocated these pointers yet, skip this frame. */
- if (!g_Context.cell_in || !g_Context.cell_out) return;
-
- /* Get the pixel stride. */
- g_pImageData->Describe(image, &desc);
-
- /* Stir up the edges to prevent the simulation from reaching steady state. */
- Stir();
-
- /*
- * Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
- * wide perimeter is excluded from the simulation update; only cells from
- * x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
- */
-
- for (y = 1; y < g_Context.size.height - 1; ++y) {
- uint8_t *src0 = (g_Context.cell_in + (y - 1) * g_Context.cell_stride);
- uint8_t *src1 = src0 + g_Context.cell_stride;
- uint8_t *src2 = src1 + g_Context.cell_stride;
- uint8_t *dst = (g_Context.cell_out + y * g_Context.cell_stride) + 1;
- uint32_t *pixel_line = (uint32_t*) (pixels + y * desc.stride);
- const u8x16_t kOne = broadcast(1);
- const u8x16_t kFour = broadcast(4);
- const u8x16_t kEight = broadcast(8);
- const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
- /* Prime the src */
- u8x16_t src00 = *(u8x16_t*)&src0[0];
- u8x16_t src01 = *(u8x16_t*)&src0[16];
- u8x16_t src10 = *(u8x16_t*)&src1[0];
- u8x16_t src11 = *(u8x16_t*)&src1[16];
- u8x16_t src20 = *(u8x16_t*)&src2[0];
- u8x16_t src21 = *(u8x16_t*)&src2[16];
-
- /* This inner loop is SIMD - each loop iteration will process 16 cells. */
- for (x = 1; (x + 15) < (g_Context.size.width - 1); x += 16) {
-
- /*
- * Construct jittered source temps, using __builtin_shufflevector(..) to
- * extract a shifted 16 element vector from the 32 element concatenation
- * of two source vectors.
- */
- u8x16_t src0j0 = src00;
- u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
- 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- u8x16_t src1j0 = src10;
- u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
- 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
- u8x16_t src2j0 = src20;
- u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
- 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
- u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
- 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
-
- /* Sum the jittered sources to construct neighbor count. */
- u8x16_t count = src0j0 + src0j1 + src0j2 +
- src1j0 + + src1j2 +
- src2j0 + src2j1 + src2j2;
- /* Add the center cell. */
- count = count + count + src1j1;
- /* If count > 4 and < 8, center cell will be alive in the next frame. */
- u8x16_t alive1 = count > kFour;
- u8x16_t alive2 = count < kEight;
- /* Intersect the two comparisons from above. */
- u8x16_t alive = alive1 & alive2;
-
- /*
- * At this point, alive[x] will be one of two values:
- * 0x00 for a dead cell
- * 0xFF for an alive cell.
- *
- * Next, convert alive cells to green pixel color.
- * Use __builtin_shufflevector(..) to construct output pixels from
- * concantination of alive vector and kZero255 const vector.
- * Indices 0..15 select the 16 cells from alive vector.
- * Index 16 is zero constant from kZero255 constant vector.
- * Index 17 is 255 constant from kZero255 constant vector.
- * Output pixel color values are in BGRABGRABGRABGRA order.
- * Since each pixel needs 4 bytes of color information, 16 cells will
- * need to expand to 4 seperate 16 byte pixel splats.
- */
- u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
- 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
- u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
- 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
- u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
- 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
- u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
- 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
-
- /* Write 16 pixels to output pixel buffer. */
- *(u8x16_t*)(pixel_line + 0) = pixel0_3;
- *(u8x16_t*)(pixel_line + 4) = pixel4_7;
- *(u8x16_t*)(pixel_line + 8) = pixel8_11;
- *(u8x16_t*)(pixel_line + 12) = pixel12_15;
-
- /* Convert alive mask to 1 or 0 and store in destination cell array. */
- *(u8x16_t*)dst = alive & kOne;
-
- /* Increment pointers. */
- pixel_line += 16;
- dst += 16;
- src0 += 16;
- src1 += 16;
- src2 += 16;
-
- /* Shift source over by 16 cells and read the next 16 cells. */
- src00 = src01;
- src01 = *(u8x16_t*)&src0[16];
- src10 = src11;
- src11 = *(u8x16_t*)&src1[16];
- src20 = src21;
- src21 = *(u8x16_t*)&src2[16];
- }
-
- /*
- * The SIMD loop above does 16 cells at a time. The loop below is the
- * regular version which processes one cell at a time. It is used to
- * finish the remainder of the scanline not handled by the SIMD loop.
- */
- for (; x < (g_Context.size.width - 1); ++x) {
- /* Sum the jittered sources to construct neighbor count. */
- int count = src0[0] + src0[1] + src0[2] +
- src1[0] + + src1[2] +
- src2[0] + src2[1] + src2[2];
- /* Add the center cell. */
- count = count + count + src1[1];
- /* Use table lookup indexed by count to determine pixel & alive state. */
- uint32_t color = kNeighborColors[count];
- *pixel_line++ = color;
- *dst++ = kIsAlive[count];
- ++src0;
- ++src1;
- ++src2;
- }
- }
-
- cell_temp = g_Context.cell_in;
- g_Context.cell_in = g_Context.cell_out;
- g_Context.cell_out = cell_temp;
-
- /* Unmap the range, we no longer need it. */
- g_pImageData->Unmap(image);
-
- /* Replace the contexts, and block until it's on the screen. */
- g_pGraphics2D->ReplaceContents(g_Context.ctx, image);
- g_pGraphics2D->Flush(g_Context.ctx, PP_BlockUntilComplete());
-
- /* Release the image data, we no longer need it. */
- g_pCore->ReleaseResource(image);
-}
-
-/*
- * Starting point for the module. We do not use main since it would
- * collide with main in libppapi_cpp.
- */
-int example_main(int argc, char *argv[]) {
- fprintf(stdout,"Started main.\n");
- g_pCore = (PPB_Core*)PSGetInterface(PPB_CORE_INTERFACE);
- g_pFullscreen = (PPB_Fullscreen*)PSGetInterface(PPB_FULLSCREEN_INTERFACE);
- g_pGraphics2D = (PPB_Graphics2D*)PSGetInterface(PPB_GRAPHICS_2D_INTERFACE);
- g_pInstance = (PPB_Instance*)PSGetInterface(PPB_INSTANCE_INTERFACE);
- g_pImageData = (PPB_ImageData*)PSGetInterface(PPB_IMAGEDATA_INTERFACE);
- g_pView = (PPB_View*)PSGetInterface(PPB_VIEW_INTERFACE);
-
- g_pInputEvent =
- (PPB_InputEvent*) PSGetInterface(PPB_INPUT_EVENT_INTERFACE);
- g_pKeyboardInput = (PPB_KeyboardInputEvent*)
- PSGetInterface(PPB_KEYBOARD_INPUT_EVENT_INTERFACE);
- g_pMouseInput =
- (PPB_MouseInputEvent*) PSGetInterface(PPB_MOUSE_INPUT_EVENT_INTERFACE);
- g_pTouchInput =
- (PPB_TouchInputEvent*) PSGetInterface(PPB_TOUCH_INPUT_EVENT_INTERFACE);
-
- PSEventSetFilter(PSE_ALL);
- while (1) {
- /* Process all waiting events without blocking */
- PSEvent* event;
- while ((event = PSEventTryAcquire()) != NULL) {
- ProcessEvent(event);
- PSEventRelease(event);
- }
-
- /* Render a frame, blocking until complete. */
- if (g_Context.bound) {
- Render();
- }
- }
- return 0;
-}
-
-/*
- * Register the function to call once the Instance Object is initialized.
- * see: pappi_simple/ps_main.h
- */
-PPAPI_SIMPLE_REGISTER_MAIN(example_main);
diff --git a/native_client_sdk/src/examples/demo/life_simd/life.cc b/native_client_sdk/src/examples/demo/life_simd/life.cc
new file mode 100644
index 0000000..e784b99
--- /dev/null
+++ b/native_client_sdk/src/examples/demo/life_simd/life.cc
@@ -0,0 +1,524 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <ppapi/c/ppb_input_event.h>
+#include <ppapi/cpp/fullscreen.h>
+#include <ppapi/cpp/input_event.h>
+#include <ppapi/cpp/var.h>
+#include <ppapi/cpp/var_array.h>
+#include <ppapi/cpp/var_array_buffer.h>
+#include <ppapi/cpp/var_dictionary.h>
+
+#include "ppapi_simple/ps.h"
+#include "ppapi_simple/ps_context_2d.h"
+#include "ppapi_simple/ps_event.h"
+#include "ppapi_simple/ps_instance.h"
+#include "ppapi_simple/ps_interface.h"
+#include "ppapi_simple/ps_main.h"
+#include "sdk_util/macros.h"
+#include "sdk_util/thread_pool.h"
+
+using namespace sdk_util; // For sdk_util::ThreadPool
+
+namespace {
+
+#define INLINE inline __attribute__((always_inline))
+
+// BGRA helper macro, for constructing a pixel for a BGRA buffer.
+#define MakeBGRA(b, g, r, a) \
+ (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+const int kFramesToBenchmark = 100;
+const int kCellAlignment = 0x10;
+
+// 128 bit vector types
+typedef uint8_t u8x16_t __attribute__ ((vector_size (16)));
+
+// Returns a u8x16_t vector whose 16 lanes are all set to x.
+INLINE u8x16_t broadcast(uint8_t x) {
+ u8x16_t r = {x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x};
+ return r;
+}
+
+// Maps a count value (2 * sum of the eight neighbors + center cell, 0..17) to a live (green) or dead (black) BGRA pixel.
+const uint32_t kNeighborColors[] = {
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),  // count 5: live cell with 2 live neighbors
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),  // count 6: dead cell with 3 live neighbors
+ MakeBGRA(0x00, 0xFF, 0x00, 0xFF),  // count 7: live cell with 3 live neighbors
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+ MakeBGRA(0x00, 0x00, 0x00, 0xFF),
+};
+
+// New health value of a cell (1 = alive, 0 = dead), indexed by the same count
+// value as kNeighborColors. Only counts 5, 6 and 7 produce a live cell.
+const uint8_t kIsAlive[] = {
+ 0, 0, 0, 0, 0, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+// Timer helper for benchmarking. Returns seconds elapsed since program start,
+// as a double.
+timeval start_tv;
+int start_tv_retv = gettimeofday(&start_tv, NULL);
+
+// Returns the wall-clock time, in seconds, elapsed since start_tv was captured
+// during static initialization. Returns 0.0 if either the initial or the
+// current gettimeofday() call failed.
+inline double getseconds() {
+  const double usec_to_sec = 0.000001;
+  timeval tv;
+  if ((0 == start_tv_retv) && (0 == gettimeofday(&tv, NULL))) {
+    // Subtract the starting microseconds as well as the starting seconds;
+    // otherwise the result is offset from the true elapsed time by the
+    // sub-second part of start_tv (up to one second).
+    return (tv.tv_sec - start_tv.tv_sec) +
+           (tv.tv_usec - start_tv.tv_usec) * usec_to_sec;
+  }
+  return 0.0;
+}
+} // namespace
+
+
+class Life {
+ public:
+ Life();
+ virtual ~Life();
+ // Runs one tick of the simulation and updates the 2D output.
+ void Update();
+ // Handles an input event from the user, or a message from JS.
+ void HandleEvent(PSEvent* ps_event);
+ private:
+ void UpdateContext();  // Re-syncs the cell buffers with the 2D context size.
+ void DrawCell(int32_t x, int32_t y);  // Sets the four cells adjacent to (x, y).
+ void ProcessTouchEvent(const pp::TouchInputEvent& touches);
+ void PostUpdateMessage(const char* message, double value);  // Posts {message, value} to JS.
+ void StartBenchmark();
+ void EndBenchmark();
+ void Stir();  // Randomizes the border cells of cell_in_.
+ void wSimulate(int y);  // Simulates a single row y.
+ static void wSimulateEntry(int y, void* data);
+ void Simulate();
+
+ bool simd_;  // Selects the SIMD inner loop in wSimulate(); set by 'set_simd'.
+ bool multithread_;  // Set by the 'set_threading' message.
+ bool benchmarking_;  // True between StartBenchmark() and EndBenchmark().
+ int benchmark_frame_counter_;  // Set to kFramesToBenchmark when a benchmark starts.
+ double bench_start_time_;  // getseconds() value at StartBenchmark().
+ double bench_end_time_;  // getseconds() value at EndBenchmark().
+ uint8_t* cell_in_;  // Cells read by wSimulate(); one byte per cell.
+ uint8_t* cell_out_;  // Cells written by wSimulate().
+ int32_t cell_stride_;  // Bytes per cell row, a multiple of kCellAlignment.
+ int32_t width_;  // Context width when the cell buffers were last allocated.
+ int32_t height_;  // Context height when the cell buffers were last allocated.
+ PSContext2D_t* ps_context_;  // Allocated in the constructor, freed in the destructor.
+ ThreadPool* workers_;  // Created in the constructor, deleted in the destructor.
+};
+
+Life::Life() :
+ simd_(true),
+ multithread_(true),
+ benchmarking_(false),
+ benchmark_frame_counter_(0),
+ bench_start_time_(0.0),
+ bench_end_time_(0.0),
+ cell_in_(NULL),  // Cell buffers are allocated later, in UpdateContext().
+ cell_out_(NULL),
+ cell_stride_(0),
+ width_(0),
+ height_(0) {
+ ps_context_ = PSContext2DAllocate(PP_IMAGEDATAFORMAT_BGRA_PREMUL);
+ // Query the system for the number of online processors via sysconf().
+ int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ if (num_threads < 2)  // Also covers sysconf() reporting an error (-1).
+ num_threads = 2;
+ workers_ = new ThreadPool(num_threads);
+ PSEventSetFilter(PSE_ALL);  // Ask ppapi_simple for every event type.
+}
+
+// Releases everything the simulation owns: the worker thread pool, the two
+// cell buffers allocated with posix_memalign(), and the 2D context.
+Life::~Life() {
+  // Destroy the workers first so nothing can still be touching the buffers.
+  delete workers_;
+  // The cell buffers were previously leaked here. free(NULL) is a no-op, so
+  // this is safe even if no buffers were ever allocated.
+  free(cell_in_);
+  free(cell_out_);
+  PSContext2DFree(ps_context_);
+}
+
+// Recomputes the cell stride for the current 2D context and, when the context
+// dimensions differ from the cached width_/height_, replaces both cell
+// buffers: cell_in_ is seeded with random cells and cell_out_ is zeroed.
+// If an allocation fails, both buffers are left NULL and width_/height_ keep
+// their old values, so the next call tries again.
+void Life::UpdateContext() {
+  // Round the row length up to a multiple of kCellAlignment (16 bytes).
+  cell_stride_ = (ps_context_->width + kCellAlignment - 1) &
+                 ~(kCellAlignment - 1);
+  size_t size = cell_stride_ * ps_context_->height;
+
+  if (ps_context_->width != width_ || ps_context_->height != height_) {
+    free(cell_in_);
+    free(cell_out_);
+    // Clear the members immediately so they never dangle if allocation fails.
+    cell_in_ = NULL;
+    cell_out_ = NULL;
+
+    // Allocate new buffers aligned on 16 bytes. posix_memalign() reports
+    // failure through its return value, which must be checked before the
+    // buffers are written to.
+    void* in_buffer = NULL;
+    void* out_buffer = NULL;
+    if (posix_memalign(&in_buffer, kCellAlignment, size) != 0)
+      return;
+    if (posix_memalign(&out_buffer, kCellAlignment, size) != 0) {
+      free(in_buffer);
+      return;
+    }
+    cell_in_ = static_cast<uint8_t*>(in_buffer);
+    cell_out_ = static_cast<uint8_t*>(out_buffer);
+
+    memset(cell_out_, 0, size);
+    for (size_t index = 0; index < size; index++) {
+      cell_in_[index] = rand() & 1;
+    }
+    width_ = ps_context_->width;
+    height_ = ps_context_->height;
+  }
+}
+
+void Life::DrawCell(int32_t x, int32_t y) {  // Marks the four cells adjacent to (x, y) as alive.
+ if (!cell_in_) return;  // No cell buffer allocated yet.
+ if (x > 0 && x < ps_context_->width - 1 &&
+ y > 0 && y < ps_context_->height - 1) {  // Ignore points on or outside the one-cell border.
+ cell_in_[x - 1 + y * cell_stride_] = 1;  // left
+ cell_in_[x + 1 + y * cell_stride_] = 1;  // right
+ cell_in_[x + (y - 1) * cell_stride_] = 1;  // above
+ cell_in_[x + (y + 1) * cell_stride_] = 1;  // below
+ }
+}
+
+void Life::ProcessTouchEvent(const pp::TouchInputEvent& touches) {  // Scatters live cells inside each touch point.
+ uint32_t count = touches.GetTouchCount(PP_TOUCHLIST_TYPE_TOUCHES);
+ uint32_t i, j;
+ for (i = 0; i < count; i++) {
+ pp::TouchPoint touch =
+ touches.GetTouchByIndex(PP_TOUCHLIST_TYPE_TOUCHES, i);
+ int radius = (int)(touch.radii().x());  // Only the x radius is used.
+ int x = (int)(touch.position().x());
+ int y = (int)(touch.position().y());
+ // num = 1/100th the area of the touch point (0 when radius is 0, so the loop below is skipped).
+ uint32_t num = (uint32_t)(M_PI * radius * radius / 100.0f);
+ for (j = 0; j < num; j++) {
+ int dx = rand() % (radius * 2) - radius;  // Random offset in [-radius, radius).
+ int dy = rand() % (radius * 2) - radius;
+ // Only plot random cells that fall within the circular touch area.
+ if (dx * dx + dy * dy <= radius * radius)
+ DrawCell(x + dx, y + dy);
+ }
+ }
+}
+
+void Life::PostUpdateMessage(const char* message_name, double value) {  // Posts {"message": message_name, "value": value}.
+ pp::VarDictionary message;
+ message.Set("message", message_name);
+ message.Set("value", value);
+ PSInterfaceMessaging()->PostMessage(PSGetInstanceId(), message.pp_var());
+}
+
+void Life::StartBenchmark() {  // Logs the current settings and arms the benchmark.
+ printf("Running benchmark... (SIMD: %s, multi-threading: %s, size: %dx%d)\n",
+ simd_ ? "enabled" : "disabled",
+ multithread_ ? "enabled" : "disabled",
+ ps_context_->width,
+ ps_context_->height);
+ benchmarking_ = true;
+ bench_start_time_ = getseconds();
+ benchmark_frame_counter_ = kFramesToBenchmark;  // Counter starts at the number of frames to benchmark.
+}
+
+void Life::EndBenchmark() {  // Stops the benchmark and reports the elapsed time.
+ double total_time;
+ bench_end_time_ = getseconds();
+ benchmarking_ = false;
+ total_time = bench_end_time_ - bench_start_time_;  // Elapsed seconds.
+ printf("Finished - benchmark took %f seconds\n", total_time);
+ // Send the benchmark result to JS as a 'benchmark_result' message.
+ PostUpdateMessage("benchmark_result", total_time);
+}
+
+void Life::HandleEvent(PSEvent* ps_event) {  // Dispatches one ppapi_simple event.
+ // Give the 2D context first chance at the event; on a nonzero return, re-sync the cell buffers and stop.
+ if (0 != PSContext2DHandleEvent(ps_context_, ps_event)) {
+ UpdateContext();
+ return;
+ }
+
+ switch(ps_event->type) {
+
+ case PSE_INSTANCE_HANDLEINPUT: {
+ pp::InputEvent event(ps_event->as_resource);
+
+ switch(event.GetType()) {
+ case PP_INPUTEVENT_TYPE_MOUSEDOWN:
+ case PP_INPUTEVENT_TYPE_MOUSEMOVE: {
+ pp::MouseInputEvent mouse = pp::MouseInputEvent(event);
+ // Draw only while the left mouse button is down.
+ if (mouse.GetModifiers() & PP_INPUTEVENT_MODIFIER_LEFTBUTTONDOWN) {
+ PP_Point location = mouse.GetPosition();
+ DrawCell(location.x, location.y);
+ }
+ break;
+ }
+
+ case PP_INPUTEVENT_TYPE_TOUCHSTART:
+ case PP_INPUTEVENT_TYPE_TOUCHMOVE: {
+ pp::TouchInputEvent touches = pp::TouchInputEvent(event);
+ ProcessTouchEvent(touches);
+ break;
+ }
+
+ case PP_INPUTEVENT_TYPE_KEYDOWN: {  // Any key press toggles fullscreen.
+ pp::Fullscreen fullscreen(PSInstance::GetInstance());
+ bool isFullscreen = fullscreen.IsFullscreen();
+ fullscreen.SetFullscreen(!isFullscreen);
+ break;
+ }
+
+ default:
+ break;
+ }
+ break; // case PSE_INSTANCE_HANDLEINPUT
+ }
+
+ case PSE_INSTANCE_HANDLEMESSAGE: {
+ // Convert the Pepper Simple message to PPAPI C++ vars; only dictionaries are acted on.
+ pp::Var var(ps_event->as_var);
+ if (var.is_dictionary()) {
+ pp::VarDictionary dictionary(var);
+ std::string message = dictionary.Get("message").AsString();
+ if (message == "run_benchmark" && !benchmarking_) {  // Ignored while a benchmark is already running.
+ StartBenchmark();
+ } else if (message == "set_simd") {
+ simd_ = dictionary.Get("value").AsBool();
+ } else if (message == "set_threading") {
+ multithread_ = dictionary.Get("value").AsBool();
+ }
+ }
+ break; // case PSE_INSTANCE_HANDLEMESSAGE
+ }
+
+ default:
+ break;
+ }
+}
+
+void Life::Stir() {  // Randomizes the outermost rows and columns of cell_in_.
+ int32_t width = ps_context_->width;
+ int32_t height = ps_context_->height;
+ int32_t stride = cell_stride_;
+ int32_t i;
+ if (cell_in_ == NULL || cell_out_ == NULL)  // Nothing to do until the buffers exist.
+ return;
+
+ for (i = 0; i < width; ++i) {
+ cell_in_[i] = rand() & 1;  // top row
+ cell_in_[i + (height - 1) * stride] = rand() & 1;  // bottom row
+ }
+ for (i = 0; i < height; ++i) {
+ cell_in_[i * stride] = rand() & 1;  // left column
+ cell_in_[i * stride + (width - 1)] = rand() & 1;  // right column
+ }
+}
+
+void Life::wSimulate(int y) {
+ // Don't run simulation on top and bottom borders
+ if (y < 1 || y >= ps_context_->height - 1)
+ return;
+
+ // Do neighbor summation; apply rules, output pixel color. Note that a 1 cell
+ // wide perimeter is excluded from the simulation update; only cells from
+ // x = 1 to x < width - 1 and y = 1 to y < height - 1 are updated.
+ uint8_t *src0 = (cell_in_ + (y - 1) * cell_stride_);
+ uint8_t *src1 = src0 + cell_stride_;
+ uint8_t *src2 = src1 + cell_stride_;
+ uint8_t *dst = (cell_out_ + y * cell_stride_) + 1;
+ uint32_t *pixels = static_cast<uint32_t *>(ps_context_->data);
+ uint32_t *pixel_line = // static_cast<uint32_t*>
+ (pixels + y * ps_context_->stride / sizeof(uint32_t));
+ int32_t x = 1;
+
+ if (simd_) {
+ const u8x16_t kOne = broadcast(1);
+ const u8x16_t kFour = broadcast(4);
+ const u8x16_t kEight = broadcast(8);
+ const u8x16_t kZero255 = {0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ // Prime the src
+ u8x16_t src00 = *reinterpret_cast<u8x16_t*>(&src0[0]);
+ u8x16_t src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
+ u8x16_t src10 = *reinterpret_cast<u8x16_t*>(&src1[0]);
+ u8x16_t src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
+ u8x16_t src20 = *reinterpret_cast<u8x16_t*>(&src2[0]);
+ u8x16_t src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
+
+ // This inner loop is SIMD - each loop iteration will process 16 cells.
+ for (; (x + 15) < (ps_context_->width - 1); x += 16) {
+
+ // Construct jittered source temps, using __builtin_shufflevector(..) to
+ // extract a shifted 16 element vector from the 32 element concatenation
+ // of two source vectors.
+ u8x16_t src0j0 = src00;
+ u8x16_t src0j1 = __builtin_shufflevector(src00, src01,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src0j2 = __builtin_shufflevector(src00, src01,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ u8x16_t src1j0 = src10;
+ u8x16_t src1j1 = __builtin_shufflevector(src10, src11,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src1j2 = __builtin_shufflevector(src10, src11,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+ u8x16_t src2j0 = src20;
+ u8x16_t src2j1 = __builtin_shufflevector(src20, src21,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ u8x16_t src2j2 = __builtin_shufflevector(src20, src21,
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+
+ // Sum the jittered sources to construct neighbor count.
+ u8x16_t count = src0j0 + src0j1 + src0j2 +
+ src1j0 + + src1j2 +
+ src2j0 + src2j1 + src2j2;
+ // Add the center cell.
+ count = count + count + src1j1;
+ // If count > 4 and < 8, center cell will be alive in the next frame.
+ u8x16_t alive1 = count > kFour;
+ u8x16_t alive2 = count < kEight;
+ // Intersect the two comparisons from above.
+ u8x16_t alive = alive1 & alive2;
+
+ // At this point, alive[x] will be one of two values:
+ // 0x00 for a dead cell
+ // 0xFF for an alive cell.
+ //
+ // Next, convert alive cells to green pixel color.
+ // Use __builtin_shufflevector(..) to construct output pixels from
+ // concatenation of alive vector and kZero255 const vector.
+ // Indices 0..15 select the 16 cells from alive vector.
+ // Index 16 is zero constant from kZero255 constant vector.
+ // Index 17 is 255 constant from kZero255 constant vector.
+ // Output pixel color values are in BGRABGRABGRABGRA order.
+ // Since each pixel needs 4 bytes of color information, 16 cells will
+ // need to expand to 4 separate 16 byte pixel splats.
+ u8x16_t pixel0_3 = __builtin_shufflevector(alive, kZero255,
+ 16, 0, 16, 17, 16, 1, 16, 17, 16, 2, 16, 17, 16, 3, 16, 17);
+ u8x16_t pixel4_7 = __builtin_shufflevector(alive, kZero255,
+ 16, 4, 16, 17, 16, 5, 16, 17, 16, 6, 16, 17, 16, 7, 16, 17);
+ u8x16_t pixel8_11 = __builtin_shufflevector(alive, kZero255,
+ 16, 8, 16, 17, 16, 9, 16, 17, 16, 10, 16, 17, 16, 11, 16, 17);
+ u8x16_t pixel12_15 = __builtin_shufflevector(alive, kZero255,
+ 16, 12, 16, 17, 16, 13, 16, 17, 16, 14, 16, 17, 16, 15, 16, 17);
+
+ // Write 16 pixels to output pixel buffer.
+ *reinterpret_cast<u8x16_t*>(pixel_line + 0) = pixel0_3;
+ *reinterpret_cast<u8x16_t*>(pixel_line + 4) = pixel4_7;
+ *reinterpret_cast<u8x16_t*>(pixel_line + 8) = pixel8_11;
+ *reinterpret_cast<u8x16_t*>(pixel_line + 12) = pixel12_15;
+
+ // Convert alive mask to 1 or 0 and store in destination cell array.
+ *reinterpret_cast<u8x16_t*>(dst) = alive & kOne;
+
+ // Increment pointers.
+ pixel_line += 16;
+ dst += 16;
+ src0 += 16;
+ src1 += 16;
+ src2 += 16;
+
+ // Shift source over by 16 cells and read the next 16 cells.
+ src00 = src01;
+ src01 = *reinterpret_cast<u8x16_t*>(&src0[16]);
+ src10 = src11;
+ src11 = *reinterpret_cast<u8x16_t*>(&src1[16]);
+ src20 = src21;
+ src21 = *reinterpret_cast<u8x16_t*>(&src2[16]);
+ }
+ }
+
+ // The SIMD loop above does 16 cells at a time. The loop below is the
+ // regular version which processes one cell at a time. It is used to
+ // finish the remainder of the scanline not handled by the SIMD loop.
+ for (; x < (ps_context_->width - 1); ++x) {
+ // Sum the jittered sources to construct neighbor count.
+ int count = src0[0] + src0[1] + src0[2] +
+ src1[0] + + src1[2] +
+ src2[0] + src2[1] + src2[2];
+ // Add the center cell.
+ count = count + count + src1[1];
+ // Use table lookup indexed by count to determine pixel & alive state.
+ uint32_t color = kNeighborColors[count];
+ *pixel_line++ = color;
+ *dst++ = kIsAlive[count];
+ ++src0;
+ ++src1;
+ ++src2;
+ }
+}
+
+// Static trampoline handed to the worker pool: recovers the Life instance
+// from the opaque pointer and forwards the slice index to wSimulate().
+void Life::wSimulateEntry(int slice, void* thiz) {
+  Life* const self = static_cast<Life*>(thiz);
+  self->wSimulate(slice);
+}
+
+// Advances the simulation by one generation: stirs the edges, runs
+// wSimulateEntry for every scanline (on this thread or via the worker
+// pool), then exchanges the input and output cell buffers.
+void Life::Simulate() {
+  // Perturb the edges so the simulation does not settle into steady state.
+  Stir();
+
+  if (!multithread_) {
+    // Single-threaded path: walk every scanline on the calling thread.
+    for (int line = 0; line < ps_context_->height; ++line) {
+      wSimulateEntry(line, this);
+    }
+  } else {
+    // Multi-threaded path: hand one task per scanline to the worker pool.
+    workers_->Dispatch(ps_context_->height, wSimulateEntry, this);
+  }
+
+  // The freshly written cells become the input of the next generation.
+  std::swap(cell_in_, cell_out_);
+}
+
+// Per-frame driver: acquires the 2D context buffer, steps the simulation
+// once (or repeatedly while a benchmark is in progress), then swaps buffers.
+void Life::Update() {
+  PSContext2DGetBuffer(ps_context_);
+
+  // No pixel data to draw into; nothing to do this frame.
+  if (ps_context_->data == NULL)
+    return;
+
+  // The cell arrays may not have been allocated yet; skip the frame if so.
+  if (!(cell_in_ && cell_out_))
+    return;
+
+  // A normal frame runs exactly one step. While benchmarking, keep stepping
+  // until the remaining-frame counter is no longer positive.
+  for (;;) {
+    Simulate();
+    if (!benchmarking_)
+      break;
+    --benchmark_frame_counter_;
+    if (!(benchmark_frame_counter_ > 0))
+      break;
+  }
+
+  if (benchmarking_)
+    EndBenchmark();
+
+  PSContext2DSwapBuffer(ps_context_);
+}
+
+// Module entry point. It is deliberately not named main, because that
+// would collide with the main provided by libppapi_cpp.
+int example_main(int argc, char* argv[]) {
+  Life life;
+  for (;;) {
+    // Drain every event that is currently queued before stepping.
+    for (PSEvent* event = PSEventTryAcquire(); event != NULL;
+         event = PSEventTryAcquire()) {
+      life.HandleEvent(event);
+      PSEventRelease(event);
+    }
+    // Simulate, render and present one frame.
+    life.Update();
+  }
+  return 0;
+}
+
+// Register example_main as the function to call once the Instance Object
+// is initialized.
+// see: ppapi_simple/ps_main.h
+PPAPI_SIMPLE_REGISTER_MAIN(example_main);