summaryrefslogtreecommitdiffstats
path: root/net/websockets
diff options
context:
space:
mode:
authorricea <ricea@chromium.org>2015-10-06 06:41:43 -0700
committerCommit bot <commit-bot@chromium.org>2015-10-06 13:42:29 +0000
commit500dbe3cfc18c490cc466abef162896302284814 (patch)
treef6650cc5b302dc93f9dd6a2b96eae63167dd5aac /net/websockets
parent10eb8250bc72d8c7211244a0930f126bb5f635bd (diff)
downloadchromium_src-500dbe3cfc18c490cc466abef162896302284814.zip
chromium_src-500dbe3cfc18c490cc466abef162896302284814.tar.gz
chromium_src-500dbe3cfc18c490cc466abef162896302284814.tar.bz2
Use vector operations for WebSocket masking.
GCC and Clang can transparently generate vector operations from normal C++ code, just by changing the type. This permits using SIMD instructions on x86(-64) and, in the future, ARM without additional code complexity. This increases the speed of masking by 50% from 18GBps to 27GBps on x86-64. ARM support is waiting for Neon SIMD support to be enabled by default. See crbug.com/448055. SSE2 has been the default on x86 platforms for over a year. TEST=net_unittests BUG=539259 Review URL: https://codereview.chromium.org/1382143002 Cr-Commit-Position: refs/heads/master@{#352592}
Diffstat (limited to 'net/websockets')
-rw-r--r--net/websockets/websocket_frame.cc30
-rw-r--r--net/websockets/websocket_frame_perftest.cc8
2 files changed, 31 insertions, 7 deletions
diff --git a/net/websockets/websocket_frame.cc b/net/websockets/websocket_frame.cc
index 3782f28..a702207 100644
--- a/net/websockets/websocket_frame.cc
+++ b/net/websockets/websocket_frame.cc
@@ -4,6 +4,9 @@
#include "net/websockets/websocket_frame.h"
+#include <stddef.h>
+#include <stdint.h>
+
#include <algorithm>
#include "base/basictypes.h"
@@ -17,6 +20,21 @@ namespace net {
namespace {
+// GCC (and Clang) can transparently use vector ops. Only try to do this on
+// architectures where we know it works; otherwise the compiler will attempt to
+// emulate the vector ops, which is unlikely to be efficient.
+// TODO(ricea): Add ARCH_CPU_ARM_FAMILY when arm_neon=1 becomes the default.
+#if defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY) && !defined(OS_NACL)
+
+using PackedMaskType = uint32_t __attribute__((vector_size(16)));
+
+#else
+
+using PackedMaskType = size_t;
+
+#endif // defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY) &&
+ // !defined(OS_NACL)
+
const uint8 kFinalBit = 0x80;
const uint8 kReserved1Bit = 0x40;
const uint8 kReserved2Bit = 0x20;
@@ -171,16 +189,14 @@ void MaskWebSocketFramePayload(const WebSocketMaskingKey& masking_key,
DCHECK_GE(data_size, 0);
- // Most of the masking is done one word at a time, except for the beginning
- // and the end of the buffer which may be unaligned. We use size_t to get the
- // word size for this architecture. We require it be a multiple of
- // kMaskingKeyLength in size.
- typedef size_t PackedMaskType;
- PackedMaskType packed_mask_key = 0;
+ // Most of the masking is done in chunks of sizeof(PackedMaskType), except for
+ // the beginning and the end of the buffer which may be unaligned.
+ // sizeof(PackedMaskType) must be a multiple of kMaskingKeyLength.
+ PackedMaskType packed_mask_key;
static const size_t kPackedMaskKeySize = sizeof(packed_mask_key);
static_assert((kPackedMaskKeySize >= kMaskingKeyLength &&
kPackedMaskKeySize % kMaskingKeyLength == 0),
- "word size is not a multiple of mask length");
+ "PackedMaskType size is not a multiple of mask length");
char* const end = data + data_size;
// If the buffer is too small for the vectorised version to be useful, revert
// to the byte-at-a-time implementation early.
diff --git a/net/websockets/websocket_frame_perftest.cc b/net/websockets/websocket_frame_perftest.cc
index 98ea624..bd1de1c 100644
--- a/net/websockets/websocket_frame_perftest.cc
+++ b/net/websockets/websocket_frame_perftest.cc
@@ -53,6 +53,14 @@ TEST_F(WebSocketFrameTestMaskBenchmark, BenchmarkMaskLongPayload) {
Benchmark("Frame_mask_long_payload", &payload.front(), payload.size());
}
+// A 31-byte payload is guaranteed to do 7 single-byte mask operations and 3
+// vector mask operations with an 8-byte vector. With a 16-byte vector it will
+// fall back to the byte-only code path and do 31 single-byte mask operations.
+TEST_F(WebSocketFrameTestMaskBenchmark, Benchmark31BytePayload) {
+ std::vector<char> payload(31, 'a');
+ Benchmark("Frame_mask_31_payload", &payload.front(), payload.size());
+}
+
} // namespace
} // namespace net