summary | refs | log | tree | commit | diff | stats
path: root/third_party/boringssl
diff options
context:
space:
mode:
author: davidben <davidben@chromium.org> 2016-03-04 16:32:41 -0800
committer: Commit bot <commit-bot@chromium.org> 2016-03-05 00:34:33 +0000
commit: 890a0f656b932681ffa69d6f702b4dda676cf7bf (patch)
tree: 59117792b4a24ca3e5eb90d70286310908db5496 /third_party/boringssl
parent: 2ddf3b764c008dfe984af88c247a46815243ce16 (diff)
download: chromium_src-890a0f656b932681ffa69d6f702b4dda676cf7bf.zip
chromium_src-890a0f656b932681ffa69d6f702b4dda676cf7bf.tar.gz
chromium_src-890a0f656b932681ffa69d6f702b4dda676cf7bf.tar.bz2
Roll src/third_party/boringssl/src 708db1646..58218b63b
https://boringssl.googlesource.com/boringssl/+log/708db16463a21b922aed6d393ef74c3c5a366668..58218b63bc033782162168e1462c9c8890606885

This includes a revision of the gn build for BoringSSL so that we build a separate boringssl_fuzzer target in BORINGSSL_UNSAFE_FUZZER_MODE for linking into the fuzzers.

BUG=none

Review URL: https://codereview.chromium.org/1767833002

Cr-Commit-Position: refs/heads/master@{#379412}
Diffstat (limited to 'third_party/boringssl')
-rw-r--r-- third_party/boringssl/BUILD.gn 177
-rw-r--r-- third_party/boringssl/boringssl.gypi 1
-rw-r--r-- third_party/boringssl/boringssl_tests.gypi 15
-rw-r--r-- third_party/boringssl/boringssl_unittest.cc 4
-rw-r--r-- third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S 26
-rw-r--r-- third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S 18
-rw-r--r-- third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S 22
-rw-r--r-- third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S 88
-rw-r--r-- third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S 22
-rw-r--r-- third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S 88
-rw-r--r-- third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm 22
-rw-r--r-- third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm 88
12 files changed, 311 insertions, 260 deletions
diff --git a/third_party/boringssl/BUILD.gn b/third_party/boringssl/BUILD.gn
index 2ac6ec3..91c6ded 100644
--- a/third_party/boringssl/BUILD.gn
+++ b/third_party/boringssl/BUILD.gn
@@ -7,16 +7,36 @@ import("//build/config/sanitizers/sanitizers.gni")
import("//testing/libfuzzer/fuzzer_test.gni")
# Config for us and everybody else depending on BoringSSL.
-config("openssl_config") {
+config("external_config") {
include_dirs = [ "src/include" ]
if (is_component_build) {
defines = [ "BORINGSSL_SHARED_LIBRARY" ]
}
}
-# Config internal to this build file.
-config("openssl_internal_config") {
+# Config internal to this build file, shared by boringssl and boringssl_fuzzer.
+config("internal_config") {
visibility = [ ":*" ] # Only targets in this file can depend on this.
+ defines = [
+ "BORINGSSL_IMPLEMENTATION",
+ "BORINGSSL_NO_STATIC_INITIALIZER",
+ "OPENSSL_SMALL",
+ ]
+ configs = [
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ "//build/config/compiler:no_size_t_to_int_warning",
+ ]
+}
+
+config("no_asm_config") {
+ visibility = [ ":*" ] # Only targets in this file can depend on this.
+ defines = [ "OPENSSL_NO_ASM" ]
+}
+
+config("fuzzer_config") {
+ visibility = [ ":*" ] # Only targets in this file can depend on this.
+ defines = [ "BORINGSSL_UNSAFE_FUZZER_MODE" ]
}
# The list of BoringSSL files is kept in boringssl.gypi.
@@ -25,6 +45,8 @@ gypi_values =
[ rebase_path("//third_party/boringssl/boringssl.gypi") ],
"scope",
[ "//third_party/boringssl/boringssl.gypi" ])
+boringssl_sources =
+ gypi_values.boringssl_crypto_sources + gypi_values.boringssl_ssl_sources
# Windows' assembly is built with Yasm. The other platforms use the platform
# assembler.
@@ -37,83 +59,92 @@ if (is_win && !is_msan) {
sources = gypi_values.boringssl_win_x86_sources
}
}
+} else {
+ source_set("boringssl_asm") {
+ visibility = [ ":*" ] # Only targets in this file can depend on this.
+
+ sources = []
+ asmflags = []
+ include_dirs = [ "src/include" ]
+
+ if (current_cpu == "arm" && is_clang) {
+ # TODO(hans) Enable integrated-as (crbug.com/124610).
+ asmflags += [ "-fno-integrated-as" ]
+ if (is_android) {
+ rebased_android_toolchain_root =
+ rebase_path(android_toolchain_root, root_build_dir)
+
+ # Else /usr/bin/as gets picked up.
+ asmflags += [ "-B${rebased_android_toolchain_root}/bin" ]
+ }
+ }
+
+ if (is_msan) {
+ public_configs = [ ":no_asm_config" ]
+ } else if (current_cpu == "x64") {
+ if (is_mac || is_ios) {
+ sources += gypi_values.boringssl_mac_x86_64_sources
+ } else if (is_linux || is_android) {
+ sources += gypi_values.boringssl_linux_x86_64_sources
+ } else {
+ public_configs = [ ":no_asm_config" ]
+ }
+ } else if (current_cpu == "x86") {
+ if (is_mac || is_ios) {
+ sources += gypi_values.boringssl_mac_x86_sources
+ } else if (is_linux || is_android) {
+ sources += gypi_values.boringssl_linux_x86_sources
+ } else {
+ public_configs = [ ":no_asm_config" ]
+ }
+ } else if (current_cpu == "arm" && (is_linux || is_android)) {
+ sources += gypi_values.boringssl_linux_arm_sources
+ } else if (current_cpu == "arm64" && (is_linux || is_android)) {
+ sources += gypi_values.boringssl_linux_aarch64_sources
+
+ # TODO(davidben): Remove explicit arch flag once https://crbug.com/576858
+ # is fixed.
+ asmflags += [ "-march=armv8-a+crypto" ]
+ } else {
+ public_configs = [ ":no_asm_config" ]
+ }
+ }
}
component("boringssl") {
- sources = gypi_values.boringssl_crypto_sources
- sources += gypi_values.boringssl_ssl_sources
-
- public_configs = [ ":openssl_config" ]
-
- asmflags = []
- cflags = []
- defines = [
- "BORINGSSL_IMPLEMENTATION",
- "BORINGSSL_NO_STATIC_INITIALIZER",
- "OPENSSL_SMALL",
+ sources = boringssl_sources
+ deps = [
+ ":boringssl_asm",
]
- deps = []
- if (is_component_build) {
- defines += [ "BORINGSSL_SHARED_LIBRARY" ]
- }
+
+ public_configs = [ ":external_config" ]
+ configs += [ ":internal_config" ]
configs -= [ "//build/config/compiler:chromium_code" ]
- configs += [
- "//build/config/compiler:no_chromium_code",
+ configs += [ "//build/config/compiler:no_chromium_code" ]
- # TODO(davidben): Fix size_t truncations in BoringSSL.
- # https://crbug.com/429039
- "//build/config/compiler:no_size_t_to_int_warning",
- ]
+ if (is_nacl) {
+ deps += [ "//native_client_sdk/src/libraries/nacl_io" ]
+ }
+}
- # Also gets the include dirs from :openssl_config
- include_dirs = [ "src/include" ]
+# The same as boringssl, but builds with BORINGSSL_UNSAFE_FUZZER_MODE.
+component("boringssl_fuzzer") {
+ visibility = [ ":*" ] # Only targets in this file can depend on this.
- if (current_cpu == "arm" && is_clang) {
- # TODO(hans) Enable integrated-as (crbug.com/124610).
- asmflags += [ "-fno-integrated-as" ]
- if (is_android) {
- rebased_android_toolchain_root =
- rebase_path(android_toolchain_root, root_build_dir)
+ sources = boringssl_sources
+ deps = [
+ ":boringssl_asm",
+ ]
- # Else /usr/bin/as gets picked up.
- asmflags += [ "-B${rebased_android_toolchain_root}/bin" ]
- }
- }
+ public_configs = [
+ ":external_config",
+ ":fuzzer_config",
+ ]
+ configs += [ ":internal_config" ]
- if (is_msan) {
- defines += [ "OPENSSL_NO_ASM" ]
- } else if (current_cpu == "x64") {
- if (is_mac || is_ios) {
- sources += gypi_values.boringssl_mac_x86_64_sources
- } else if (is_linux || is_android) {
- sources += gypi_values.boringssl_linux_x86_64_sources
- } else if (is_win) {
- deps += [ ":boringssl_asm" ]
- } else {
- defines += [ "OPENSSL_NO_ASM" ]
- }
- } else if (current_cpu == "x86") {
- if (is_mac || is_ios) {
- sources += gypi_values.boringssl_mac_x86_sources
- } else if (is_linux || is_android) {
- sources += gypi_values.boringssl_linux_x86_sources
- } else if (is_win) {
- deps += [ ":boringssl_asm" ]
- } else {
- defines += [ "OPENSSL_NO_ASM" ]
- }
- } else if (current_cpu == "arm" && (is_linux || is_android)) {
- sources += gypi_values.boringssl_linux_arm_sources
- } else if (current_cpu == "arm64" && (is_linux || is_android)) {
- sources += gypi_values.boringssl_linux_aarch64_sources
-
- # TODO(davidben): Remove explicit arch flag once https://crbug.com/576858
- # is fixed.
- asmflags += [ "-march=armv8-a+crypto" ]
- } else {
- defines += [ "OPENSSL_NO_ASM" ]
- }
+ configs -= [ "//build/config/compiler:chromium_code" ]
+ configs += [ "//build/config/compiler:no_chromium_code" ]
if (is_nacl) {
deps += [ "//native_client_sdk/src/libraries/nacl_io" ]
@@ -125,7 +156,7 @@ fuzzer_test("boringssl_d2i_x509_fuzzer") {
"src/fuzz/cert.cc",
]
deps = [
- ":boringssl",
+ ":boringssl_fuzzer",
]
}
@@ -134,7 +165,7 @@ fuzzer_test("boringssl_client_bio_write_fuzzer") {
"src/fuzz/client.cc",
]
deps = [
- ":boringssl",
+ ":boringssl_fuzzer",
]
}
@@ -143,7 +174,7 @@ fuzzer_test("boringssl_d2i_autoprivatekey_fuzzer") {
"src/fuzz/privkey.cc",
]
deps = [
- ":boringssl",
+ ":boringssl_fuzzer",
]
}
@@ -152,6 +183,6 @@ fuzzer_test("boringssl_server_bio_write_fuzzer") {
"src/fuzz/server.cc",
]
deps = [
- ":boringssl",
+ ":boringssl_fuzzer",
]
}
diff --git a/third_party/boringssl/boringssl.gypi b/third_party/boringssl/boringssl.gypi
index 2643855..6742d58 100644
--- a/third_party/boringssl/boringssl.gypi
+++ b/third_party/boringssl/boringssl.gypi
@@ -202,6 +202,7 @@
'src/crypto/pkcs8/p8_pkey.c',
'src/crypto/pkcs8/pkcs8.c',
'src/crypto/poly1305/poly1305.c',
+ 'src/crypto/rand/deterministic.c',
'src/crypto/rand/rand.c',
'src/crypto/rand/urandom.c',
'src/crypto/rand/windows.c',
diff --git a/third_party/boringssl/boringssl_tests.gypi b/third_party/boringssl/boringssl_tests.gypi
index 980a8d2..863ad81 100644
--- a/third_party/boringssl/boringssl_tests.gypi
+++ b/third_party/boringssl/boringssl_tests.gypi
@@ -91,6 +91,20 @@
'msvs_disabled_warnings': [ 4267, ],
},
{
+ 'target_name': 'boringssl_chacha_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/chacha/chacha_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
'target_name': 'boringssl_aead_test',
'type': 'executable',
'dependencies': [
@@ -567,6 +581,7 @@
'boringssl_bio_test',
'boringssl_bn_test',
'boringssl_bytestring_test',
+ 'boringssl_chacha_test',
'boringssl_cipher_test',
'boringssl_cmac_test',
'boringssl_constant_time_test',
diff --git a/third_party/boringssl/boringssl_unittest.cc b/third_party/boringssl/boringssl_unittest.cc
index 65839e1..04ce0f5 100644
--- a/third_party/boringssl/boringssl_unittest.cc
+++ b/third_party/boringssl/boringssl_unittest.cc
@@ -161,6 +161,10 @@ TEST(BoringSSL, ByteString) {
TestSimple("bytestring_test");
}
+TEST(BoringSSL, ChaCha) {
+ TestSimple("chacha_test");
+}
+
TEST(BoringSSL, Cipher) {
base::FilePath data_file;
ASSERT_TRUE(CryptoCipherTestPath(&data_file));
diff --git a/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S b/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S
index 83aaac8..7b681aa 100644
--- a/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S
+++ b/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S
@@ -739,6 +739,19 @@ poly1305_blocks_neon:
.Lshort_tail:
////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp v22.2d,v22.2d,v22.2d
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp v19.2d,v19.2d,v19.2d
+ ldp d10,d11,[sp,#32]
+ addp v23.2d,v23.2d,v23.2d
+ ldp d12,d13,[sp,#48]
+ addp v20.2d,v20.2d,v20.2d
+ ldp d14,d15,[sp,#64]
+ addp v21.2d,v21.2d,v21.2d
+
+ ////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr v29.2d,v22.2d,#26
@@ -770,19 +783,6 @@ poly1305_blocks_neon:
add v23.2d,v23.2d,v30.2d // h3 -> h4
////////////////////////////////////////////////////////////////
- // horizontal add
-
- addp v21.2d,v21.2d,v21.2d
- ldp d8,d9,[sp,#16] // meet ABI requirements
- addp v19.2d,v19.2d,v19.2d
- ldp d10,d11,[sp,#32]
- addp v20.2d,v20.2d,v20.2d
- ldp d12,d13,[sp,#48]
- addp v22.2d,v22.2d,v22.2d
- ldp d14,d15,[sp,#64]
- addp v23.2d,v23.2d,v23.2d
-
- ////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
diff --git a/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S b/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S
index 52b0a0c8c..acd96f2 100644
--- a/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S
+++ b/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S
@@ -997,6 +997,15 @@ poly1305_blocks_neon:
.Lshort_tail:
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 d16,d16,d17
+ vadd.i64 d10,d10,d11
+ vadd.i64 d18,d18,d19
+ vadd.i64 d12,d12,d13
+ vadd.i64 d14,d14,d15
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ lazy reduction, but without narrowing
vshr.u64 q15,q8,#26
@@ -1026,15 +1035,6 @@ poly1305_blocks_neon:
vadd.i64 q6,q6,q15 @ h0 -> h1
vadd.i64 q9,q9,q4 @ h3 -> h4
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ horizontal addition
-
- vadd.i64 d14,d14,d15
- vadd.i64 d10,d10,d11
- vadd.i64 d16,d16,d17
- vadd.i64 d12,d12,d13
- vadd.i64 d18,d18,d19
-
cmp r2,#0
bne .Leven
diff --git a/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S b/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S
index fe442f8..6821901 100644
--- a/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S
+++ b/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S
@@ -1200,9 +1200,19 @@ _poly1305_blocks_sse2:
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
.L017short_tail:
+ pshufd $78,%xmm4,%xmm6
+ pshufd $78,%xmm3,%xmm5
+ paddq %xmm6,%xmm4
+ paddq %xmm5,%xmm3
+ pshufd $78,%xmm0,%xmm6
+ pshufd $78,%xmm1,%xmm5
+ paddq %xmm6,%xmm0
+ paddq %xmm5,%xmm1
+ pshufd $78,%xmm2,%xmm6
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
+ paddq %xmm6,%xmm2
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
@@ -1230,24 +1240,14 @@ _poly1305_blocks_sse2:
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
- pshufd $78,%xmm0,%xmm6
- pshufd $78,%xmm1,%xmm5
- paddd %xmm6,%xmm0
- pshufd $78,%xmm2,%xmm6
- paddd %xmm5,%xmm1
- pshufd $78,%xmm3,%xmm5
- paddd %xmm6,%xmm2
- pshufd $78,%xmm4,%xmm6
- paddd %xmm5,%xmm3
- paddd %xmm6,%xmm4
.L013done:
movd %xmm0,-48(%edi)
movd %xmm1,-44(%edi)
movd %xmm2,-40(%edi)
movd %xmm3,-36(%edi)
movd %xmm4,-32(%edi)
-.L007nodata:
movl %ebp,%esp
+.L007nodata:
popl %edi
popl %esi
popl %ebx
diff --git a/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S
index a89874e..5f7d75b 100644
--- a/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S
+++ b/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S
@@ -1102,6 +1102,20 @@ poly1305_blocks_avx:
+ vpsrldq $8,%xmm14,%xmm9
+ vpsrldq $8,%xmm13,%xmm8
+ vpsrldq $8,%xmm11,%xmm6
+ vpsrldq $8,%xmm10,%xmm5
+ vpsrldq $8,%xmm12,%xmm7
+ vpaddq %xmm8,%xmm13,%xmm13
+ vpaddq %xmm9,%xmm14,%xmm14
+ vpaddq %xmm5,%xmm10,%xmm10
+ vpaddq %xmm6,%xmm11,%xmm11
+ vpaddq %xmm7,%xmm12,%xmm12
+
+
+
+
vpsrlq $26,%xmm13,%xmm3
vpand %xmm15,%xmm13,%xmm13
vpaddq %xmm3,%xmm14,%xmm14
@@ -1133,25 +1147,11 @@ poly1305_blocks_avx:
vpand %xmm15,%xmm13,%xmm13
vpaddq %xmm3,%xmm14,%xmm14
-
-
-
- vpsrldq $8,%xmm12,%xmm7
- vpsrldq $8,%xmm10,%xmm5
- vpsrldq $8,%xmm11,%xmm6
- vpsrldq $8,%xmm13,%xmm8
- vpsrldq $8,%xmm14,%xmm9
- vpaddq %xmm7,%xmm12,%xmm2
- vpaddq %xmm5,%xmm10,%xmm0
- vpaddq %xmm6,%xmm11,%xmm1
- vpaddq %xmm8,%xmm13,%xmm3
- vpaddq %xmm9,%xmm14,%xmm4
-
- vmovd %xmm0,-112(%rdi)
- vmovd %xmm1,-108(%rdi)
- vmovd %xmm2,-104(%rdi)
- vmovd %xmm3,-100(%rdi)
- vmovd %xmm4,-96(%rdi)
+ vmovd %xmm10,-112(%rdi)
+ vmovd %xmm11,-108(%rdi)
+ vmovd %xmm12,-104(%rdi)
+ vmovd %xmm13,-100(%rdi)
+ vmovd %xmm14,-96(%rdi)
leaq 88(%r11),%rsp
vzeroupper
.byte 0xf3,0xc3
@@ -1747,6 +1747,31 @@ poly1305_blocks_avx2:
+ vpsrldq $8,%ymm12,%ymm8
+ vpsrldq $8,%ymm2,%ymm9
+ vpsrldq $8,%ymm3,%ymm10
+ vpsrldq $8,%ymm4,%ymm6
+ vpsrldq $8,%ymm0,%ymm7
+ vpaddq %ymm8,%ymm12,%ymm12
+ vpaddq %ymm9,%ymm2,%ymm2
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm6,%ymm4,%ymm4
+ vpaddq %ymm7,%ymm0,%ymm0
+
+ vpermq $0x2,%ymm3,%ymm10
+ vpermq $0x2,%ymm4,%ymm6
+ vpermq $0x2,%ymm0,%ymm7
+ vpermq $0x2,%ymm12,%ymm8
+ vpermq $0x2,%ymm2,%ymm9
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm6,%ymm4,%ymm4
+ vpaddq %ymm7,%ymm0,%ymm0
+ vpaddq %ymm8,%ymm12,%ymm12
+ vpaddq %ymm9,%ymm2,%ymm2
+
+
+
+
vpsrlq $26,%ymm3,%ymm14
vpand %ymm5,%ymm3,%ymm3
vpaddq %ymm14,%ymm4,%ymm4
@@ -1778,31 +1803,6 @@ poly1305_blocks_avx2:
vpand %ymm5,%ymm3,%ymm3
vpaddq %ymm14,%ymm4,%ymm4
-
-
-
- vpsrldq $8,%ymm2,%ymm9
- vpsrldq $8,%ymm0,%ymm7
- vpsrldq $8,%ymm1,%ymm8
- vpsrldq $8,%ymm3,%ymm10
- vpsrldq $8,%ymm4,%ymm6
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm1,%ymm1
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
-
- vpermq $0x2,%ymm2,%ymm9
- vpermq $0x2,%ymm0,%ymm7
- vpermq $0x2,%ymm1,%ymm8
- vpermq $0x2,%ymm3,%ymm10
- vpermq $0x2,%ymm4,%ymm6
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm1,%ymm1
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
-
vmovd %xmm0,-112(%rdi)
vmovd %xmm1,-108(%rdi)
vmovd %xmm2,-104(%rdi)
diff --git a/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S b/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S
index f433f48..9b8f6f6 100644
--- a/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S
+++ b/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S
@@ -1191,9 +1191,19 @@ L016long_tail:
paddq %xmm5,%xmm1
paddq %xmm6,%xmm2
L017short_tail:
+ pshufd $78,%xmm4,%xmm6
+ pshufd $78,%xmm3,%xmm5
+ paddq %xmm6,%xmm4
+ paddq %xmm5,%xmm3
+ pshufd $78,%xmm0,%xmm6
+ pshufd $78,%xmm1,%xmm5
+ paddq %xmm6,%xmm0
+ paddq %xmm5,%xmm1
+ pshufd $78,%xmm2,%xmm6
movdqa %xmm3,%xmm5
pand %xmm7,%xmm3
psrlq $26,%xmm5
+ paddq %xmm6,%xmm2
paddq %xmm4,%xmm5
movdqa %xmm0,%xmm6
pand %xmm7,%xmm0
@@ -1221,24 +1231,14 @@ L017short_tail:
paddd %xmm5,%xmm1
pand %xmm7,%xmm3
paddd %xmm6,%xmm4
- pshufd $78,%xmm0,%xmm6
- pshufd $78,%xmm1,%xmm5
- paddd %xmm6,%xmm0
- pshufd $78,%xmm2,%xmm6
- paddd %xmm5,%xmm1
- pshufd $78,%xmm3,%xmm5
- paddd %xmm6,%xmm2
- pshufd $78,%xmm4,%xmm6
- paddd %xmm5,%xmm3
- paddd %xmm6,%xmm4
L013done:
movd %xmm0,-48(%edi)
movd %xmm1,-44(%edi)
movd %xmm2,-40(%edi)
movd %xmm3,-36(%edi)
movd %xmm4,-32(%edi)
-L007nodata:
movl %ebp,%esp
+L007nodata:
popl %edi
popl %esi
popl %ebx
diff --git a/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S
index 78c887dd..13f028d 100644
--- a/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S
+++ b/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S
@@ -1101,6 +1101,20 @@ L$short_tail_avx:
+ vpsrldq $8,%xmm14,%xmm9
+ vpsrldq $8,%xmm13,%xmm8
+ vpsrldq $8,%xmm11,%xmm6
+ vpsrldq $8,%xmm10,%xmm5
+ vpsrldq $8,%xmm12,%xmm7
+ vpaddq %xmm8,%xmm13,%xmm13
+ vpaddq %xmm9,%xmm14,%xmm14
+ vpaddq %xmm5,%xmm10,%xmm10
+ vpaddq %xmm6,%xmm11,%xmm11
+ vpaddq %xmm7,%xmm12,%xmm12
+
+
+
+
vpsrlq $26,%xmm13,%xmm3
vpand %xmm15,%xmm13,%xmm13
vpaddq %xmm3,%xmm14,%xmm14
@@ -1132,25 +1146,11 @@ L$short_tail_avx:
vpand %xmm15,%xmm13,%xmm13
vpaddq %xmm3,%xmm14,%xmm14
-
-
-
- vpsrldq $8,%xmm12,%xmm7
- vpsrldq $8,%xmm10,%xmm5
- vpsrldq $8,%xmm11,%xmm6
- vpsrldq $8,%xmm13,%xmm8
- vpsrldq $8,%xmm14,%xmm9
- vpaddq %xmm7,%xmm12,%xmm2
- vpaddq %xmm5,%xmm10,%xmm0
- vpaddq %xmm6,%xmm11,%xmm1
- vpaddq %xmm8,%xmm13,%xmm3
- vpaddq %xmm9,%xmm14,%xmm4
-
- vmovd %xmm0,-112(%rdi)
- vmovd %xmm1,-108(%rdi)
- vmovd %xmm2,-104(%rdi)
- vmovd %xmm3,-100(%rdi)
- vmovd %xmm4,-96(%rdi)
+ vmovd %xmm10,-112(%rdi)
+ vmovd %xmm11,-108(%rdi)
+ vmovd %xmm12,-104(%rdi)
+ vmovd %xmm13,-100(%rdi)
+ vmovd %xmm14,-96(%rdi)
leaq 88(%r11),%rsp
vzeroupper
.byte 0xf3,0xc3
@@ -1746,6 +1746,31 @@ L$tail_avx2:
+ vpsrldq $8,%ymm12,%ymm8
+ vpsrldq $8,%ymm2,%ymm9
+ vpsrldq $8,%ymm3,%ymm10
+ vpsrldq $8,%ymm4,%ymm6
+ vpsrldq $8,%ymm0,%ymm7
+ vpaddq %ymm8,%ymm12,%ymm12
+ vpaddq %ymm9,%ymm2,%ymm2
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm6,%ymm4,%ymm4
+ vpaddq %ymm7,%ymm0,%ymm0
+
+ vpermq $0x2,%ymm3,%ymm10
+ vpermq $0x2,%ymm4,%ymm6
+ vpermq $0x2,%ymm0,%ymm7
+ vpermq $0x2,%ymm12,%ymm8
+ vpermq $0x2,%ymm2,%ymm9
+ vpaddq %ymm10,%ymm3,%ymm3
+ vpaddq %ymm6,%ymm4,%ymm4
+ vpaddq %ymm7,%ymm0,%ymm0
+ vpaddq %ymm8,%ymm12,%ymm12
+ vpaddq %ymm9,%ymm2,%ymm2
+
+
+
+
vpsrlq $26,%ymm3,%ymm14
vpand %ymm5,%ymm3,%ymm3
vpaddq %ymm14,%ymm4,%ymm4
@@ -1777,31 +1802,6 @@ L$tail_avx2:
vpand %ymm5,%ymm3,%ymm3
vpaddq %ymm14,%ymm4,%ymm4
-
-
-
- vpsrldq $8,%ymm2,%ymm9
- vpsrldq $8,%ymm0,%ymm7
- vpsrldq $8,%ymm1,%ymm8
- vpsrldq $8,%ymm3,%ymm10
- vpsrldq $8,%ymm4,%ymm6
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm1,%ymm1
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
-
- vpermq $0x2,%ymm2,%ymm9
- vpermq $0x2,%ymm0,%ymm7
- vpermq $0x2,%ymm1,%ymm8
- vpermq $0x2,%ymm3,%ymm10
- vpermq $0x2,%ymm4,%ymm6
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm1,%ymm1
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
-
vmovd %xmm0,-112(%rdi)
vmovd %xmm1,-108(%rdi)
vmovd %xmm2,-104(%rdi)
diff --git a/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm b/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm
index 3380c90..ca61027 100644
--- a/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm
+++ b/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm
@@ -1200,9 +1200,19 @@ L$016long_tail:
paddq xmm1,xmm5
paddq xmm2,xmm6
L$017short_tail:
+ pshufd xmm6,xmm4,78
+ pshufd xmm5,xmm3,78
+ paddq xmm4,xmm6
+ paddq xmm3,xmm5
+ pshufd xmm6,xmm0,78
+ pshufd xmm5,xmm1,78
+ paddq xmm0,xmm6
+ paddq xmm1,xmm5
+ pshufd xmm6,xmm2,78
movdqa xmm5,xmm3
pand xmm3,xmm7
psrlq xmm5,26
+ paddq xmm2,xmm6
paddq xmm5,xmm4
movdqa xmm6,xmm0
pand xmm0,xmm7
@@ -1230,24 +1240,14 @@ L$017short_tail:
paddd xmm1,xmm5
pand xmm3,xmm7
paddd xmm4,xmm6
- pshufd xmm6,xmm0,78
- pshufd xmm5,xmm1,78
- paddd xmm0,xmm6
- pshufd xmm6,xmm2,78
- paddd xmm1,xmm5
- pshufd xmm5,xmm3,78
- paddd xmm2,xmm6
- pshufd xmm6,xmm4,78
- paddd xmm3,xmm5
- paddd xmm4,xmm6
L$013done:
movd DWORD [edi-48],xmm0
movd DWORD [edi-44],xmm1
movd DWORD [edi-40],xmm2
movd DWORD [edi-36],xmm3
movd DWORD [edi-32],xmm4
-L$007nodata:
mov esp,ebp
+L$007nodata:
pop edi
pop esi
pop ebx
diff --git a/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm
index 65c9ac6..6d285b6 100644
--- a/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm
+++ b/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm
@@ -1159,6 +1159,20 @@ $L$short_tail_avx:
+ vpsrldq xmm9,xmm14,8
+ vpsrldq xmm8,xmm13,8
+ vpsrldq xmm6,xmm11,8
+ vpsrldq xmm5,xmm10,8
+ vpsrldq xmm7,xmm12,8
+ vpaddq xmm13,xmm13,xmm8
+ vpaddq xmm14,xmm14,xmm9
+ vpaddq xmm10,xmm10,xmm5
+ vpaddq xmm11,xmm11,xmm6
+ vpaddq xmm12,xmm12,xmm7
+
+
+
+
vpsrlq xmm3,xmm13,26
vpand xmm13,xmm13,xmm15
vpaddq xmm14,xmm14,xmm3
@@ -1190,25 +1204,11 @@ $L$short_tail_avx:
vpand xmm13,xmm13,xmm15
vpaddq xmm14,xmm14,xmm3
-
-
-
- vpsrldq xmm7,xmm12,8
- vpsrldq xmm5,xmm10,8
- vpsrldq xmm6,xmm11,8
- vpsrldq xmm8,xmm13,8
- vpsrldq xmm9,xmm14,8
- vpaddq xmm2,xmm12,xmm7
- vpaddq xmm0,xmm10,xmm5
- vpaddq xmm1,xmm11,xmm6
- vpaddq xmm3,xmm13,xmm8
- vpaddq xmm4,xmm14,xmm9
-
- vmovd DWORD[(-112)+rdi],xmm0
- vmovd DWORD[(-108)+rdi],xmm1
- vmovd DWORD[(-104)+rdi],xmm2
- vmovd DWORD[(-100)+rdi],xmm3
- vmovd DWORD[(-96)+rdi],xmm4
+ vmovd DWORD[(-112)+rdi],xmm10
+ vmovd DWORD[(-108)+rdi],xmm11
+ vmovd DWORD[(-104)+rdi],xmm12
+ vmovd DWORD[(-100)+rdi],xmm13
+ vmovd DWORD[(-96)+rdi],xmm14
vmovdqa xmm6,XMMWORD[80+r11]
vmovdqa xmm7,XMMWORD[96+r11]
vmovdqa xmm8,XMMWORD[112+r11]
@@ -1851,6 +1851,31 @@ $L$tail_avx2:
+ vpsrldq ymm8,ymm12,8
+ vpsrldq ymm9,ymm2,8
+ vpsrldq ymm10,ymm3,8
+ vpsrldq ymm6,ymm4,8
+ vpsrldq ymm7,ymm0,8
+ vpaddq ymm12,ymm12,ymm8
+ vpaddq ymm2,ymm2,ymm9
+ vpaddq ymm3,ymm3,ymm10
+ vpaddq ymm4,ymm4,ymm6
+ vpaddq ymm0,ymm0,ymm7
+
+ vpermq ymm10,ymm3,0x2
+ vpermq ymm6,ymm4,0x2
+ vpermq ymm7,ymm0,0x2
+ vpermq ymm8,ymm12,0x2
+ vpermq ymm9,ymm2,0x2
+ vpaddq ymm3,ymm3,ymm10
+ vpaddq ymm4,ymm4,ymm6
+ vpaddq ymm0,ymm0,ymm7
+ vpaddq ymm12,ymm12,ymm8
+ vpaddq ymm2,ymm2,ymm9
+
+
+
+
vpsrlq ymm14,ymm3,26
vpand ymm3,ymm3,ymm5
vpaddq ymm4,ymm4,ymm14
@@ -1882,31 +1907,6 @@ $L$tail_avx2:
vpand ymm3,ymm3,ymm5
vpaddq ymm4,ymm4,ymm14
-
-
-
- vpsrldq ymm9,ymm2,8
- vpsrldq ymm7,ymm0,8
- vpsrldq ymm8,ymm1,8
- vpsrldq ymm10,ymm3,8
- vpsrldq ymm6,ymm4,8
- vpaddq ymm2,ymm2,ymm9
- vpaddq ymm0,ymm0,ymm7
- vpaddq ymm1,ymm1,ymm8
- vpaddq ymm3,ymm3,ymm10
- vpaddq ymm4,ymm4,ymm6
-
- vpermq ymm9,ymm2,0x2
- vpermq ymm7,ymm0,0x2
- vpermq ymm8,ymm1,0x2
- vpermq ymm10,ymm3,0x2
- vpermq ymm6,ymm4,0x2
- vpaddq ymm2,ymm2,ymm9
- vpaddq ymm0,ymm0,ymm7
- vpaddq ymm1,ymm1,ymm8
- vpaddq ymm3,ymm3,ymm10
- vpaddq ymm4,ymm4,ymm6
-
vmovd DWORD[(-112)+rdi],xmm0
vmovd DWORD[(-108)+rdi],xmm1
vmovd DWORD[(-104)+rdi],xmm2