diff options
13 files changed, 312 insertions, 261 deletions
@@ -67,7 +67,7 @@ vars = { # Three lines of non-changing comments so that # the commit queue can handle CLs rolling BoringSSL # and whatever else without interference from each other. - 'boringssl_revision': '708db16463a21b922aed6d393ef74c3c5a366668', + 'boringssl_revision': '58218b63bc033782162168e1462c9c8890606885', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling nss # and whatever else without interference from each other. diff --git a/third_party/boringssl/BUILD.gn b/third_party/boringssl/BUILD.gn index 2ac6ec3..91c6ded 100644 --- a/third_party/boringssl/BUILD.gn +++ b/third_party/boringssl/BUILD.gn @@ -7,16 +7,36 @@ import("//build/config/sanitizers/sanitizers.gni") import("//testing/libfuzzer/fuzzer_test.gni") # Config for us and everybody else depending on BoringSSL. -config("openssl_config") { +config("external_config") { include_dirs = [ "src/include" ] if (is_component_build) { defines = [ "BORINGSSL_SHARED_LIBRARY" ] } } -# Config internal to this build file. -config("openssl_internal_config") { +# Config internal to this build file, shared by boringssl and boringssl_fuzzer. +config("internal_config") { visibility = [ ":*" ] # Only targets in this file can depend on this. + defines = [ + "BORINGSSL_IMPLEMENTATION", + "BORINGSSL_NO_STATIC_INITIALIZER", + "OPENSSL_SMALL", + ] + configs = [ + # TODO(davidben): Fix size_t truncations in BoringSSL. + # https://crbug.com/429039 + "//build/config/compiler:no_size_t_to_int_warning", + ] +} + +config("no_asm_config") { + visibility = [ ":*" ] # Only targets in this file can depend on this. + defines = [ "OPENSSL_NO_ASM" ] +} + +config("fuzzer_config") { + visibility = [ ":*" ] # Only targets in this file can depend on this. + defines = [ "BORINGSSL_UNSAFE_FUZZER_MODE" ] } # The list of BoringSSL files is kept in boringssl.gypi. @@ -25,6 +45,8 @@ gypi_values = [ rebase_path("//third_party/boringssl/boringssl.gypi") ], "scope", [ "//third_party/boringssl/boringssl.gypi" ]) +boringssl_sources = + gypi_values.boringssl_crypto_sources + gypi_values.boringssl_ssl_sources # Windows' assembly is built with Yasm. The other platforms use the platform # assembler. @@ -37,83 +59,92 @@ if (is_win && !is_msan) { sources = gypi_values.boringssl_win_x86_sources } } +} else { + source_set("boringssl_asm") { + visibility = [ ":*" ] # Only targets in this file can depend on this. + + sources = [] + asmflags = [] + include_dirs = [ "src/include" ] + + if (current_cpu == "arm" && is_clang) { + # TODO(hans) Enable integrated-as (crbug.com/124610). + asmflags += [ "-fno-integrated-as" ] + if (is_android) { + rebased_android_toolchain_root = + rebase_path(android_toolchain_root, root_build_dir) + + # Else /usr/bin/as gets picked up. + asmflags += [ "-B${rebased_android_toolchain_root}/bin" ] + } + } + + if (is_msan) { + public_configs = [ ":no_asm_config" ] + } else if (current_cpu == "x64") { + if (is_mac || is_ios) { + sources += gypi_values.boringssl_mac_x86_64_sources + } else if (is_linux || is_android) { + sources += gypi_values.boringssl_linux_x86_64_sources + } else { + public_configs = [ ":no_asm_config" ] + } + } else if (current_cpu == "x86") { + if (is_mac || is_ios) { + sources += gypi_values.boringssl_mac_x86_sources + } else if (is_linux || is_android) { + sources += gypi_values.boringssl_linux_x86_sources + } else { + public_configs = [ ":no_asm_config" ] + } + } else if (current_cpu == "arm" && (is_linux || is_android)) { + sources += gypi_values.boringssl_linux_arm_sources + } else if (current_cpu == "arm64" && (is_linux || is_android)) { + sources += gypi_values.boringssl_linux_aarch64_sources + + # TODO(davidben): Remove explicit arch flag once https://crbug.com/576858 + # is fixed. + asmflags += [ "-march=armv8-a+crypto" ] + } else { + public_configs = [ ":no_asm_config" ] + } + } } component("boringssl") { - sources = gypi_values.boringssl_crypto_sources - sources += gypi_values.boringssl_ssl_sources - - public_configs = [ ":openssl_config" ] - - asmflags = [] - cflags = [] - defines = [ - "BORINGSSL_IMPLEMENTATION", - "BORINGSSL_NO_STATIC_INITIALIZER", - "OPENSSL_SMALL", + sources = boringssl_sources + deps = [ + ":boringssl_asm", ] - deps = [] - if (is_component_build) { - defines += [ "BORINGSSL_SHARED_LIBRARY" ] - } + + public_configs = [ ":external_config" ] + configs += [ ":internal_config" ] configs -= [ "//build/config/compiler:chromium_code" ] - configs += [ - "//build/config/compiler:no_chromium_code", + configs += [ "//build/config/compiler:no_chromium_code" ] - # TODO(davidben): Fix size_t truncations in BoringSSL. - # https://crbug.com/429039 - "//build/config/compiler:no_size_t_to_int_warning", - ] + if (is_nacl) { + deps += [ "//native_client_sdk/src/libraries/nacl_io" ] + } +} - # Also gets the include dirs from :openssl_config - include_dirs = [ "src/include" ] +# The same as boringssl, but builds with BORINGSSL_UNSAFE_FUZZER_MODE. +component("boringssl_fuzzer") { + visibility = [ ":*" ] # Only targets in this file can depend on this. - if (current_cpu == "arm" && is_clang) { - # TODO(hans) Enable integrated-as (crbug.com/124610). - asmflags += [ "-fno-integrated-as" ] - if (is_android) { - rebased_android_toolchain_root = - rebase_path(android_toolchain_root, root_build_dir) + sources = boringssl_sources + deps = [ + ":boringssl_asm", + ] - # Else /usr/bin/as gets picked up. - asmflags += [ "-B${rebased_android_toolchain_root}/bin" ] - } - } + public_configs = [ + ":external_config", + ":fuzzer_config", + ] + configs += [ ":internal_config" ] - if (is_msan) { - defines += [ "OPENSSL_NO_ASM" ] - } else if (current_cpu == "x64") { - if (is_mac || is_ios) { - sources += gypi_values.boringssl_mac_x86_64_sources - } else if (is_linux || is_android) { - sources += gypi_values.boringssl_linux_x86_64_sources - } else if (is_win) { - deps += [ ":boringssl_asm" ] - } else { - defines += [ "OPENSSL_NO_ASM" ] - } - } else if (current_cpu == "x86") { - if (is_mac || is_ios) { - sources += gypi_values.boringssl_mac_x86_sources - } else if (is_linux || is_android) { - sources += gypi_values.boringssl_linux_x86_sources - } else if (is_win) { - deps += [ ":boringssl_asm" ] - } else { - defines += [ "OPENSSL_NO_ASM" ] - } - } else if (current_cpu == "arm" && (is_linux || is_android)) { - sources += gypi_values.boringssl_linux_arm_sources - } else if (current_cpu == "arm64" && (is_linux || is_android)) { - sources += gypi_values.boringssl_linux_aarch64_sources - - # TODO(davidben): Remove explicit arch flag once https://crbug.com/576858 - # is fixed. - asmflags += [ "-march=armv8-a+crypto" ] - } else { - defines += [ "OPENSSL_NO_ASM" ] - } + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] if (is_nacl) { deps += [ "//native_client_sdk/src/libraries/nacl_io" ] @@ -125,7 +156,7 @@ fuzzer_test("boringssl_d2i_x509_fuzzer") { "src/fuzz/cert.cc", ] deps = [ - ":boringssl", + ":boringssl_fuzzer", ] } @@ -134,7 +165,7 @@ fuzzer_test("boringssl_client_bio_write_fuzzer") { "src/fuzz/client.cc", ] deps = [ - ":boringssl", + ":boringssl_fuzzer", ] } @@ -143,7 +174,7 @@ fuzzer_test("boringssl_d2i_autoprivatekey_fuzzer") { "src/fuzz/privkey.cc", ] deps = [ - ":boringssl", + ":boringssl_fuzzer", ] } @@ -152,6 +183,6 @@ fuzzer_test("boringssl_server_bio_write_fuzzer") { "src/fuzz/server.cc", ] deps = [ - ":boringssl", + ":boringssl_fuzzer", ] } diff --git a/third_party/boringssl/boringssl.gypi b/third_party/boringssl/boringssl.gypi index 2643855..6742d58 100644 --- a/third_party/boringssl/boringssl.gypi +++ b/third_party/boringssl/boringssl.gypi @@ -202,6 +202,7 @@ 'src/crypto/pkcs8/p8_pkey.c', 'src/crypto/pkcs8/pkcs8.c', 'src/crypto/poly1305/poly1305.c', + 'src/crypto/rand/deterministic.c', 'src/crypto/rand/rand.c', 'src/crypto/rand/urandom.c', 'src/crypto/rand/windows.c', diff --git a/third_party/boringssl/boringssl_tests.gypi b/third_party/boringssl/boringssl_tests.gypi index 980a8d2..863ad81 100644 --- a/third_party/boringssl/boringssl_tests.gypi +++ b/third_party/boringssl/boringssl_tests.gypi @@ -91,6 +91,20 @@ 'msvs_disabled_warnings': [ 4267, ], }, { + 'target_name': 'boringssl_chacha_test', + 'type': 'executable', + 'dependencies': [ + 'boringssl.gyp:boringssl', + ], + 'sources': [ + 'src/crypto/chacha/chacha_test.cc', + '<@(boringssl_test_support_sources)', + ], + # TODO(davidben): Fix size_t truncations in BoringSSL. + # https://crbug.com/429039 + 'msvs_disabled_warnings': [ 4267, ], + }, + { 'target_name': 'boringssl_aead_test', 'type': 'executable', 'dependencies': [ @@ -567,6 +581,7 @@ 'boringssl_bio_test', 'boringssl_bn_test', 'boringssl_bytestring_test', + 'boringssl_chacha_test', 'boringssl_cipher_test', 'boringssl_cmac_test', 'boringssl_constant_time_test', diff --git a/third_party/boringssl/boringssl_unittest.cc b/third_party/boringssl/boringssl_unittest.cc index 65839e1..04ce0f5 100644 --- a/third_party/boringssl/boringssl_unittest.cc +++ b/third_party/boringssl/boringssl_unittest.cc @@ -161,6 +161,10 @@ TEST(BoringSSL, ByteString) { TestSimple("bytestring_test"); } +TEST(BoringSSL, ChaCha) { + TestSimple("chacha_test"); +} + TEST(BoringSSL, Cipher) { base::FilePath data_file; ASSERT_TRUE(CryptoCipherTestPath(&data_file)); diff --git a/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S b/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S index 83aaac8..7b681aa 100644 --- a/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S +++ b/third_party/boringssl/linux-aarch64/crypto/poly1305/poly1305-armv8.S @@ -739,6 +739,19 @@ poly1305_blocks_neon: .Lshort_tail: //////////////////////////////////////////////////////////////// + // horizontal add + + addp v22.2d,v22.2d,v22.2d + ldp d8,d9,[sp,#16] // meet ABI requirements + addp v19.2d,v19.2d,v19.2d + ldp d10,d11,[sp,#32] + addp v23.2d,v23.2d,v23.2d + ldp d12,d13,[sp,#48] + addp v20.2d,v20.2d,v20.2d + ldp d14,d15,[sp,#64] + addp v21.2d,v21.2d,v21.2d + + //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing ushr v29.2d,v22.2d,#26 @@ -770,19 +783,6 @@ poly1305_blocks_neon: add v23.2d,v23.2d,v30.2d // h3 -> h4 //////////////////////////////////////////////////////////////// - // horizontal add - - addp v21.2d,v21.2d,v21.2d - ldp d8,d9,[sp,#16] // meet ABI requirements - addp v19.2d,v19.2d,v19.2d - ldp d10,d11,[sp,#32] - addp v20.2d,v20.2d,v20.2d - ldp d12,d13,[sp,#48] - addp v22.2d,v22.2d,v22.2d - ldp d14,d15,[sp,#64] - addp v23.2d,v23.2d,v23.2d - - //////////////////////////////////////////////////////////////// // write the result, can be partially reduced st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 diff --git a/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S b/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S index 52b0a0c8c..acd96f2 100644 --- a/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S +++ b/third_party/boringssl/linux-arm/crypto/poly1305/poly1305-armv4.S @@ -997,6 +997,15 @@ poly1305_blocks_neon: .Lshort_tail: @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 d16,d16,d17 + vadd.i64 d10,d10,d11 + vadd.i64 d18,d18,d19 + vadd.i64 d12,d12,d13 + vadd.i64 d14,d14,d15 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ lazy reduction, but without narrowing vshr.u64 q15,q8,#26 @@ -1026,15 +1035,6 @@ poly1305_blocks_neon: vadd.i64 q6,q6,q15 @ h0 -> h1 vadd.i64 q9,q9,q4 @ h3 -> h4 - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ horizontal addition - - vadd.i64 d14,d14,d15 - vadd.i64 d10,d10,d11 - vadd.i64 d16,d16,d17 - vadd.i64 d12,d12,d13 - vadd.i64 d18,d18,d19 - cmp r2,#0 bne .Leven diff --git a/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S b/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S index fe442f8..6821901 100644 --- a/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S +++ b/third_party/boringssl/linux-x86/crypto/poly1305/poly1305-x86.S @@ -1200,9 +1200,19 @@ _poly1305_blocks_sse2: paddq %xmm5,%xmm1 paddq %xmm6,%xmm2 .L017short_tail: + pshufd $78,%xmm4,%xmm6 + pshufd $78,%xmm3,%xmm5 + paddq %xmm6,%xmm4 + paddq %xmm5,%xmm3 + pshufd $78,%xmm0,%xmm6 + pshufd $78,%xmm1,%xmm5 + paddq %xmm6,%xmm0 + paddq %xmm5,%xmm1 + pshufd $78,%xmm2,%xmm6 movdqa %xmm3,%xmm5 pand %xmm7,%xmm3 psrlq $26,%xmm5 + paddq %xmm6,%xmm2 paddq %xmm4,%xmm5 movdqa %xmm0,%xmm6 pand %xmm7,%xmm0 @@ -1230,24 +1240,14 @@ _poly1305_blocks_sse2: paddd %xmm5,%xmm1 pand %xmm7,%xmm3 paddd %xmm6,%xmm4 - pshufd $78,%xmm0,%xmm6 - pshufd $78,%xmm1,%xmm5 - paddd %xmm6,%xmm0 - pshufd $78,%xmm2,%xmm6 - paddd %xmm5,%xmm1 - pshufd $78,%xmm3,%xmm5 - paddd %xmm6,%xmm2 - pshufd $78,%xmm4,%xmm6 - paddd %xmm5,%xmm3 - paddd %xmm6,%xmm4 .L013done: movd %xmm0,-48(%edi) movd %xmm1,-44(%edi) movd %xmm2,-40(%edi) movd %xmm3,-36(%edi) movd %xmm4,-32(%edi) -.L007nodata: movl %ebp,%esp +.L007nodata: popl %edi popl %esi popl %ebx diff --git a/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S b/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S index a89874e..5f7d75b 100644 --- a/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S +++ b/third_party/boringssl/linux-x86_64/crypto/poly1305/poly1305-x86_64.S @@ -1102,6 +1102,20 @@ poly1305_blocks_avx: + vpsrldq $8,%xmm14,%xmm9 + vpsrldq $8,%xmm13,%xmm8 + vpsrldq $8,%xmm11,%xmm6 + vpsrldq $8,%xmm10,%xmm5 + vpsrldq $8,%xmm12,%xmm7 + vpaddq %xmm8,%xmm13,%xmm13 + vpaddq %xmm9,%xmm14,%xmm14 + vpaddq %xmm5,%xmm10,%xmm10 + vpaddq %xmm6,%xmm11,%xmm11 + vpaddq %xmm7,%xmm12,%xmm12 + + + + vpsrlq $26,%xmm13,%xmm3 vpand %xmm15,%xmm13,%xmm13 vpaddq %xmm3,%xmm14,%xmm14 @@ -1133,25 +1147,11 @@ poly1305_blocks_avx: vpand %xmm15,%xmm13,%xmm13 vpaddq %xmm3,%xmm14,%xmm14 - - - - vpsrldq $8,%xmm12,%xmm7 - vpsrldq $8,%xmm10,%xmm5 - vpsrldq $8,%xmm11,%xmm6 - vpsrldq $8,%xmm13,%xmm8 - vpsrldq $8,%xmm14,%xmm9 - vpaddq %xmm7,%xmm12,%xmm2 - vpaddq %xmm5,%xmm10,%xmm0 - vpaddq %xmm6,%xmm11,%xmm1 - vpaddq %xmm8,%xmm13,%xmm3 - vpaddq %xmm9,%xmm14,%xmm4 - - vmovd %xmm0,-112(%rdi) - vmovd %xmm1,-108(%rdi) - vmovd %xmm2,-104(%rdi) - vmovd %xmm3,-100(%rdi) - vmovd %xmm4,-96(%rdi) + vmovd %xmm10,-112(%rdi) + vmovd %xmm11,-108(%rdi) + vmovd %xmm12,-104(%rdi) + vmovd %xmm13,-100(%rdi) + vmovd %xmm14,-96(%rdi) leaq 88(%r11),%rsp vzeroupper .byte 0xf3,0xc3 @@ -1747,6 +1747,31 @@ poly1305_blocks_avx2: + vpsrldq $8,%ymm12,%ymm8 + vpsrldq $8,%ymm2,%ymm9 + vpsrldq $8,%ymm3,%ymm10 + vpsrldq $8,%ymm4,%ymm6 + vpsrldq $8,%ymm0,%ymm7 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + + vpermq $0x2,%ymm3,%ymm10 + vpermq $0x2,%ymm4,%ymm6 + vpermq $0x2,%ymm0,%ymm7 + vpermq $0x2,%ymm12,%ymm8 + vpermq $0x2,%ymm2,%ymm9 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + + + + vpsrlq $26,%ymm3,%ymm14 vpand %ymm5,%ymm3,%ymm3 vpaddq %ymm14,%ymm4,%ymm4 @@ -1778,31 +1803,6 @@ poly1305_blocks_avx2: vpand %ymm5,%ymm3,%ymm3 vpaddq %ymm14,%ymm4,%ymm4 - - - - vpsrldq $8,%ymm2,%ymm9 - vpsrldq $8,%ymm0,%ymm7 - vpsrldq $8,%ymm1,%ymm8 - vpsrldq $8,%ymm3,%ymm10 - vpsrldq $8,%ymm4,%ymm6 - vpaddq %ymm9,%ymm2,%ymm2 - vpaddq %ymm7,%ymm0,%ymm0 - vpaddq %ymm8,%ymm1,%ymm1 - vpaddq %ymm10,%ymm3,%ymm3 - vpaddq %ymm6,%ymm4,%ymm4 - - vpermq $0x2,%ymm2,%ymm9 - vpermq $0x2,%ymm0,%ymm7 - vpermq $0x2,%ymm1,%ymm8 - vpermq $0x2,%ymm3,%ymm10 - vpermq $0x2,%ymm4,%ymm6 - vpaddq %ymm9,%ymm2,%ymm2 - vpaddq %ymm7,%ymm0,%ymm0 - vpaddq %ymm8,%ymm1,%ymm1 - vpaddq %ymm10,%ymm3,%ymm3 - vpaddq %ymm6,%ymm4,%ymm4 - vmovd %xmm0,-112(%rdi) vmovd %xmm1,-108(%rdi) vmovd %xmm2,-104(%rdi) diff --git a/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S b/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S index f433f48..9b8f6f6 100644 --- a/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S +++ b/third_party/boringssl/mac-x86/crypto/poly1305/poly1305-x86.S @@ -1191,9 +1191,19 @@ L016long_tail: paddq %xmm5,%xmm1 paddq %xmm6,%xmm2 L017short_tail: + pshufd $78,%xmm4,%xmm6 + pshufd $78,%xmm3,%xmm5 + paddq %xmm6,%xmm4 + paddq %xmm5,%xmm3 + pshufd $78,%xmm0,%xmm6 + pshufd $78,%xmm1,%xmm5 + paddq %xmm6,%xmm0 + paddq %xmm5,%xmm1 + pshufd $78,%xmm2,%xmm6 movdqa %xmm3,%xmm5 pand %xmm7,%xmm3 psrlq $26,%xmm5 + paddq %xmm6,%xmm2 paddq %xmm4,%xmm5 movdqa %xmm0,%xmm6 pand %xmm7,%xmm0 @@ -1221,24 +1231,14 @@ L017short_tail: paddd %xmm5,%xmm1 pand %xmm7,%xmm3 paddd %xmm6,%xmm4 - pshufd $78,%xmm0,%xmm6 - pshufd $78,%xmm1,%xmm5 - paddd %xmm6,%xmm0 - pshufd $78,%xmm2,%xmm6 - paddd %xmm5,%xmm1 - pshufd $78,%xmm3,%xmm5 - paddd %xmm6,%xmm2 - pshufd $78,%xmm4,%xmm6 - paddd %xmm5,%xmm3 - paddd %xmm6,%xmm4 L013done: movd %xmm0,-48(%edi) movd %xmm1,-44(%edi) movd %xmm2,-40(%edi) movd %xmm3,-36(%edi) movd %xmm4,-32(%edi) -L007nodata: movl %ebp,%esp +L007nodata: popl %edi popl %esi popl %ebx diff --git a/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S b/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S index 78c887dd..13f028d 100644 --- a/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S +++ b/third_party/boringssl/mac-x86_64/crypto/poly1305/poly1305-x86_64.S @@ -1101,6 +1101,20 @@ L$short_tail_avx: + vpsrldq $8,%xmm14,%xmm9 + vpsrldq $8,%xmm13,%xmm8 + vpsrldq $8,%xmm11,%xmm6 + vpsrldq $8,%xmm10,%xmm5 + vpsrldq $8,%xmm12,%xmm7 + vpaddq %xmm8,%xmm13,%xmm13 + vpaddq %xmm9,%xmm14,%xmm14 + vpaddq %xmm5,%xmm10,%xmm10 + vpaddq %xmm6,%xmm11,%xmm11 + vpaddq %xmm7,%xmm12,%xmm12 + + + + vpsrlq $26,%xmm13,%xmm3 vpand %xmm15,%xmm13,%xmm13 vpaddq %xmm3,%xmm14,%xmm14 @@ -1132,25 +1146,11 @@ L$short_tail_avx: vpand %xmm15,%xmm13,%xmm13 vpaddq %xmm3,%xmm14,%xmm14 - - - - vpsrldq $8,%xmm12,%xmm7 - vpsrldq $8,%xmm10,%xmm5 - vpsrldq $8,%xmm11,%xmm6 - vpsrldq $8,%xmm13,%xmm8 - vpsrldq $8,%xmm14,%xmm9 - vpaddq %xmm7,%xmm12,%xmm2 - vpaddq %xmm5,%xmm10,%xmm0 - vpaddq %xmm6,%xmm11,%xmm1 - vpaddq %xmm8,%xmm13,%xmm3 - vpaddq %xmm9,%xmm14,%xmm4 - - vmovd %xmm0,-112(%rdi) - vmovd %xmm1,-108(%rdi) - vmovd %xmm2,-104(%rdi) - vmovd %xmm3,-100(%rdi) - vmovd %xmm4,-96(%rdi) + vmovd %xmm10,-112(%rdi) + vmovd %xmm11,-108(%rdi) + vmovd %xmm12,-104(%rdi) + vmovd %xmm13,-100(%rdi) + vmovd %xmm14,-96(%rdi) leaq 88(%r11),%rsp vzeroupper .byte 0xf3,0xc3 @@ -1746,6 +1746,31 @@ L$tail_avx2: + vpsrldq $8,%ymm12,%ymm8 + vpsrldq $8,%ymm2,%ymm9 + vpsrldq $8,%ymm3,%ymm10 + vpsrldq $8,%ymm4,%ymm6 + vpsrldq $8,%ymm0,%ymm7 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + + vpermq $0x2,%ymm3,%ymm10 + vpermq $0x2,%ymm4,%ymm6 + vpermq $0x2,%ymm0,%ymm7 + vpermq $0x2,%ymm12,%ymm8 + vpermq $0x2,%ymm2,%ymm9 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + + + + vpsrlq $26,%ymm3,%ymm14 vpand %ymm5,%ymm3,%ymm3 vpaddq %ymm14,%ymm4,%ymm4 @@ -1777,31 +1802,6 @@ L$tail_avx2: vpand %ymm5,%ymm3,%ymm3 vpaddq %ymm14,%ymm4,%ymm4 - - - - vpsrldq $8,%ymm2,%ymm9 - vpsrldq $8,%ymm0,%ymm7 - vpsrldq $8,%ymm1,%ymm8 - vpsrldq $8,%ymm3,%ymm10 - vpsrldq $8,%ymm4,%ymm6 - vpaddq %ymm9,%ymm2,%ymm2 - vpaddq %ymm7,%ymm0,%ymm0 - vpaddq %ymm8,%ymm1,%ymm1 - vpaddq %ymm10,%ymm3,%ymm3 - vpaddq %ymm6,%ymm4,%ymm4 - - vpermq $0x2,%ymm2,%ymm9 - vpermq $0x2,%ymm0,%ymm7 - vpermq $0x2,%ymm1,%ymm8 - vpermq $0x2,%ymm3,%ymm10 - vpermq $0x2,%ymm4,%ymm6 - vpaddq %ymm9,%ymm2,%ymm2 - vpaddq %ymm7,%ymm0,%ymm0 - vpaddq %ymm8,%ymm1,%ymm1 - vpaddq %ymm10,%ymm3,%ymm3 - vpaddq %ymm6,%ymm4,%ymm4 - vmovd %xmm0,-112(%rdi) vmovd %xmm1,-108(%rdi) vmovd %xmm2,-104(%rdi) diff --git a/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm b/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm index 3380c90..ca61027 100644 --- a/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm +++ b/third_party/boringssl/win-x86/crypto/poly1305/poly1305-x86.asm @@ -1200,9 +1200,19 @@ L$016long_tail: paddq xmm1,xmm5 paddq xmm2,xmm6 L$017short_tail: + pshufd xmm6,xmm4,78 + pshufd xmm5,xmm3,78 + paddq xmm4,xmm6 + paddq xmm3,xmm5 + pshufd xmm6,xmm0,78 + pshufd xmm5,xmm1,78 + paddq xmm0,xmm6 + paddq xmm1,xmm5 + pshufd xmm6,xmm2,78 movdqa xmm5,xmm3 pand xmm3,xmm7 psrlq xmm5,26 + paddq xmm2,xmm6 paddq xmm5,xmm4 movdqa xmm6,xmm0 pand xmm0,xmm7 @@ -1230,24 +1240,14 @@ L$017short_tail: paddd xmm1,xmm5 pand xmm3,xmm7 paddd xmm4,xmm6 - pshufd xmm6,xmm0,78 - pshufd xmm5,xmm1,78 - paddd xmm0,xmm6 - pshufd xmm6,xmm2,78 - paddd xmm1,xmm5 - pshufd xmm5,xmm3,78 - paddd xmm2,xmm6 - pshufd xmm6,xmm4,78 - paddd xmm3,xmm5 - paddd xmm4,xmm6 L$013done: movd DWORD [edi-48],xmm0 movd DWORD [edi-44],xmm1 movd DWORD [edi-40],xmm2 movd DWORD [edi-36],xmm3 movd DWORD [edi-32],xmm4 -L$007nodata: mov esp,ebp +L$007nodata: pop edi pop esi pop ebx diff --git a/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm b/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm index 65c9ac6..6d285b6 100644 --- a/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm +++ b/third_party/boringssl/win-x86_64/crypto/poly1305/poly1305-x86_64.asm @@ -1159,6 +1159,20 @@ $L$short_tail_avx: + vpsrldq xmm9,xmm14,8 + vpsrldq xmm8,xmm13,8 + vpsrldq xmm6,xmm11,8 + vpsrldq xmm5,xmm10,8 + vpsrldq xmm7,xmm12,8 + vpaddq xmm13,xmm13,xmm8 + vpaddq xmm14,xmm14,xmm9 + vpaddq xmm10,xmm10,xmm5 + vpaddq xmm11,xmm11,xmm6 + vpaddq xmm12,xmm12,xmm7 + + + + vpsrlq xmm3,xmm13,26 vpand xmm13,xmm13,xmm15 vpaddq xmm14,xmm14,xmm3 @@ -1190,25 +1204,11 @@ $L$short_tail_avx: vpand xmm13,xmm13,xmm15 vpaddq xmm14,xmm14,xmm3 - - - - vpsrldq xmm7,xmm12,8 - vpsrldq xmm5,xmm10,8 - vpsrldq xmm6,xmm11,8 - vpsrldq xmm8,xmm13,8 - vpsrldq xmm9,xmm14,8 - vpaddq xmm2,xmm12,xmm7 - vpaddq xmm0,xmm10,xmm5 - vpaddq xmm1,xmm11,xmm6 - vpaddq xmm3,xmm13,xmm8 - vpaddq xmm4,xmm14,xmm9 - - vmovd DWORD[(-112)+rdi],xmm0 - vmovd DWORD[(-108)+rdi],xmm1 - vmovd DWORD[(-104)+rdi],xmm2 - vmovd DWORD[(-100)+rdi],xmm3 - vmovd DWORD[(-96)+rdi],xmm4 + vmovd DWORD[(-112)+rdi],xmm10 + vmovd DWORD[(-108)+rdi],xmm11 + vmovd DWORD[(-104)+rdi],xmm12 + vmovd DWORD[(-100)+rdi],xmm13 + vmovd DWORD[(-96)+rdi],xmm14 vmovdqa xmm6,XMMWORD[80+r11] vmovdqa xmm7,XMMWORD[96+r11] vmovdqa xmm8,XMMWORD[112+r11] @@ -1851,6 +1851,31 @@ $L$tail_avx2: + vpsrldq ymm8,ymm12,8 + vpsrldq ymm9,ymm2,8 + vpsrldq ymm10,ymm3,8 + vpsrldq ymm6,ymm4,8 + vpsrldq ymm7,ymm0,8 + vpaddq ymm12,ymm12,ymm8 + vpaddq ymm2,ymm2,ymm9 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm6 + vpaddq ymm0,ymm0,ymm7 + + vpermq ymm10,ymm3,0x2 + vpermq ymm6,ymm4,0x2 + vpermq ymm7,ymm0,0x2 + vpermq ymm8,ymm12,0x2 + vpermq ymm9,ymm2,0x2 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm6 + vpaddq ymm0,ymm0,ymm7 + vpaddq ymm12,ymm12,ymm8 + vpaddq ymm2,ymm2,ymm9 + + + + vpsrlq ymm14,ymm3,26 vpand ymm3,ymm3,ymm5 vpaddq ymm4,ymm4,ymm14 @@ -1882,31 +1907,6 @@ $L$tail_avx2: vpand ymm3,ymm3,ymm5 vpaddq ymm4,ymm4,ymm14 - - - - vpsrldq ymm9,ymm2,8 - vpsrldq ymm7,ymm0,8 - vpsrldq ymm8,ymm1,8 - vpsrldq ymm10,ymm3,8 - vpsrldq ymm6,ymm4,8 - vpaddq ymm2,ymm2,ymm9 - vpaddq ymm0,ymm0,ymm7 - vpaddq ymm1,ymm1,ymm8 - vpaddq ymm3,ymm3,ymm10 - vpaddq ymm4,ymm4,ymm6 - - vpermq ymm9,ymm2,0x2 - vpermq ymm7,ymm0,0x2 - vpermq ymm8,ymm1,0x2 - vpermq ymm10,ymm3,0x2 - vpermq ymm6,ymm4,0x2 - vpaddq ymm2,ymm2,ymm9 - vpaddq ymm0,ymm0,ymm7 - vpaddq ymm1,ymm1,ymm8 - vpaddq ymm3,ymm3,ymm10 - vpaddq ymm4,ymm4,ymm6 - vmovd DWORD[(-112)+rdi],xmm0 vmovd DWORD[(-108)+rdi],xmm1 vmovd DWORD[(-104)+rdi],xmm2 |