diff options
author | Rong Xu <xur@google.com> | 2014-07-21 16:47:22 -0700 |
---|---|---|
committer | Rong Xu <xur@google.com> | 2014-07-29 15:31:03 -0700 |
commit | 38a8aecfb882072900434499696b5c32a2274515 (patch) | |
tree | 2aac97f0ae24b03cd98c1a06e989c031c173f889 /gcc-4.9/gcc/config/aarch64/aarch64-simd.md | |
parent | c231900e5dcc14d8296bd9f62b45997a49d4d5e7 (diff) | |
download | toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.zip toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.tar.gz toolchain_gcc-38a8aecfb882072900434499696b5c32a2274515.tar.bz2 |
[4.9] Switch gcc-4.9 to use google/gcc-4_9 branch.
This source drop uses svn version r212828 of google/gcc-4.9 branch.
We also cherry-picked r213062, r213063 and r213064 to fix windows
build issues.
All gcc-4.9 patches before July 3rd are ported to google/gcc-4.9.
The following prior commits has not been merged to google branch yet.
(They are included in this commit).
e7af147f979e657fe2df00808e5b4319b0e088c6,
baf87df3cb2683649ba7e9872362a7e721117c23, and
c231900e5dcc14d8296bd9f62b45997a49d4d5e7.
Change-Id: I4bea3ea470387ff751c2be4cb0d4a12059b9299b
Diffstat (limited to 'gcc-4.9/gcc/config/aarch64/aarch64-simd.md')
-rw-r--r-- | gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 461 |
1 files changed, 401 insertions, 60 deletions
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 73aee2c..1f827b5 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,14 +934,22 @@ [(set_attr "type" "neon_minmax<q>")] ) -;; Move into low-half clearing high half to 0. +;; vec_concat gives a new vector with the low elements from operand 1, and +;; the high elements from operand 2. That is to say, given op1 = { a, b } +;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. +;; What that means, is that the RTL descriptions of the below patterns +;; need to change depending on endianness. -(define_insn "move_lo_quad_<mode>" +;; Move to the low architectural bits of the register. +;; On little-endian this is { operand, zeroes } +;; On big-endian this is { zeroes, operand } + +(define_insn "move_lo_quad_internal_<mode>" [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ (match_operand:<VHALF> 1 "register_operand" "w,r,r") (vec_duplicate:<VHALF> (const_int 0))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 @@ -952,7 +960,39 @@ (set_attr "length" "4")] ) -;; Move into high-half. +(define_insn "move_lo_quad_internal_be_<mode>" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (vec_duplicate:<VHALF> (const_int 0)) + (match_operand:<VHALF> 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +(define_expand "move_lo_quad_<mode>" + [(match_operand:VQ 0 "register_operand") + (match_operand:VQ 1 "register_operand")] + "TARGET_SIMD" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1])); + else + emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1])); + DONE; +} +) + +;; Move operand1 to the high architectural bits of the register, keeping +;; the low architectural bits of operand2. +;; For little-endian this is { operand2, operand1 } +;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_<mode>" [(set (match_operand:VQ 0 "register_operand" "+w,w") @@ -961,12 +1001,25 @@ (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) (match_operand:<VHALF> 1 "register_operand" "w,r")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins") - (set_attr "length" "4")] + [(set_attr "type" "neon_ins")] +) + +(define_insn "aarch64_simd_move_hi_quad_be_<mode>" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (match_operand:<VHALF> 1 "register_operand" "w,r") + (vec_select:<VHALF> + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins")] ) (define_expand "move_hi_quad_<mode>" @@ -974,9 +1027,13 @@ (match_operand:<VHALF> 1 "register_operand" "")] "TARGET_SIMD" { - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); - emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], - operands[1], p)); + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], + operands[1], p)); + else + emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], + operands[1], p)); DONE; }) @@ -2321,12 +2378,44 @@ (vec_concat:<VDBL> (match_operand:VDIC 1 "register_operand" "w") (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "mov\\t%0.8b, %1.8b" [(set_attr "type" "neon_move<q>")] ) -(define_insn_and_split "aarch64_combine<mode>" +(define_insn "*aarch64_combinez_be<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:VDIC 1 "register_operand" "w")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move<q>")] +) + +(define_expand "aarch64_combine<mode>" + [(match_operand:<VDBL> 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] + "TARGET_SIMD" +{ + rtx op1, op2; + if (BYTES_BIG_ENDIAN) + { + op1 = operands[2]; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = operands[2]; + } + emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2)); + DONE; +} +) + +(define_insn_and_split "aarch64_combine_internal<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=&w") (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] @@ -2335,16 +2424,19 @@ "&& reload_completed" [(const_int 0)] { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + if (BYTES_BIG_ENDIAN) + aarch64_split_simd_combine (operands[0], operands[2], operands[1]); + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } [(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine<mode>" - [(set (match_operand:<VDBL> 0 "register_operand" "=&w") - (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] + [(match_operand:<VDBL> 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); @@ -2633,7 +2725,41 @@ ;; sq<r>dmulh_lane -(define_insn "aarch64_sq<r>dmulh_lane<mode>" +(define_expand "aarch64_sqdmulh_lane<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2649,7 +2775,41 @@ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) -(define_insn "aarch64_sq<r>dmulh_laneq<mode>" +(define_expand "aarch64_sqdmulh_laneq<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCONQ> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCONQ> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_laneq<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2659,24 +2819,56 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) -(define_insn "aarch64_sq<r>dmulh_lane<mode>" +(define_expand "aarch64_sqdmulh_lane<mode>" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane<mode>" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") (vec_select:<VEL> - (match_operand:<VCONQ> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); - operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) @@ -2712,7 +2904,31 @@ (sign_extend:<VWIDE> (vec_duplicate:VD_HSI (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) )) (const_int 1))))] @@ -2735,7 +2951,30 @@ (match_operand:SD_HSI 2 "register_operand" "w")) (sign_extend:<VWIDE> (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) ) (const_int 1))))] @@ -2752,11 +2991,12 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2767,12 +3007,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2782,11 +3023,12 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2797,12 +3039,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2890,7 +3133,33 @@ (sign_extend:<VWIDE> (vec_duplicate:<VHALF> (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) )))) (const_int 1))))] @@ -2907,12 +3176,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2923,13 +3193,14 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -2939,12 +3210,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2955,13 +3227,14 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -3041,7 +3314,28 @@ (sign_extend:<VWIDE> (vec_duplicate:VD_HSI (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3061,7 +3355,27 @@ (match_operand:SD_HSI 1 "register_operand" "w")) (sign_extend:<VWIDE> (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")])) )) (const_int 1)))] @@ -3076,11 +3390,12 @@ (define_expand "aarch64_sqdmull_lane<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VSD_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3089,12 +3404,13 @@ (define_expand "aarch64_sqdmull_laneq<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VD_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmull_lane<mode>_internal + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3])); DONE; }) @@ -3143,7 +3459,7 @@ (define_expand "aarch64_sqdmull2<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "w")] + (match_operand:VQ_HSI 2 "register_operand" "w")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); @@ -3165,7 +3481,30 @@ (sign_extend:<VWIDE> (vec_duplicate:<VHALF> (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3180,12 +3519,13 @@ (define_expand "aarch64_sqdmull2_lane<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3195,13 +3535,14 @@ (define_expand "aarch64_sqdmull2_laneq<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; |