; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Code generation test for vector shifts when AVX2 is enabled.
;
; Every function is anchored with a label check on "name:" (the trailing
; colon keeps e.g. variable_shl1 from matching variable_shl1_load or
; variable_shl16), so each instruction check can only be satisfied by the
; assembly emitted for the function it belongs to.

;;; Per-element variable shifts, shift amounts in a register

; CHECK-LABEL: variable_shl0:
; CHECK: vpsllvd
; CHECK: ret
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_shl1:
; CHECK: vpsllvd
; CHECK: ret
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl2:
; CHECK: vpsllvq
; CHECK: ret
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_shl3:
; CHECK: vpsllvq
; CHECK: ret
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_srl0:
; CHECK: vpsrlvd
; CHECK: ret
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_srl1:
; CHECK: vpsrlvd
; CHECK: ret
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_srl2:
; CHECK: vpsrlvq
; CHECK: ret
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_srl3:
; CHECK: vpsrlvq
; CHECK: ret
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_sra0:
; CHECK: vpsravd
; CHECK: ret
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_sra1:
; CHECK: vpsravd
; CHECK: ret
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left by a uniform constant (256-bit vectors)

; CHECK-LABEL: vshift00:
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift01:
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift02:
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical shift right by a uniform constant (256-bit vectors)

; CHECK-LABEL: vshift03:
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift04:
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift05:
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic shift right by a uniform constant (256-bit vectors)

; CHECK-LABEL: vshift06:
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift07:
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

;;; Per-element variable shifts, shift amounts loaded from memory.
;;; The "(%" after the mnemonic requires the first printed operand to be a
;;; memory reference, i.e. the load must be folded into the shift.

; CHECK-LABEL: variable_sra0_load:
; CHECK: vpsravd (%
; CHECK: ret
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_sra1_load:
; CHECK: vpsravd (%
; CHECK: ret
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl0_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_shl1_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl2_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_shl3_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_srl0_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_srl1_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_srl2_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_srl3_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

;;; Byte-element shifts by a uniform constant.
;;; The checks below expect these to be built from 16-bit shifts plus
;;; masking / sign fix-up instructions rather than a single byte shift.

define <32 x i8> @shl9(<32 x i8> %A) nounwind {
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shl9:
; CHECK: vpsllw $3
; CHECK: vpand
; CHECK: ret
}

define <32 x i8> @shr9(<32 x i8> %A) nounwind {
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shr9:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: ret
}

; Arithmetic shift right by 7 leaves only copies of the sign bit in each
; byte; the checks expect a compare against zero (vpxor + vpcmpgtb).
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8_7:
; CHECK: vpxor
; CHECK: vpcmpgtb
; CHECK: ret
}

define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: vpxor
; CHECK: vpsubb
; CHECK: ret
}

;;; trunc followed by sext back to the original element width.
;;; Expected as a shift-left / arithmetic-shift-right pair on the full
;;; 256-bit vector, with no vinsertf128 (no split into 128-bit halves).

; CHECK-LABEL: sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK-NOT: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

; CHECK-LABEL: sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK-NOT: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

;;; Variable shifts of 16-bit elements.
;;; The checks expect both operands to be widened to 32-bit lanes, shifted
;;; with the 32-bit variable shift, then narrowed again (vpshufb + vpermq).

define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; The shifted value is sign-extended (vpmovsxwd); the amount is zero-extended.
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}