; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Per-lane variable left shift of <4 x i32>. With AVX2 enabled this is
; expected to select vpsllvd. The function is anchored with a label match
; (trailing colon) so the name cannot match variable_shl0_load or a .globl line.
; CHECK-LABEL: variable_shl0:
; CHECK: vpsllvd
; CHECK: ret
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable left shift of <8 x i32>, expected to select vpsllvd.
; Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_shl1:
; CHECK: vpsllvd
; CHECK: ret
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}
; Per-lane variable left shift of <2 x i64>, expected to select vpsllvq.
; Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_shl2:
; CHECK: vpsllvq
; CHECK: ret
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}
; Per-lane variable left shift of <4 x i64>, expected to select vpsllvq.
; Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_shl3:
; CHECK: vpsllvq
; CHECK: ret
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}
; Per-lane variable logical right shift of <4 x i32>, expected to select
; vpsrlvd. Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_srl0:
; CHECK: vpsrlvd
; CHECK: ret
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable logical right shift of <8 x i32>, expected to select
; vpsrlvd. Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_srl1:
; CHECK: vpsrlvd
; CHECK: ret
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}
; Per-lane variable logical right shift of <2 x i64>, expected to select
; vpsrlvq. Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_srl2:
; CHECK: vpsrlvq
; CHECK: ret
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}
; Per-lane variable logical right shift of <4 x i64>, expected to select
; vpsrlvq. Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_srl3:
; CHECK: vpsrlvq
; CHECK: ret
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

; Per-lane variable arithmetic right shift of <4 x i32>, expected to select
; vpsravd. The label match (trailing colon) keeps the name from matching
; variable_sra0_load or a .globl line.
; CHECK-LABEL: variable_sra0:
; CHECK: vpsravd
; CHECK: ret
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}
; Per-lane variable arithmetic right shift of <8 x i32>, expected to select
; vpsravd. Anchored with a label match so the checks stay inside this function.
; CHECK-LABEL: variable_sra1:
; CHECK: vpsravd
; CHECK: ret
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left
; Left shift of every <8 x i32> lane by the constant 2, expected to select
; vpslld. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift00:
; CHECK: vpslld
; CHECK: ret
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Left shift of every <16 x i16> lane by the constant 2, expected to select
; vpsllw. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift01:
; CHECK: vpsllw
; CHECK: ret
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Left shift of every <4 x i64> lane by the constant 2, expected to select
; vpsllq. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift02:
; CHECK: vpsllq
; CHECK: ret
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right
; Logical right shift of every <8 x i32> lane by the constant 2, expected to
; select vpsrld. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift03:
; CHECK: vpsrld
; CHECK: ret
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Logical right shift of every <16 x i16> lane by the constant 2, expected to
; select vpsrlw. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift04:
; CHECK: vpsrlw
; CHECK: ret
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Logical right shift of every <4 x i64> lane by the constant 2, expected to
; select vpsrlq. A label match and a ret match bound the check to this function.
; CHECK-LABEL: vshift05:
; CHECK: vpsrlq
; CHECK: ret
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right
; Arithmetic right shift of every <8 x i32> lane by the constant 2, expected
; to select vpsrad. A label match and a ret match bound the check to this
; function.
; CHECK-LABEL: vshift06:
; CHECK: vpsrad
; CHECK: ret
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; Arithmetic right shift of every <16 x i16> lane by the constant 2, expected
; to select vpsraw. A label match and a ret match bound the check to this
; function.
; CHECK-LABEL: vshift07:
; CHECK: vpsraw
; CHECK: ret
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; Variable arithmetic right shift of <4 x i32> where the shift amounts come
; from memory. The "(%" after the mnemonic requires vpsravd to take the
; amounts as a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_sra0_load:
; CHECK: vpsravd (%
; CHECK: ret
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; Variable arithmetic right shift of <8 x i32> where the shift amounts come
; from memory. The "(%" after the mnemonic requires vpsravd to take the
; amounts as a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_sra1_load:
; CHECK: vpsravd (%
; CHECK: ret
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; Variable left shift of <4 x i32> where the shift amounts come from memory.
; The "(%" after the mnemonic requires vpsllvd to take the amounts as a
; memory operand. Anchored with a label match.
; CHECK-LABEL: variable_shl0_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}
; Variable left shift of <8 x i32> where the shift amounts come from memory.
; The "(%" after the mnemonic requires vpsllvd to take the amounts as a
; memory operand. Anchored with a label match.
; CHECK-LABEL: variable_shl1_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}
; Variable left shift of <2 x i64> where the shift amounts come from memory.
; The "(%" after the mnemonic requires vpsllvq to take the amounts as a
; memory operand. Anchored with a label match.
; CHECK-LABEL: variable_shl2_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}
; Variable left shift of <4 x i64> where the shift amounts come from memory.
; The "(%" after the mnemonic requires vpsllvq to take the amounts as a
; memory operand. Anchored with a label match.
; CHECK-LABEL: variable_shl3_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}
; Variable logical right shift of <4 x i32> where the shift amounts come from
; memory. The "(%" after the mnemonic requires vpsrlvd to take the amounts as
; a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_srl0_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}
; Variable logical right shift of <8 x i32> where the shift amounts come from
; memory. The "(%" after the mnemonic requires vpsrlvd to take the amounts as
; a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_srl1_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}
; Variable logical right shift of <2 x i64> where the shift amounts come from
; memory. The "(%" after the mnemonic requires vpsrlvq to take the amounts as
; a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_srl2_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}
; Variable logical right shift of <4 x i64> where the shift amounts come from
; memory. The "(%" after the mnemonic requires vpsrlvq to take the amounts as
; a memory operand. Anchored with a label match.
; CHECK-LABEL: variable_srl3_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; Left shift of all 32 i8 lanes by the constant 3. The expected output is a
; vpsllw by 3 followed by a vpand.
; CHECK-LABEL: shl9:
; CHECK: vpsllw $3
; CHECK: vpand
; CHECK: ret
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
  %shifted = shl <32 x i8> %A, <
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shifted
}

; Logical right shift of all 32 i8 lanes by the constant 3. The expected
; output is a vpsrlw by 3 followed by a vpand.
; CHECK-LABEL: shr9:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: ret
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
  %shifted = lshr <32 x i8> %A, <
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shifted
}

; Arithmetic right shift of all 32 i8 lanes by the constant 7. The expected
; output is a vpxor followed by a vpcmpgtb.
; CHECK-LABEL: sra_v32i8_7:
; CHECK: vpxor
; CHECK: vpcmpgtb
; CHECK: ret
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
  %shifted = ashr <32 x i8> %A, <
      i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
      i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
      i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
      i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %shifted
}

; Arithmetic right shift of all 32 i8 lanes by the constant 3. The expected
; output is, in order: vpsrlw by 3, vpand, vpxor, vpsubb.
; CHECK-LABEL: sra_v32i8:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: vpxor
; CHECK: vpsubb
; CHECK: ret
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
  %shifted = ashr <32 x i8> %A, <
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
      i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shifted
}

; Truncate <16 x i16> to <16 x i8> and sign-extend back. The expected output
; is a vpsllw followed by a vpsraw, with no vinsertf128 between the vpsraw
; and the return (presumably guarding against the operation being split into
; 128-bit halves -- confirm against the original commit).
; The label match omits the Darwin "_" prefix and ends with a colon so it
; anchors on the function's own label line.
; CHECK-LABEL: sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK-NOT: vinsertf128
; CHECK: ret
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

; Truncate <8 x i32> to <8 x i16> and sign-extend back. The expected output
; is a vpslld followed by a vpsrad, with no vinsertf128 between the vpsrad
; and the return (presumably guarding against the operation being split into
; 128-bit halves -- confirm against the original commit).
; The label match omits the Darwin "_" prefix and ends with a colon so it
; anchors on the function's own label line.
; CHECK-LABEL: sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK-NOT: vinsertf128
; CHECK: ret
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

; Per-lane variable left shift of <8 x i16>. The expected output widens both
; operands to a ymm register with vpmovzxwd (in either order), shifts with
; vpsllvd using those two registers, and then emits vpshufb and vpermq.
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
  %shifted = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %shifted
}

; Per-lane variable arithmetic right shift of <8 x i16>. The expected output
; widens the shift amounts with vpmovzxwd and the shifted value with
; vpmovsxwd (in either order), shifts with vpsravd using those two registers,
; and then emits vpshufb and vpermq.
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
  %shifted = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %shifted
}

; Per-lane variable logical right shift of <8 x i16>. The expected output
; widens both operands to a ymm register with vpmovzxwd (in either order),
; shifts with vpsrlvd using those two registers, and then emits vpshufb and
; vpermq.
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
  %shifted = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %shifted
}