diff options
Diffstat (limited to 'test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll')
-rw-r--r-- | test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll new file mode 100644 index 0000000..10b2c3a --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll @@ -0,0 +1,101 @@ +; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s +; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys +; +; LSR on loop %while.cond should reassociate non-address mode +; expressions at use %cmp16 to avoid sinking computation into %while.body18. +; +; Remove the -pre-RA-sched=list-hybrid option after fixing: +; <rdar://problem/12702735> [ARM64][coalescer] need better register +; coalescing for simple unit tests. + +; CHECK: @memset +; CHECK: %while.body18{{$}} +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8 +; First set the IVREG variable, then use it +; CHECK-NEXT: sub [[IVREG:x[0-9]+]], +; CHECK: [[IVREG]], #8 +; CHECK-NEXT: cmp [[IVREG]], #7 +; CHECK-NEXT: b.hi +define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat { +entry: + %cmp = icmp eq i64 %len, 0 + br i1 %cmp, label %done, label %while.cond.preheader + +while.cond.preheader: ; preds = %entry + %conv = trunc i32 %val to i8 + br label %while.cond + +while.cond: ; preds = %while.body, %while.cond.preheader + %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ] + %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ] + %cond = icmp eq i64 %len.addr.0, 0 + br i1 %cond, label %done, label %land.rhs + +land.rhs: ; preds = %while.cond + %0 = ptrtoint i8* %ptr.0 to i64 + %and = and i64 %0, 7 + %cmp5 = icmp eq i64 %and, 0 + br i1 %cmp5, label %if.end9, label %while.body + +while.body: ; preds = %land.rhs + %incdec.ptr = getelementptr inbounds i8* %ptr.0, i64 1 + store i8 %conv, i8* %ptr.0, align 1, !tbaa !0 + %dec = add i64 %len.addr.0, -1 + br label %while.cond + +if.end9: ; preds = %land.rhs + %conv.mask = and i32 %val, 255 + %1 = zext i32 %conv.mask to i64 + %2 = shl nuw nsw i64 %1, 8 + %ins18 = or i64 %2, %1 + %3 = shl nuw nsw i64 %1, 16 + %ins15 = or i64 %ins18, %3 + %4 = shl nuw nsw i64 %1, 24 + %5 = shl nuw nsw i64 %1, 32 + %mask8 = or i64 %ins15, %4 + %6 = shl nuw nsw i64 %1, 40 + %mask5 = or i64 %mask8, %5 + %7 = shl nuw nsw i64 %1, 48 + %8 = shl nuw i64 %1, 56 + %mask2.masked = or i64 %mask5, %6 + %mask = or i64 %mask2.masked, %7 + %ins = or i64 %mask, %8 + %9 = bitcast i8* %ptr.0 to i64* + %cmp1636 = icmp ugt i64 %len.addr.0, 7 + br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph + +while.body18: ; preds = %if.end9, %while.body18 + %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ] + %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ] + %incdec.ptr19 = getelementptr inbounds i64* %wideptr.038, i64 1 + store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2 + %sub = add i64 %len.addr.137, -8 + %cmp16 = icmp ugt i64 %sub, 7 + br i1 %cmp16, label %while.body18, label %while.end20 + +while.end20: ; preds = %while.body18 + %cmp21 = icmp eq i64 %sub, 0 + br i1 %cmp21, label %done, label %while.body29.lr.ph + +while.body29.lr.ph: ; preds = %while.end20, %if.end9 + %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ] + %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ] + %10 = bitcast i64* %wideptr.0.lcssa48 to i8* + br label %while.body29 + +while.body29: ; preds = %while.body29, %while.body29.lr.ph + %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ] + %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ] + %dec26 = add i64 %len.addr.235, -1 + %incdec.ptr31 = getelementptr inbounds i8* %ptr.134, i64 1 + store i8 %conv, i8* %ptr.134, align 1, !tbaa !0 + %cmp27 = icmp eq i64 %dec26, 0 + br i1 %cmp27, label %done, label %while.body29 + +done: ; preds = %while.cond, %while.body29, %while.end20, %entry + ret i8* %dest +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"long long", metadata !0} |