summaryrefslogtreecommitdiffstats
path: root/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
blob: 534c4ad0e94fb019c084491b2fc7f995f0468be9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
; RUN: opt < %s -analyze -block-freq | FileCheck %s

; This code contains three loops. One is triple-nested, the
; second is double nested and the third is a single loop. At
; runtime, all three loops execute 1,000,000 times each. We used to
; give different frequencies to each of the loops because loop
; scales were limited to no more than 4,096.
;
; This was penalizing the hotness of the second and third loops
; because BFI was reducing the loop scale for for.cond16 and
; for.cond26 to a max of 4,096.
;
; Without this restriction, all loops are now correctly given the same
; frequency values.
;
; Original C code:
;
;
; int g;
; __attribute__((noinline)) void bar() {
;  g++;
; }
;
; extern int printf(const char*, ...);
;
; int main()
; {
;   int i, j, k;
;
;   g = 0;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 100; j++)
;        for (k = 0; k < 100; k++)
;            bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;   for (i = 0; i < 100; i++)
;     for (j = 0; j < 10000; j++)
;         bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
;
;
;   for (i = 0; i < 1000000; i++)
;     bar();
;
;   printf ("g = %d\n", g);
;   g = 0;
; }

; Global counter. @main stores 0 to it before each loop nest and passes its
; loaded value to printf after each nest. The C source quoted above shows
; bar() incrementing it, but only bar's declaration is present in this file.
@g = common global i32 0, align 4
; Format string "g = %d\n" (NUL-terminated, 8 bytes) handed to printf.
@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1

; External functions: declarations only, no bodies in this test.
declare void @bar()
declare i32 @printf(i8*, ...)

; CHECK: Printing analysis {{.*}} for function 'main':
; CHECK-NEXT: block-frequency-info: main
; @main: unoptimized (alloca/load/store) IR for the C program quoted at the
; top of the file. It contains three loop nests in sequence, each of which
; calls @bar from its innermost body:
;   1. for.cond  -> for.cond1 -> for.cond4   (bounds 100 x 100 x 100)
;   2. for.cond13 -> for.cond16              (bounds 100 x 10000)
;   3. for.cond26                            (bound 1000000)
; Every loop-header branch carries !prof branch weights; the FileCheck
; expectations in front of for.body6, for.body18 and for.body28 require the
; three innermost bodies to be reported with the same block frequency.
define i32 @main() {
entry:
  ; Stack slots for the return value and the loop counters i, j, k.
  %retval = alloca i32, align 4
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  store i32 0, i32* %retval
  ; g = 0; i = 0; then enter the first (triple-nested) loop.
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; ---- Loop nest 1, outer header: i < 100 (weights !1) ----
for.cond:                                         ; preds = %for.inc10, %entry
  %0 = load i32, i32* %i, align 4
  %cmp = icmp slt i32 %0, 100
  br i1 %cmp, label %for.body, label %for.end12, !prof !1

for.body:                                         ; preds = %for.cond
  ; j = 0 before entering the middle loop.
  store i32 0, i32* %j, align 4
  br label %for.cond1

; ---- Loop nest 1, middle header: j < 100 (weights !2) ----
for.cond1:                                        ; preds = %for.inc7, %for.body
  %1 = load i32, i32* %j, align 4
  %cmp2 = icmp slt i32 %1, 100
  br i1 %cmp2, label %for.body3, label %for.end9, !prof !2

for.body3:                                        ; preds = %for.cond1
  ; k = 0 before entering the innermost loop.
  store i32 0, i32* %k, align 4
  br label %for.cond4

; ---- Loop nest 1, inner header: k < 100 (weights !3) ----
for.cond4:                                        ; preds = %for.inc, %for.body3
  %2 = load i32, i32* %k, align 4
  %cmp5 = icmp slt i32 %2, 100
  br i1 %cmp5, label %for.body6, label %for.end, !prof !3

; Innermost body of loop nest 1; its expected frequency is checked here.
; CHECK: - for.body6: float = 500000.5, int = 4000003
for.body6:                                        ; preds = %for.cond4
  call void @bar()
  br label %for.inc

for.inc:                                          ; preds = %for.body6
  ; k++
  %3 = load i32, i32* %k, align 4
  %inc = add nsw i32 %3, 1
  store i32 %inc, i32* %k, align 4
  br label %for.cond4

for.end:                                          ; preds = %for.cond4
  br label %for.inc7

for.inc7:                                         ; preds = %for.end
  ; j++
  %4 = load i32, i32* %j, align 4
  %inc8 = add nsw i32 %4, 1
  store i32 %inc8, i32* %j, align 4
  br label %for.cond1

for.end9:                                         ; preds = %for.cond1
  br label %for.inc10

for.inc10:                                        ; preds = %for.end9
  ; i++
  %5 = load i32, i32* %i, align 4
  %inc11 = add nsw i32 %5, 1
  store i32 %inc11, i32* %i, align 4
  br label %for.cond

; Exit of loop nest 1: print g, then reset g and i for loop nest 2.
for.end12:                                        ; preds = %for.cond
  %6 = load i32, i32* @g, align 4
  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond13

; ---- Loop nest 2, outer header: i < 100 (weights !1, shared with for.cond) ----
for.cond13:                                       ; preds = %for.inc22, %for.end12
  %7 = load i32, i32* %i, align 4
  %cmp14 = icmp slt i32 %7, 100
  br i1 %cmp14, label %for.body15, label %for.end24, !prof !1

for.body15:                                       ; preds = %for.cond13
  ; j = 0 before entering the inner loop.
  store i32 0, i32* %j, align 4
  br label %for.cond16

; ---- Loop nest 2, inner header: j < 10000 (weights !4) ----
for.cond16:                                       ; preds = %for.inc19, %for.body15
  %8 = load i32, i32* %j, align 4
  %cmp17 = icmp slt i32 %8, 10000
  br i1 %cmp17, label %for.body18, label %for.end21, !prof !4

; Innermost body of loop nest 2; expected to match for.body6's frequency.
; CHECK: - for.body18: float = 500000.5, int = 4000003
for.body18:                                       ; preds = %for.cond16
  call void @bar()
  br label %for.inc19

for.inc19:                                        ; preds = %for.body18
  ; j++
  %9 = load i32, i32* %j, align 4
  %inc20 = add nsw i32 %9, 1
  store i32 %inc20, i32* %j, align 4
  br label %for.cond16

for.end21:                                        ; preds = %for.cond16
  br label %for.inc22

for.inc22:                                        ; preds = %for.end21
  ; i++
  %10 = load i32, i32* %i, align 4
  %inc23 = add nsw i32 %10, 1
  store i32 %inc23, i32* %i, align 4
  br label %for.cond13

; Exit of loop nest 2: print g, then reset g and i for the final loop.
for.end24:                                        ; preds = %for.cond13
  %11 = load i32, i32* @g, align 4
  %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
  store i32 0, i32* @g, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond26

; ---- Loop 3, single header: i < 1000000 (weights !5) ----
for.cond26:                                       ; preds = %for.inc29, %for.end24
  %12 = load i32, i32* %i, align 4
  %cmp27 = icmp slt i32 %12, 1000000
  br i1 %cmp27, label %for.body28, label %for.end31, !prof !5

; Body of loop 3; expected to match the other two innermost bodies.
; CHECK: - for.body28: float = 500000.5, int = 4000003
for.body28:                                       ; preds = %for.cond26
  call void @bar()
  br label %for.inc29

for.inc29:                                        ; preds = %for.body28
  ; i++
  %13 = load i32, i32* %i, align 4
  %inc30 = add nsw i32 %13, 1
  store i32 %inc30, i32* %i, align 4
  br label %for.cond26

; Exit of loop 3: print g, reset it, and return the value stored in %retval
; (set to 0 in the entry block and not written again in this function).
for.end31:                                        ; preds = %for.cond26
  %14 = load i32, i32* @g, align 4
  %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
  store i32 0, i32* @g, align 4
  %15 = load i32, i32* %retval
  ret i32 %15
}

; Producer identification string.
!llvm.ident = !{!0}

!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
; Branch weights attached to the loop-header conditional branches in @main.
; Each node holds two weights, given in the branch's successor order; in every
; use below the first successor is the loop body and the second is the exit.
; !1: for.cond and for.cond13 (i < 100 headers of loop nests 1 and 2).
!1 = !{!"branch_weights", i32 101, i32 2}
; !2: for.cond1 (j < 100 header of loop nest 1).
!2 = !{!"branch_weights", i32 10001, i32 101}
; !3: for.cond4 (k < 100 header of loop nest 1).
!3 = !{!"branch_weights", i32 1000001, i32 10001}
; !4: for.cond16 (j < 10000 header of loop nest 2).
!4 = !{!"branch_weights", i32 1000001, i32 101}
; !5: for.cond26 (i < 1000000 header of loop 3).
!5 = !{!"branch_weights", i32 1000001, i32 2}