diff options
author | Paul Berry <stereotype441@gmail.com> | 2013-11-29 00:16:43 -0800 |
---|---|---|
committer | Paul Berry <stereotype441@gmail.com> | 2013-12-09 10:55:06 -0800 |
commit | 7ea3baa64da061f86a50c41081a26e0c2859e99c (patch) | |
tree | 49e2a530583627aaefd7aa0af28878d8b9a41e00 /src/glsl/loop_unroll.cpp | |
parent | 4d844cfa56220b7de8ca676ad222d89f81c60c09 (diff) | |
download | external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.zip external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.tar.gz external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.tar.bz2 |
glsl/loops: Stop creating normatively bound loops in loop_controls.
Previously, when loop_controls analyzed a loop and found that it had a
fixed bound (known at compile time), it would remove all of the loop
terminators and instead set the loop's normative_bound field to force
the loop to execute the correct number of times.
This made loop unrolling easy, but it had a serious disadvantage.
Since most GPUs don't have a native mechanism for executing a loop a
fixed number of times, in order to implement the normative bound, the
back-ends would have to synthesize a new loop induction variable. As
a result, many loops wound up having two induction variables instead
of one. This caused extra register pressure and unnecessary
instructions.
This patch modifies loop_controls so that it doesn't set the loop's
normative_bound anymore. Instead it leaves one of the terminators in
the loop (the limiting terminator), so the back-end doesn't have to go
to any extra work to ensure the loop terminates at the right time.
This complicates loop unrolling slightly: when deciding whether a loop
can be unrolled, we have to account for the presence of the limiting
terminator. And when we do unroll the loop, we have to remove the
limiting terminator first.
For an example of how this results in more efficient back-end code,
consider the loop:
for (int i = 0; i < 100; i++) {
total += i;
}
Prior to this patch, on i965, this loop would compile down to this
(vec4) native code:
mov(8) g4<1>.xD 0D
mov(8) g8<1>.xD 0D
loop:
cmp.ge.f0(8) null g8<4;4,1>.xD 100D
(+f0) if(8)
break(8)
endif(8)
add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD
add(8) g8<1>.xD g8<4;4,1>.xD 1D
add(8) g4<1>.xD g4<4;4,1>.xD 1D
while(8) loop
(notice that both g8 and g4 are loop induction variables; one is used
to terminate the loop, and the other is used to accumulate the total).
After this patch, the same loop compiles to:
mov(8) g4<1>.xD 0D
loop:
cmp.ge.f0(8) null g4<4;4,1>.xD 100D
(+f0) if(8)
break(8)
endif(8)
add(8) g5<1>.xD g5<4;4,1>.xD g4<4;4,1>.xD
add(8) g4<1>.xD g4<4;4,1>.xD 1D
while(8) loop
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/glsl/loop_unroll.cpp')
-rw-r--r-- | src/glsl/loop_unroll.cpp | 27 |
1 files changed, 23 insertions, 4 deletions
diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp index 9472bc5..4645dcb 100644 --- a/src/glsl/loop_unroll.cpp +++ b/src/glsl/loop_unroll.cpp @@ -228,6 +228,9 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) loop_variable_state *const ls = this->state->get(ir); int iterations; + /* Note: normatively-bounded loops aren't created anymore. */ + assert(ir->normative_bound < 0); + /* If we've entered a loop that hasn't been analyzed, something really, * really bad has happened. */ @@ -239,10 +242,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) /* Don't try to unroll loops where the number of iterations is not known * at compile-time. */ - if (ir->normative_bound < 0) + if (ls->limiting_terminator == NULL) return visit_continue; - iterations = ir->normative_bound; + iterations = ls->limiting_terminator->iterations; /* Don't try to unroll loops that have zillions of iterations either. */ @@ -256,10 +259,17 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) if (count.fail || count.nodes * iterations > (int)max_iterations * 5) return visit_continue; - if (ls->num_loop_jumps > 1) + /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. + * We'll be removing the limiting terminator before we unroll. 
+ */ + assert(ls->num_loop_jumps > 0); + unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; + + if (predicted_num_loop_jumps > 1) return visit_continue; - if (ls->num_loop_jumps == 0) { + if (predicted_num_loop_jumps == 0) { + ls->limiting_terminator->ir->remove(); simple_unroll(ir, iterations); return visit_continue; } @@ -274,6 +284,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) */ last_ir->remove(); + ls->limiting_terminator->ir->remove(); simple_unroll(ir, 1); return visit_continue; } @@ -282,6 +293,12 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) /* recognize loops in the form produced by ir_lower_jumps */ ir_instruction *cur_ir = (ir_instruction *) node; + /* Skip the limiting terminator, since it will go away when we + * unroll. + */ + if (cur_ir == ls->limiting_terminator->ir) + continue; + ir_if *ir_if = cur_ir->as_if(); if (ir_if != NULL) { /* Determine which if-statement branch, if any, ends with a @@ -296,6 +313,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) (ir_instruction *) ir_if->then_instructions.get_tail(); if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); splice_post_if_instructions(ir_if, &ir_if->else_instructions); ir_if_last->remove(); complex_unroll(ir, iterations, false); @@ -305,6 +323,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) (ir_instruction *) ir_if->else_instructions.get_tail(); if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); splice_post_if_instructions(ir_if, &ir_if->then_instructions); ir_if_last->remove(); complex_unroll(ir, iterations, true); |