summaryrefslogtreecommitdiffstats
path: root/src/glsl/loop_unroll.cpp
diff options
context:
space:
mode:
author: Paul Berry <stereotype441@gmail.com> 2013-11-29 00:16:43 -0800
committer: Paul Berry <stereotype441@gmail.com> 2013-12-09 10:55:06 -0800
commit: 7ea3baa64da061f86a50c41081a26e0c2859e99c (patch)
tree: 49e2a530583627aaefd7aa0af28878d8b9a41e00 /src/glsl/loop_unroll.cpp
parent: 4d844cfa56220b7de8ca676ad222d89f81c60c09 (diff)
download: external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.zip
external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.tar.gz
external_mesa3d-7ea3baa64da061f86a50c41081a26e0c2859e99c.tar.bz2
glsl/loops: Stop creating normatively bound loops in loop_controls.
Previously, when loop_controls analyzed a loop and found that it had a fixed bound (known at compile time), it would remove all of the loop terminators and instead set the loop's normative_bound field to force the loop to execute the correct number of times.

This made loop unrolling easy, but it had a serious disadvantage. Since most GPUs don't have a native mechanism for executing a loop a fixed number of times, in order to implement the normative bound, the back-ends would have to synthesize a new loop induction variable. As a result, many loops wound up having two induction variables instead of one. This caused extra register pressure and unnecessary instructions.

This patch modifies loop_controls so that it doesn't set the loop's normative_bound anymore. Instead it leaves one of the terminators in the loop (the limiting terminator), so the back-end doesn't have to go to any extra work to ensure the loop terminates at the right time.

This complicates loop unrolling slightly: when deciding whether a loop can be unrolled, we have to account for the presence of the limiting terminator. And when we do unroll the loop, we have to remove the limiting terminator first.

For an example of how this results in more efficient back end code, consider the loop:

    for (int i = 0; i < 100; i++) {
      total += i;
    }

Prior to this patch, on i965, this loop would compile down to this (vec4) native code:

        mov(8)        g4<1>.xD    0D
        mov(8)        g8<1>.xD    0D
    loop:
        cmp.ge.f0(8)  null        g8<4;4,1>.xD    100D
        (+f0) if(8)
        break(8)
        endif(8)
        add(8)        g5<1>.xD    g5<4;4,1>.xD    g4<4;4,1>.xD
        add(8)        g8<1>.xD    g8<4;4,1>.xD    1D
        add(8)        g4<1>.xD    g4<4;4,1>.xD    1D
        while(8) loop

(notice that both g8 and g4 are loop induction variables; one is used to terminate the loop, and the other is used to accumulate the total).

After this patch, the same loop compiles to:

        mov(8)        g4<1>.xD    0D
    loop:
        cmp.ge.f0(8)  null        g4<4;4,1>.xD    100D
        (+f0) if(8)
        break(8)
        endif(8)
        add(8)        g5<1>.xD    g5<4;4,1>.xD    g4<4;4,1>.xD
        add(8)        g4<1>.xD    g4<4;4,1>.xD    1D
        while(8) loop

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/glsl/loop_unroll.cpp')
-rw-r--r--src/glsl/loop_unroll.cpp27
1 files changed, 23 insertions, 4 deletions
diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp
index 9472bc5..4645dcb 100644
--- a/src/glsl/loop_unroll.cpp
+++ b/src/glsl/loop_unroll.cpp
@@ -228,6 +228,9 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
loop_variable_state *const ls = this->state->get(ir);
int iterations;
+ /* Note: normatively-bounded loops aren't created anymore. */
+ assert(ir->normative_bound < 0);
+
/* If we've entered a loop that hasn't been analyzed, something really,
* really bad has happened.
*/
@@ -239,10 +242,10 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
/* Don't try to unroll loops where the number of iterations is not known
* at compile-time.
*/
- if (ir->normative_bound < 0)
+ if (ls->limiting_terminator == NULL)
return visit_continue;
- iterations = ir->normative_bound;
+ iterations = ls->limiting_terminator->iterations;
/* Don't try to unroll loops that have zillions of iterations either.
*/
@@ -256,10 +259,17 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
if (count.fail || count.nodes * iterations > (int)max_iterations * 5)
return visit_continue;
- if (ls->num_loop_jumps > 1)
+ /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
+ * We'll be removing the limiting terminator before we unroll.
+ */
+ assert(ls->num_loop_jumps > 0);
+ unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
+
+ if (predicted_num_loop_jumps > 1)
return visit_continue;
- if (ls->num_loop_jumps == 0) {
+ if (predicted_num_loop_jumps == 0) {
+ ls->limiting_terminator->ir->remove();
simple_unroll(ir, iterations);
return visit_continue;
}
@@ -274,6 +284,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
*/
last_ir->remove();
+ ls->limiting_terminator->ir->remove();
simple_unroll(ir, 1);
return visit_continue;
}
@@ -282,6 +293,12 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
/* recognize loops in the form produced by ir_lower_jumps */
ir_instruction *cur_ir = (ir_instruction *) node;
+ /* Skip the limiting terminator, since it will go away when we
+ * unroll.
+ */
+ if (cur_ir == ls->limiting_terminator->ir)
+ continue;
+
ir_if *ir_if = cur_ir->as_if();
if (ir_if != NULL) {
/* Determine which if-statement branch, if any, ends with a
@@ -296,6 +313,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
(ir_instruction *) ir_if->then_instructions.get_tail();
if (is_break(ir_if_last)) {
+ ls->limiting_terminator->ir->remove();
splice_post_if_instructions(ir_if, &ir_if->else_instructions);
ir_if_last->remove();
complex_unroll(ir, iterations, false);
@@ -305,6 +323,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
(ir_instruction *) ir_if->else_instructions.get_tail();
if (is_break(ir_if_last)) {
+ ls->limiting_terminator->ir->remove();
splice_post_if_instructions(ir_if, &ir_if->then_instructions);
ir_if_last->remove();
complex_unroll(ir, iterations, true);