1 files changed, 40 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5e5bf68..5ead940 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1014,7 +1014,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
 	if (type == TGSI_TYPE_DOUBLE) {
 		LLVMValueRef value2;
 		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
-				       lp_build_const_int32(gallivm, swizzle + 1));
+				       lp_build_const_int32(gallivm, 1));
 		value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false);
 		return radeon_llvm_emit_fetch_double(bld_base, value, value2);
 	}
@@ -1846,13 +1846,13 @@ static LLVMValueRef fetch_constant(
 		result = bitcast(bld_base, type, result);
 	else {
 		LLVMValueRef addr2, result2;
-		addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+		addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
 		addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
 		addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
 		addr2 = lp_build_add(&bld_base->uint_bld, addr2,
-				     lp_build_const_int32(base->gallivm, idx * 4));
+				     lp_build_const_int32(base->gallivm, (idx + 1) * 4));
 
-		result2 = buffer_load_const(base->gallivm->builder, ctx->const_buffers[buf],
+		result2 = buffer_load_const(base->gallivm->builder, bufp,
 				   addr2, ctx->f32);
 
 		result = radeon_llvm_emit_fetch_double(bld_base,
@@ -5072,7 +5072,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 	}
 
 	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
-	for (chan = 0; chan < 2; chan++) {
+	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef args[4];
 		LLVMValueRef llvm_chan;
 		unsigned schan;
@@ -6567,6 +6567,41 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
 	radeon_llvm_dispose(&ctx.radeon_bld);
 
+	/* Validate SGPR and VGPR usage for compute shaders to detect compiler
+	 * bugs: LLVM 3.9svn can emit shaders that exceed the hw limits.
+	 */
+	if (sel->type == PIPE_SHADER_COMPUTE) {
+		unsigned *props = sel->info.properties;
+		unsigned wave_size = 64;
+		unsigned max_vgprs = 256;
+		unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+		unsigned max_sgprs_per_wave = 128;
+		unsigned min_waves_per_cu =
+			DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+				     props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+				     props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
+				     wave_size);
+		unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
+
+		max_vgprs = max_vgprs / min_waves_per_simd;
+		max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
+
+		if (shader->config.num_sgprs > max_sgprs ||
+		    shader->config.num_vgprs > max_vgprs) {
+			fprintf(stderr, "LLVM failed to compile a shader correctly: "
+				"SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
+				shader->config.num_sgprs, shader->config.num_vgprs,
+				max_sgprs, max_vgprs);
+
+			/* Terminate the process, because dependent shaders can
+			 * hang due to bad input data; set SI_PASS_BAD_SHADERS
+			 * to skip the abort so that shader-db keeps working.
+			 */
+			if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
+				abort();
+		}
+	}
+
 	/* Add the scratch offset to input SGPRs. */
 	if (shader->config.scratch_bytes_per_wave)
 		shader->info.num_input_sgprs += 1; /* scratch byte offset */