diff options
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 45 |
1 files changed, 40 insertions, 5 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5e5bf68..5ead940 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1014,7 +1014,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
 	if (type == TGSI_TYPE_DOUBLE) {
 		LLVMValueRef value2;
 		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
-				       lp_build_const_int32(gallivm, swizzle + 1));
+				       lp_build_const_int32(gallivm, 1));
 		value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false);
 		return radeon_llvm_emit_fetch_double(bld_base, value, value2);
 	}
@@ -1846,13 +1846,13 @@ static LLVMValueRef fetch_constant(
 		result = bitcast(bld_base, type, result);
 	else {
 		LLVMValueRef addr2, result2;
-		addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+		addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
 		addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
 		addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
 		addr2 = lp_build_add(&bld_base->uint_bld, addr2,
-				     lp_build_const_int32(base->gallivm, idx * 4));
+				     lp_build_const_int32(base->gallivm, (idx + 1) * 4));
 
-		result2 = buffer_load_const(base->gallivm->builder, ctx->const_buffers[buf],
+		result2 = buffer_load_const(base->gallivm->builder, bufp,
 					    addr2, ctx->f32);
 
 		result = radeon_llvm_emit_fetch_double(bld_base,
@@ -5072,7 +5072,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 	}
 
 	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
-	for (chan = 0; chan < 2; chan++) {
+	for (chan = 0; chan < 4; chan++) {
 		LLVMValueRef args[4];
 		LLVMValueRef llvm_chan;
 		unsigned schan;
@@ -6567,6 +6567,41 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
 	radeon_llvm_dispose(&ctx.radeon_bld);
 
+	/* Validate SGPR and VGPR usage for compute to detect compiler bugs.
+	 * LLVM 3.9svn has this bug.
+	 */
+	if (sel->type == PIPE_SHADER_COMPUTE) {
+		unsigned *props = sel->info.properties;
+		unsigned wave_size = 64;
+		unsigned max_vgprs = 256;
+		unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+		unsigned max_sgprs_per_wave = 128;
+		unsigned min_waves_per_cu =
+			DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+				     props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+				     props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
+				     wave_size);
+		unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
+
+		max_vgprs = max_vgprs / min_waves_per_simd;
+		max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
+
+		if (shader->config.num_sgprs > max_sgprs ||
+		    shader->config.num_vgprs > max_vgprs) {
+			fprintf(stderr, "LLVM failed to compile a shader correctly: "
+				"SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
+				shader->config.num_sgprs, shader->config.num_vgprs,
+				max_sgprs, max_vgprs);
+
+			/* Just terminate the process, because dependent
+			 * shaders can hang due to bad input data, but use
+			 * the env var to allow shader-db to work.
+			 */
+			if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
+				abort();
+		}
+	}
+
 	/* Add the scratch offset to input SGPRs. */
 	if (shader->config.scratch_bytes_per_wave)
 		shader->info.num_input_sgprs += 1; /* scratch byte offset */