diff options
author | Marek Olšák <marek.olsak@amd.com> | 2016-11-29 19:25:03 +0100 |
---|---|---|
committer | Emil Velikov <emil.l.velikov@gmail.com> | 2016-12-14 19:03:11 +0000 |
commit | 40e16eac7535181adacf6fb583eb278380ef8dfd (patch) | |
tree | 77ae619628ac96d6c11fa663dc89c7a5909d07d9 /src | |
parent | 3ece25662945396689b20954d2278447841024cc (diff) | |
download | external_mesa3d-40e16eac7535181adacf6fb583eb278380ef8dfd.zip external_mesa3d-40e16eac7535181adacf6fb583eb278380ef8dfd.tar.gz external_mesa3d-40e16eac7535181adacf6fb583eb278380ef8dfd.tar.bz2 |
radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
All codepaths are handled except for clover.
Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
(cherry picked from commit 72d48fcd8eb5862c72d27e5462c289c5de65396e)
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_compute.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 6 |
4 files changed, 29 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index a35187c..0845711 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -343,6 +343,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, lds_blocks += align(program->local_size, 512) >> 9; } + /* TODO: use si_multiwave_lds_size_workaround */ assert(lds_blocks <= 0xFF); config->rsrc2 &= C_00B84C_LDS_SIZE; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 00e78cf..0b02235 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -7741,11 +7741,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, return true; } -static void si_fix_num_sgprs(struct si_shader *shader) +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size) +{ + /* SPI barrier management bug: + * Make sure we have at least 4k of LDS in use to avoid the bug. + * It applies to workgroup sizes of more than one wavefront. + */ + if (sscreen->b.family == CHIP_BONAIRE || + sscreen->b.family == CHIP_KABINI || + sscreen->b.family == CHIP_MULLINS) + *lds_size = MAX2(*lds_size, 8); +} + +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader) { unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); + + if (shader->selector->type == PIPE_SHADER_COMPUTE && + si_get_max_workgroup_size(shader) > 64) { + si_multiwave_lds_size_workaround(sscreen, + &shader->config.lds_size); + } } int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, @@ -7841,7 +7861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - si_fix_num_sgprs(shader); + si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, stderr); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index b07210c..10bafca 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -482,6 +482,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f); +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index d18137b..447acc1 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -162,11 +162,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (sctx->b.chip_class >= CIK) { assert(lds_size <= 65536); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); + lds_size = align(lds_size, 512) / 512; } else { assert(lds_size <= 32768); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); + lds_size = align(lds_size, 256) / 256; } + si_multiwave_lds_size_workaround(sctx->screen, &lds_size); + ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); if (sctx->last_ls == ls->current && sctx->last_tcs == tcs && |