diff options
Diffstat (limited to 'src/mesa/drivers/dri')
32 files changed, 442 insertions, 187 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index e5a3f00..5607d5b 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -858,6 +858,7 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable) struct __DRIimageList images; unsigned int format; uint32_t buffer_mask = 0; + int ret; front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); @@ -877,12 +878,14 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable) if (back_rb) buffer_mask |= __DRI_IMAGE_BUFFER_BACK; - (*screen->image.loader->getBuffers) (drawable, - driGLFormatToImageFormat(format), - &drawable->dri2.stamp, - drawable->loaderPrivate, - buffer_mask, - &images); + ret = screen->image.loader->getBuffers(drawable, + driGLFormatToImageFormat(format), + &drawable->dri2.stamp, + drawable->loaderPrivate, + buffer_mask, + &images); + if (!ret) + return; if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { drawable->w = images.front->width; diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index f448551..194b412 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -48,6 +48,7 @@ i965_compiler_FILES = \ brw_nir_attribute_workarounds.c \ brw_nir_intrinsics.c \ brw_nir_opt_peephole_ffma.c \ + brw_nir_tcs_workarounds.c \ brw_packed_float.c \ brw_predicated_break.cpp \ brw_reg.h \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9590968..6be82c5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -167,7 +167,8 @@ nir_uniform_type_size(const struct glsl_type *type) } const unsigned * -brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, +brw_blorp_compile_nir_shader(struct brw_context 
*brw, void *mem_ctx, + struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, struct brw_blorp_prog_data *prog_data, @@ -175,13 +176,6 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, { const struct brw_compiler *compiler = brw->intelScreen->compiler; - void *mem_ctx = ralloc_context(NULL); - - /* Calling brw_preprocess_nir and friends is destructive and, if cloning is - * enabled, may end up completely replacing the nir_shader. Therefore, we - * own it and might as well put it in our context for easy cleanup. - */ - ralloc_steal(mem_ctx, nir); nir->options = compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 7ec5875..133a8ac 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -366,7 +366,8 @@ struct brw_blorp_blit_prog_key void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key); const unsigned * -brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, +brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx, + struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, struct brw_blorp_prog_data *prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 782d285..db94f33 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1296,7 +1296,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, * of samples). 
*/ static nir_shader * -brw_blorp_build_nir_shader(struct brw_context *brw, +brw_blorp_build_nir_shader(struct brw_context *brw, void *mem_ctx, const brw_blorp_blit_prog_key *key) { nir_ssa_def *src_pos, *dst_pos, *color; @@ -1342,7 +1342,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw, (key->dst_samples == 0)); nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); struct brw_blorp_blit_vars v; brw_blorp_blit_vars_init(&b, &v, key); @@ -1505,6 +1505,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, &params->wm_prog_kernel, &params->wm_prog_data)) return; + void *mem_ctx = ralloc_context(NULL); + const unsigned *program; unsigned program_size; struct brw_blorp_prog_data prog_data; @@ -1512,7 +1514,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, /* Try and compile with NIR first. If that fails, fall back to the old * method of building shaders manually. */ - nir_shader *nir = brw_blorp_build_nir_shader(brw, prog_key); + nir_shader *nir = brw_blorp_build_nir_shader(brw, mem_ctx, prog_key); struct brw_wm_prog_key wm_key; brw_blorp_init_wm_prog_key(&wm_key); wm_key.tex.compressed_multisample_layout_mask = @@ -1520,7 +1522,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, wm_key.tex.msaa_16 = prog_key->tex_samples == 16; wm_key.multisample_fbo = prog_key->rt_samples > 1; - program = brw_blorp_compile_nir_shader(brw, nir, &wm_key, false, + program = brw_blorp_compile_nir_shader(brw, mem_ctx, nir, &wm_key, false, &prog_data, &program_size); brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, @@ -1528,6 +1530,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, program, program_size, &prog_data, sizeof(prog_data), &params->wm_prog_kernel, &params->wm_prog_data); + + ralloc_free(mem_ctx); } static void diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp index 2515a04..6400218 100644 --- 
a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp @@ -64,7 +64,7 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear"); nir_variable *u_color = nir_variable_create(b.shader, nir_var_uniform, @@ -84,7 +84,8 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw, struct brw_blorp_prog_data prog_data; unsigned program_size; const unsigned *program = - brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data, + brw_blorp_compile_nir_shader(brw, mem_ctx, + b.shader, &wm_key, use_replicated_data, &prog_data, &program_size); brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 10e9f47..7d15c28 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -220,6 +220,8 @@ struct brw_tcs_prog_key /** A bitfield of per-vertex outputs written. */ uint64_t outputs_written; + bool quads_workaround; + struct brw_sampler_prog_key_data tex; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1cb99da..2af42e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5681,7 +5681,7 @@ fs_visitor::setup_gs_payload() * have to multiply by VerticesIn to obtain the total storage requirement. 
*/ if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in > - max_push_components) { + max_push_components || gs_prog_data->invocations > 1) { gs_prog_data->base.include_vue_handles = true; /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 11c078a..91763d3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2322,23 +2322,23 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, break; fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t)); + fs_reg m0_2 = component(m0, 2); - const fs_builder fwa_bld = bld.exec_all(); + const fs_builder chanbld = bld.exec_all().group(1, 0); /* Zero the message header */ - fwa_bld.MOV(m0, brw_imm_ud(0u)); + bld.exec_all().MOV(m0, brw_imm_ud(0u)); /* Copy "Barrier ID" from r0.2, bits 16:13 */ - fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(INTEL_MASK(16, 13))); /* Shift it up to bits 27:24. 
*/ - fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11)); + chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); /* Set the Barrier Count and the enable bit */ - fwa_bld.OR(m0_2, m0_2, - brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15))); + chanbld.OR(m0_2, m0_2, + brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15))); bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); break; @@ -4060,12 +4060,23 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, dest = get_nir_dest(instr->dest); fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); - fs_reg offset = get_nir_src(instr->src[0]); + fs_reg offset; fs_reg data1 = get_nir_src(instr->src[1]); fs_reg data2; if (op == BRW_AOP_CMPWR) data2 = get_nir_src(instr->src[2]); + /* Get the offset */ + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + } else { + offset = vgrf(glsl_type::uint_type); + bld.ADD(offset, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + /* Emit the actual atomic operation operation */ fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 74c354f..6185310 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, bool brw_nir_apply_trig_workarounds(nir_shader *nir); +void brw_nir_apply_tcs_quads_workaround(nir_shader *nir); + nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, const struct brw_device_info *devinfo, const struct brw_sampler_prog_key_data *key, diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c new file mode 100644 index 0000000..0626981 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c @@ -0,0 +1,152 @@ +/* + * Copyright © 2016 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "compiler/nir/nir_builder.h" +#include "brw_nir.h" + +/** + * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8). 
+ * + * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the + * definition of the patch header layouts): + * + * "HW Bug: The Tessellation stage will incorrectly add domain points + * along patch edges under the following conditions, which may result + * in conformance failures and/or cracking artifacts: + * + * * QUAD domain + * * INTEGER partitioning + * * All three TessFactors in a given U or V direction (e.g., V + * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0 + * * All three TessFactors in the other direction are > 1.0 and all + * round up to the same integer value (e.g, U direction: + * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4) + * + * The suggested workaround (to be implemented as part of the postamble + * to the HS shader in the HS kernel) is: + * + * if ( + * (TF[UEQ0] > 1.0) || + * (TF[VEQ0] > 1.0) || + * (TF[UEQ1] > 1.0) || + * (TF[VEQ1] > 1.0) || + * (TF[INSIDE_U] > 1.0) || + * (TF[INSIDE_V] > 1.0) ) + * { + * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U]; + * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V]; + * }" + * + * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes + * that the above workaround fails to fix certain GL/ES CTS tests which + * have inside tessellation factors of -1.0. This can be explained by + * a quote from the ARB_tessellation_shader specification: + * + * "If "equal_spacing" is used, the floating-point tessellation level is + * first clamped to the range [1,<max>], where <max> is implementation- + * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)." + * + * In other words, the actual inner tessellation factor used is + * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped + * value against 1.0. To accomplish this, we change the comparison from + * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0). 
+ */ + +static inline nir_ssa_def * +load_output(nir_builder *b, int num_components, int offset) +{ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL); + load->num_components = num_components; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_intrinsic_set_base(load, offset); + + nir_builder_instr_insert(b, &load->instr); + + return &load->dest.ssa; +} + +static inline void +store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps) +{ + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); + store->num_components = comps; + nir_intrinsic_set_write_mask(store, (1u << comps) - 1); + store->src[0] = nir_src_for_ssa(value); + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_builder_instr_insert(b, &store->instr); +} + +static void +emit_quads_workaround(nir_builder *b, nir_block *block) +{ + /* We're going to insert a new if-statement in a predecessor of the end + * block. This would normally create a new block (after the if) which + * would then become the predecessor of the end block, causing our set + * walking to get screwed up. To avoid this, just emit a constant at + * the end of our current block, and insert the if before that. 
+ */ + b->cursor = nir_after_block_before_jump(block); + b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr); + + nir_ssa_def *inner = load_output(b, 2, 0); + nir_ssa_def *outer = load_output(b, 4, 1); + + nir_ssa_def *any_greater_than_1 = + nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)), + nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner))); + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(any_greater_than_1); + nir_builder_cf_insert(b, &if_stmt->cf_node); + + /* Fill out the new then-block */ + b->cursor = nir_after_cf_list(&if_stmt->then_list); + + store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner), + nir_imm_float(b, 2.0f), inner), 0, 2); +} + +void +brw_nir_apply_tcs_quads_workaround(nir_shader *nir) +{ + assert(nir->stage == MESA_SHADER_TESS_CTRL); + + nir_foreach_function(func, nir) { + if (!func->impl) + continue; + + nir_builder b; + nir_builder_init(&b, func->impl); + + struct set_entry *entry; + set_foreach(func->impl->end_block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + emit_quads_workaround(&b, pred); + } + + nir_metadata_preserve(func->impl, 0); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c index a91c6e2..a42a322 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c +++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c @@ -686,12 +686,12 @@ stop_oa_counters(struct brw_context *brw) * The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot, * including the required PIPE_CONTROL flushes. * - * Sandybridge is the worst case scenario: brw_emit_mi_flush - * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush - * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add - * the 3 DWords for MI_REPORT_PERF_COUNT itself. 
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four + * PIPE_CONTROLs which are 5 DWords each. We have to flush before and after + * MI_REPORT_PERF_COUNT, so multiply by two. Finally, add the 3 DWords for + * MI_REPORT_PERF_COUNT itself. */ -#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3) +#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3) /** * Emit an MI_REPORT_PERF_COUNT command packet. diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index 4672efd..d51cf1b 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -96,10 +96,38 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) { + if (brw->gen >= 6 && + (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && + (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { + /* A pipe control command with flush and invalidate bits set + * simultaneously is an inherently racy operation on Gen6+ if the + * contents of the flushed caches were intended to become visible from + * any of the invalidated caches. Split it in two PIPE_CONTROLs, the + * first one should stall the pipeline to make sure that the flushed R/W + * caches are coherent with memory once the specified R/O caches are + * invalidated. On pre-Gen6 hardware the (implicit) R/O cache + * invalidation seems to happen at the bottom of the pipeline together + * with any write cache flush, so this shouldn't be a concern. 
+ */ + brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) | + PIPE_CONTROL_CS_STALL); + flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); + } + if (brw->gen >= 8) { if (brw->gen == 8) gen8_add_cs_stall_workaround_bits(&flags); + if (brw->gen == 9 && + (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { + /* Hardware workaround: SKL + * + * Emit Pipe Control with all bits set to zero before emitting + * a Pipe Control with VF Cache Invalidate set. + */ + brw_emit_pipe_control_flush(brw, 0); + } + BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); OUT_BATCH(flags); @@ -311,15 +339,6 @@ brw_emit_mi_flush(struct brw_context *brw) } else { int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; if (brw->gen >= 6) { - if (brw->gen == 9) { - /* Hardware workaround: SKL - * - * Emit Pipe Control with all bits set to zero before emitting - * a Pipe Control with VF Cache Invalidate set. - */ - brw_emit_pipe_control_flush(brw, 0); - } - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 8a5dd7e..6b7fde2 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw, key->patch_outputs_written); found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode, key->tes_primitive_mode); + found |= key_debug(brw, "quads and equal_spacing workaround", + old_key->quads_workaround, key->quads_workaround); found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); if (!found) { @@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw, * based on the domain the DS is expecting to tessellate. 
*/ key.tes_primitive_mode = tep->program.PrimitiveMode; + key.quads_workaround = brw->gen < 9 && + tep->program.PrimitiveMode == GL_QUADS && + tep->program.Spacing == GL_EQUAL; if (tcp) { key.program_string_id = tcp->id; @@ -383,6 +388,8 @@ brw_tcs_precompile(struct gl_context *ctx, struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog; struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp); + const struct gl_shader *tes = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; memset(&key, 0, sizeof(key)); @@ -393,9 +400,14 @@ brw_tcs_precompile(struct gl_context *ctx, if (brw->gen < 8) key.input_vertices = shader_prog->TessCtrl.VerticesOut; - key.tes_primitive_mode = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ? - shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES; + if (tes) { + key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode; + key.quads_workaround = brw->gen < 9 && + shader_prog->TessEval.PrimitiveMode == GL_QUADS && + shader_prog->TessEval.Spacing == GL_EQUAL; + } else { + key.tes_primitive_mode = GL_TRIANGLES; + } key.outputs_written = prog->OutputsWritten; key.patch_outputs_written = prog->PatchOutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 162b481..a7398a7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -354,95 +354,97 @@ vec4_visitor::opt_vector_float() { bool progress = false; - int last_reg = -1, last_reg_offset = -1; - enum brw_reg_file last_reg_file = BAD_FILE; + foreach_block(block, cfg) { + int last_reg = -1, last_reg_offset = -1; + enum brw_reg_file last_reg_file = BAD_FILE; + + uint8_t imm[4] = { 0 }; + int inst_count = 0; + vec4_instruction *imm_inst[4]; + unsigned writemask = 0; + enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F; + + foreach_inst_in_block_safe(vec4_instruction, inst, block) { + int vf = -1; + enum brw_reg_type need_type; + + /* Look for unconditional MOVs from an 
immediate with a partial + * writemask. Skip type-conversion MOVs other than integer 0, + * where the type doesn't matter. See if the immediate can be + * represented as a VF. + */ + if (inst->opcode == BRW_OPCODE_MOV && + inst->src[0].file == IMM && + inst->predicate == BRW_PREDICATE_NONE && + inst->dst.writemask != WRITEMASK_XYZW && + (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) { + + vf = brw_float_to_vf(inst->src[0].d); + need_type = BRW_REGISTER_TYPE_D; + + if (vf == -1) { + vf = brw_float_to_vf(inst->src[0].f); + need_type = BRW_REGISTER_TYPE_F; + } + } else { + last_reg = -1; + } - uint8_t imm[4] = { 0 }; - int inst_count = 0; - vec4_instruction *imm_inst[4]; - unsigned writemask = 0; - enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F; + /* If this wasn't a MOV, or the destination register doesn't match, + * or we have to switch destination types, then this breaks our + * sequence. Combine anything we've accumulated so far. + */ + if (last_reg != inst->dst.nr || + last_reg_offset != inst->dst.reg_offset || + last_reg_file != inst->dst.file || + (vf > 0 && dest_type != need_type)) { + + if (inst_count > 1) { + unsigned vf; + memcpy(&vf, imm, sizeof(vf)); + vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf)); + mov->dst.type = dest_type; + mov->dst.writemask = writemask; + inst->insert_before(block, mov); + + for (int i = 0; i < inst_count; i++) { + imm_inst[i]->remove(block); + } - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { - int vf = -1; - enum brw_reg_type need_type; + progress = true; + } - /* Look for unconditional MOVs from an immediate with a partial - * writemask. Skip type-conversion MOVs other than integer 0, - * where the type doesn't matter. See if the immediate can be - * represented as a VF. 
- */ - if (inst->opcode == BRW_OPCODE_MOV && - inst->src[0].file == IMM && - inst->predicate == BRW_PREDICATE_NONE && - inst->dst.writemask != WRITEMASK_XYZW && - (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) { - - vf = brw_float_to_vf(inst->src[0].d); - need_type = BRW_REGISTER_TYPE_D; - - if (vf == -1) { - vf = brw_float_to_vf(inst->src[0].f); - need_type = BRW_REGISTER_TYPE_F; - } - } else { - last_reg = -1; - } + inst_count = 0; + last_reg = -1; + writemask = 0; + dest_type = BRW_REGISTER_TYPE_F; - /* If this wasn't a MOV, or the destination register doesn't match, - * or we have to switch destination types, then this breaks our - * sequence. Combine anything we've accumulated so far. - */ - if (last_reg != inst->dst.nr || - last_reg_offset != inst->dst.reg_offset || - last_reg_file != inst->dst.file || - (vf > 0 && dest_type != need_type)) { - - if (inst_count > 1) { - unsigned vf; - memcpy(&vf, imm, sizeof(vf)); - vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf)); - mov->dst.type = dest_type; - mov->dst.writemask = writemask; - inst->insert_before(block, mov); - - for (int i = 0; i < inst_count; i++) { - imm_inst[i]->remove(block); + for (int i = 0; i < 4; i++) { + imm[i] = 0; } - - progress = true; } - inst_count = 0; - last_reg = -1; - writemask = 0; - dest_type = BRW_REGISTER_TYPE_F; - - for (int i = 0; i < 4; i++) { - imm[i] = 0; + /* Record this instruction's value (if it was representable). 
*/ + if (vf != -1) { + if ((inst->dst.writemask & WRITEMASK_X) != 0) + imm[0] = vf; + if ((inst->dst.writemask & WRITEMASK_Y) != 0) + imm[1] = vf; + if ((inst->dst.writemask & WRITEMASK_Z) != 0) + imm[2] = vf; + if ((inst->dst.writemask & WRITEMASK_W) != 0) + imm[3] = vf; + + writemask |= inst->dst.writemask; + imm_inst[inst_count++] = inst; + + last_reg = inst->dst.nr; + last_reg_offset = inst->dst.reg_offset; + last_reg_file = inst->dst.file; + if (vf > 0) + dest_type = need_type; } } - - /* Record this instruction's value (if it was representable). */ - if (vf != -1) { - if ((inst->dst.writemask & WRITEMASK_X) != 0) - imm[0] = vf; - if ((inst->dst.writemask & WRITEMASK_Y) != 0) - imm[1] = vf; - if ((inst->dst.writemask & WRITEMASK_Z) != 0) - imm[2] = vf; - if ((inst->dst.writemask & WRITEMASK_W) != 0) - imm[3] = vf; - - writemask |= inst->dst.writemask; - imm_inst[inst_count++] = inst; - - last_reg = inst->dst.nr; - last_reg_offset = inst->dst.reg_offset; - last_reg_file = inst->dst.file; - if (vf > 0) - dest_type = need_type; - } } if (progress) @@ -1109,7 +1111,7 @@ vec4_visitor::opt_register_coalesce() /* Can't coalesce this GRF if someone else was going to * read it later. */ - if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip) + if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip) continue; /* We need to check interference with the final destination between this diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 0c1f0c3..10898a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). 
*/ if (src->file == VGRF) { - if (var_range_end(var_from_reg(alloc, *src), 4) < ip) { + if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) { entry->remove(); ralloc_free(entry); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 927438f..26a910c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -59,7 +59,10 @@ vec4_gs_visitor::make_reg_for_system_value(int location) switch (location) { case SYSTEM_VALUE_INVOCATION_ID: this->current_annotation = "initialize gl_InvocationID"; - emit(GS_OPCODE_GET_INSTANCE_ID, *reg); + if (gs_prog_data->invocations > 1) + emit(GS_OPCODE_GET_INSTANCE_ID, *reg); + else + emit(MOV(*reg, brw_imm_ud(0))); break; default: unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index f61c612..5440dba 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -451,6 +451,9 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); + if (key->quads_workaround) + brw_nir_apply_tcs_quads_workaround(nir); + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); if (is_scalar) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 609285e..61ada53 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1443,10 +1443,12 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) /* _NEW_PROGRAM */ struct gl_shader_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + /* BRW_NEW_CS_PROG_DATA */ + const struct brw_cs_prog_data *cs_prog_data = 
brw->cs.prog_data; - if (prog && brw->cs.prog_data->uses_num_work_groups) { + if (prog && cs_prog_data->uses_num_work_groups) { const unsigned surf_idx = - brw->cs.prog_data->binding_table.work_groups_start; + cs_prog_data->binding_table.work_groups_start; uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; drm_intel_bo *bo; uint32_t bo_offset; @@ -1475,6 +1477,7 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) const struct brw_tracked_state brw_cs_work_groups_surface = { .dirty = { .brw = BRW_NEW_BLORP | + BRW_NEW_CS_PROG_DATA | BRW_NEW_CS_WORK_GROUPS }, .emit = brw_upload_cs_work_groups_surface, diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 26de633..64ccdb6 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -50,6 +50,7 @@ upload_clip_state(struct brw_context *brw) dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } + /* BRW_NEW_VS_PROG_DATA */ dw1 |= brw->vs.prog_data->base.cull_distance_mask; if (brw->gen >= 7) @@ -224,6 +225,7 @@ const struct brw_tracked_state gen7_clip_state = { BRW_NEW_CONTEXT | BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_VS_PROG_DATA | BRW_NEW_META_IN_PROGRESS | BRW_NEW_PRIMITIVE | BRW_NEW_RASTERIZER_DISCARD | diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 5427fa5..b245226 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -283,7 +283,7 @@ gen7_upload_cs_push_constants(struct brw_context *brw) (struct brw_compute_program *) brw->compute_program; if (cp) { - /* CACHE_NEW_CS_PROG */ + /* BRW_NEW_CS_PROG_DATA */ struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data; brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data, @@ -297,6 +297,7 @@ const struct brw_tracked_state gen7_cs_push_constants = { .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_COMPUTE_PROGRAM 
| + BRW_NEW_CS_PROG_DATA | BRW_NEW_PUSH_CONSTANT_ALLOCATION, }, .emit = gen7_upload_cs_push_constants, diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c index 6f01abb..3b79b55 100644 --- a/src/mesa/drivers/dri/i965/gen8_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c @@ -69,6 +69,7 @@ gen8_upload_ds_state(struct brw_context *brw) GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) | (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ? GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0)); + /* _NEW_TRANSFORM */ OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled, GEN8_DS_USER_CLIP_DISTANCE) | SET_FIELD(vue_prog_data->cull_distance_mask, @@ -106,7 +107,7 @@ gen8_upload_ds_state(struct brw_context *brw) const struct brw_tracked_state gen8_ds_state = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_TESS_PROGRAMS | diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8a904fe..f916d99 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -124,6 +124,7 @@ const struct brw_tracked_state gen8_ps_extra = { .mesa = _NEW_BUFFERS | _NEW_COLOR, .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA, }, .emit = upload_ps_extra, @@ -283,7 +284,6 @@ const struct brw_tracked_state gen8_ps_state = { .mesa = _NEW_MULTISAMPLE, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA, }, .emit = upload_ps_state, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index aa1dc38..67e8e8f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -21,13 +21,13 @@ extern "C" { * - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes) * - Disabling OA counters on Gen6+ (3 DWords = 12 bytes) * - Ending MI_REPORT_PERF_COUNT on 
Gen5+, plus associated PIPE_CONTROLs: - * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB, - * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes + * - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB, + * which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes * - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes. * On Ironlake, it's 6 DWords, but we have some slack due to the lack of * Sandybridge PIPE_CONTROL madness. - * - CC_STATE workaround on HSW (12 * 4 = 48 bytes) - * - 5 dwords for initial mi_flush + * - CC_STATE workaround on HSW (17 * 4 = 68 bytes) + * - 10 dwords for initial mi_flush * - 2 dwords for CC state setup * - 5 dwords for the required pipe control at the end * - Restoring L3 configuration: (24 dwords = 96 bytes) @@ -35,7 +35,7 @@ extern "C" { * - 7 dwords for L3 configuration set-up. * - 5 dwords for L3 atomic set-up (on HSW). */ -#define BATCH_RESERVED 248 +#define BATCH_RESERVED 308 struct intel_batchbuffer; diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 939f9a0..8a0d2ad 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -374,6 +374,19 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, if (!irb->mt) return; + /* Adjust the miptree's upper-left coordinate. + * + * FIXME: Adjusting the miptree's layout outside of + * intel_miptree_create_layout() is fragile. Plumb the adjustment through + * intel_miptree_create_layout() and brw_tex_layout(). 
+ */ + irb->mt->level[0].level_x = image->tile_x; + irb->mt->level[0].level_y = image->tile_y; + irb->mt->level[0].slice[0].x_offset = image->tile_x; + irb->mt->level[0].slice[0].y_offset = image->tile_y; + irb->mt->total_width += image->tile_x; + irb->mt->total_height += image->tile_y; + rb->InternalFormat = image->internal_format; rb->Width = image->width; rb->Height = image->height; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b6265dc..e74a2dc 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -366,25 +366,8 @@ intel_miptree_create_layout(struct brw_context *brw, _mesa_get_format_name(format), first_level, last_level, depth0, mt); - if (target == GL_TEXTURE_1D_ARRAY) { - /* For a 1D Array texture the OpenGL API will treat the height0 - * parameter as the number of array slices. For Intel hardware, we treat - * the 1D array as a 2D Array with a height of 1. - * - * So, when we first come through this path to create a 1D Array - * texture, height0 stores the number of slices, and depth0 is 1. In - * this case, we want to swap height0 and depth0. - * - * Since some miptrees will be created based on the base miptree, we may - * come through this path and see height0 as 1 and depth0 being the - * number of slices. In this case we don't need to do the swap. - */ - assert(height0 == 1 || depth0 == 1); - if (height0 > 1) { - depth0 = height0; - height0 = 1; - } - } + if (target == GL_TEXTURE_1D_ARRAY) + assert(height0 == 1); mt->target = target; mt->format = format; @@ -1050,6 +1033,7 @@ intel_get_image_dims(struct gl_texture_image *image, * as a 2D Array with a height of 1. So, here we want to swap image * height and depth. 
*/ + assert(image->Depth == 1); *width = image->Width; *height = 1; *depth = image->Height; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index a486d6e..cacd7e2 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -110,22 +110,6 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, if (ctx->_ImageTransferState) return false; - /* This renderbuffer can come from a texture. In this case, we impose - * some of the same restrictions we have for textures and adjust for - * miplevels. - */ - if (rb->TexImage) { - if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D && - rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE) - return false; - - int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel; - - /* Adjust x and y offset based on miplevel */ - xoffset += irb->mt->level[level].level_x; - yoffset += irb->mt->level[level].level_y; - } - /* It is possible that the renderbuffer (or underlying texture) is * multisampled. 
Since ReadPixels from a multisampled buffer requires a * multisample resolve, we can't handle this here @@ -169,6 +153,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, return false; } + xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset; + yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset; + dst_pitch = _mesa_image_row_stride(pack, width, format, type); /* For a window-system renderbuffer, the buffer is actually flipped @@ -201,7 +188,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp, - bo->virtual, + bo->virtual + irb->mt->offset, dst_pitch, irb->mt->pitch, brw->has_swizzling, irb->mt->tiling, diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 95365fe..7a82be4 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -134,6 +134,15 @@ #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) +#define PIPE_CONTROL_CACHE_FLUSH_BITS \ + (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ + PIPE_CONTROL_RENDER_TARGET_FLUSH) + +#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ + (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_INSTRUCTION_INVALIDATE) + /** @} */ #define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22)) diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c index 3e359a5..39c9636 100644 --- a/src/mesa/drivers/dri/i965/intel_syncobj.c +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -49,6 +49,7 @@ struct brw_fence { /** The fence waits for completion of this batch. 
*/ drm_intel_bo *batch_bo; + mtx_t mutex; bool signalled; }; @@ -58,10 +59,20 @@ struct intel_gl_sync_object { }; static void +brw_fence_init(struct brw_context *brw, struct brw_fence *fence) +{ + fence->brw = brw; + fence->batch_bo = NULL; + mtx_init(&fence->mutex, mtx_plain); +} + +static void brw_fence_finish(struct brw_fence *fence) { if (fence->batch_bo) drm_intel_bo_unreference(fence->batch_bo); + + mtx_destroy(&fence->mutex); } static void @@ -77,7 +88,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) } static bool -brw_fence_has_completed(struct brw_fence *fence) +brw_fence_has_completed_locked(struct brw_fence *fence) { if (fence->signalled) return true; @@ -92,13 +103,21 @@ brw_fence_has_completed(struct brw_fence *fence) return false; } -/** - * Return true if the function successfully signals or has already signalled. - * (This matches the behavior expected from __DRI2fence::client_wait_sync). - */ static bool -brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) +brw_fence_has_completed(struct brw_fence *fence) +{ + bool ret; + + mtx_lock(&fence->mutex); + ret = brw_fence_has_completed_locked(fence); + mtx_unlock(&fence->mutex); + + return ret; +} + +static bool +brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence, + uint64_t timeout) { if (fence->signalled) return true; @@ -123,6 +142,23 @@ brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, return true; } +/** + * Return true if the function successfully signals or has already signalled. + * (This matches the behavior expected from __DRI2fence::client_wait_sync). 
+ */ +static bool +brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, + uint64_t timeout) +{ + bool ret; + + mtx_lock(&fence->mutex); + ret = brw_fence_client_wait_locked(brw, fence, timeout); + mtx_unlock(&fence->mutex); + + return ret; +} + static void brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence) { @@ -161,6 +197,7 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, struct brw_context *brw = brw_context(ctx); struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; + brw_fence_init(brw, &sync->fence); brw_fence_insert(brw, &sync->fence); } @@ -215,7 +252,7 @@ intel_dri_create_fence(__DRIcontext *ctx) if (!fence) return NULL; - fence->brw = brw; + brw_fence_init(brw, fence); brw_fence_insert(brw, fence); return fence; @@ -244,6 +281,12 @@ intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags { struct brw_fence *fence = driver_fence; + /* We might be called here with a NULL fence as a result of WaitSyncKHR + * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case. 
+ */ + if (!fence) + return; + brw_fence_server_wait(fence->brw, fence); } diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index cac33ac..a1364b9 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -140,6 +140,8 @@ intel_alloc_texture_storage(struct gl_context *ctx, !intel_miptree_match_image(intel_texobj->mt, first_image) || intel_texobj->mt->last_level != levels - 1) { intel_miptree_release(&intel_texobj->mt); + + intel_get_image_dims(first_image, &width, &height, &depth); intel_texobj->mt = intel_miptree_create(brw, texobj->Target, first_image->TexFormat, 0, levels - 1, diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c index 492ecdc..2c5c2db 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c @@ -67,5 +67,5 @@ nv20_emit_frag(struct gl_context *ctx, int emit) PUSH_DATA (push, in >> 32); BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1); - PUSH_DATA (push, n); + PUSH_DATA (push, MAX2(1, n)); } diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 2d4bb70..6e006f8 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -484,14 +484,14 @@ swrast_map_renderbuffer(struct gl_context *ctx, xrb->map_mode = mode; xrb->map_x = x; - xrb->map_y = y; + xrb->map_y = rb->Height - y - h; xrb->map_w = w; xrb->map_h = h; stride = w * cpp; xrb->Base.Buffer = malloc(h * stride); - sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h, + sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h, (char *) xrb->Base.Buffer, dPriv->loaderPrivate); |