summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/i915/intel_context.c15
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_clear.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp27
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c152
-rw-r--r--src/mesa/drivers/dri/i965/brw_performance_monitor.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_pipe_control.c37
-rw-r--r--src/mesa/drivers/dri/i965/brw_tcs.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp164
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen7_cs_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ds_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_batchbuffer.h10
-rw-r--r--src/mesa/drivers/dri/i965/intel_fbo.c13
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c22
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_read.c21
-rw-r--r--src/mesa/drivers/dri/i965/intel_reg.h9
-rw-r--r--src/mesa/drivers/dri/i965/intel_syncobj.c59
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_frag.c2
-rw-r--r--src/mesa/drivers/dri/swrast/swrast.c4
32 files changed, 442 insertions, 187 deletions
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c
index e5a3f00..5607d5b 100644
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -858,6 +858,7 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
struct __DRIimageList images;
unsigned int format;
uint32_t buffer_mask = 0;
+ int ret;
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
@@ -877,12 +878,14 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
- (*screen->image.loader->getBuffers) (drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
+ ret = screen->image.loader->getBuffers(drawable,
+ driGLFormatToImageFormat(format),
+ &drawable->dri2.stamp,
+ drawable->loaderPrivate,
+ buffer_mask,
+ &images);
+ if (!ret)
+ return;
if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
drawable->w = images.front->width;
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index f448551..194b412 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -48,6 +48,7 @@ i965_compiler_FILES = \
brw_nir_attribute_workarounds.c \
brw_nir_intrinsics.c \
brw_nir_opt_peephole_ffma.c \
+ brw_nir_tcs_workarounds.c \
brw_packed_float.c \
brw_predicated_break.cpp \
brw_reg.h \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 9590968..6be82c5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -167,7 +167,8 @@ nir_uniform_type_size(const struct glsl_type *type)
}
const unsigned *
-brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
+brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx,
+ struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
@@ -175,13 +176,6 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
{
const struct brw_compiler *compiler = brw->intelScreen->compiler;
- void *mem_ctx = ralloc_context(NULL);
-
- /* Calling brw_preprocess_nir and friends is destructive and, if cloning is
- * enabled, may end up completely replacing the nir_shader. Therefore, we
- * own it and might as well put it in our context for easy cleanup.
- */
- ralloc_steal(mem_ctx, nir);
nir->options =
compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 7ec5875..133a8ac 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -366,7 +366,8 @@ struct brw_blorp_blit_prog_key
void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key);
const unsigned *
-brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
+brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx,
+ struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 782d285..db94f33 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1296,7 +1296,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
* of samples).
*/
static nir_shader *
-brw_blorp_build_nir_shader(struct brw_context *brw,
+brw_blorp_build_nir_shader(struct brw_context *brw, void *mem_ctx,
const brw_blorp_blit_prog_key *key)
{
nir_ssa_def *src_pos, *dst_pos, *color;
@@ -1342,7 +1342,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
(key->dst_samples == 0));
nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
struct brw_blorp_blit_vars v;
brw_blorp_blit_vars_init(&b, &v, key);
@@ -1505,6 +1505,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
&params->wm_prog_kernel, &params->wm_prog_data))
return;
+ void *mem_ctx = ralloc_context(NULL);
+
const unsigned *program;
unsigned program_size;
struct brw_blorp_prog_data prog_data;
@@ -1512,7 +1514,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
/* Try and compile with NIR first. If that fails, fall back to the old
* method of building shaders manually.
*/
- nir_shader *nir = brw_blorp_build_nir_shader(brw, prog_key);
+ nir_shader *nir = brw_blorp_build_nir_shader(brw, mem_ctx, prog_key);
struct brw_wm_prog_key wm_key;
brw_blorp_init_wm_prog_key(&wm_key);
wm_key.tex.compressed_multisample_layout_mask =
@@ -1520,7 +1522,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
wm_key.tex.msaa_16 = prog_key->tex_samples == 16;
wm_key.multisample_fbo = prog_key->rt_samples > 1;
- program = brw_blorp_compile_nir_shader(brw, nir, &wm_key, false,
+ program = brw_blorp_compile_nir_shader(brw, mem_ctx, nir, &wm_key, false,
&prog_data, &program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
@@ -1528,6 +1530,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
program, program_size,
&prog_data, sizeof(prog_data),
&params->wm_prog_kernel, &params->wm_prog_data);
+
+ ralloc_free(mem_ctx);
}
static void
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index 2515a04..6400218 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -64,7 +64,7 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear");
nir_variable *u_color = nir_variable_create(b.shader, nir_var_uniform,
@@ -84,7 +84,8 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw,
struct brw_blorp_prog_data prog_data;
unsigned program_size;
const unsigned *program =
- brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data,
+ brw_blorp_compile_nir_shader(brw, mem_ctx,
+ b.shader, &wm_key, use_replicated_data,
&prog_data, &program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 10e9f47..7d15c28 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -220,6 +220,8 @@ struct brw_tcs_prog_key
/** A bitfield of per-vertex outputs written. */
uint64_t outputs_written;
+ bool quads_workaround;
+
struct brw_sampler_prog_key_data tex;
};
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1cb99da..2af42e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5681,7 +5681,7 @@ fs_visitor::setup_gs_payload()
* have to multiply by VerticesIn to obtain the total storage requirement.
*/
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
- max_push_components) {
+ max_push_components || gs_prog_data->invocations > 1) {
gs_prog_data->base.include_vue_handles = true;
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 11c078a..91763d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2322,23 +2322,23 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
break;
fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t));
+ fs_reg m0_2 = component(m0, 2);
- const fs_builder fwa_bld = bld.exec_all();
+ const fs_builder chanbld = bld.exec_all().group(1, 0);
/* Zero the message header */
- fwa_bld.MOV(m0, brw_imm_ud(0u));
+ bld.exec_all().MOV(m0, brw_imm_ud(0u));
/* Copy "Barrier ID" from r0.2, bits 16:13 */
- fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(INTEL_MASK(16, 13)));
/* Shift it up to bits 27:24. */
- fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
/* Set the Barrier Count and the enable bit */
- fwa_bld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
break;
@@ -4060,12 +4060,23 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
dest = get_nir_dest(instr->dest);
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
- fs_reg offset = get_nir_src(instr->src[0]);
+ fs_reg offset;
fs_reg data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[2]);
+ /* Get the offset */
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset = vgrf(glsl_type::uint_type);
+ bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
/* Emit the actual atomic operation operation */
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 74c354f..6185310 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
+
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
new file mode 100644
index 0000000..0626981
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+
+/**
+ * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
+ * definition of the patch header layouts):
+ *
+ * "HW Bug: The Tessellation stage will incorrectly add domain points
+ * along patch edges under the following conditions, which may result
+ * in conformance failures and/or cracking artifacts:
+ *
+ * * QUAD domain
+ * * INTEGER partitioning
+ * * All three TessFactors in a given U or V direction (e.g., V
+ * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
+ * * All three TessFactors in the other direction are > 1.0 and all
+ * round up to the same integer value (e.g, U direction:
+ * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
+ *
+ * The suggested workaround (to be implemented as part of the postamble
+ * to the HS shader in the HS kernel) is:
+ *
+ * if (
+ * (TF[UEQ0] > 1.0) ||
+ * (TF[VEQ0] > 1.0) ||
+ * (TF[UEQ1] > 1.0) ||
+ * (TF[VEQ1] > 1.0) ||
+ * (TF[INSIDE_U] > 1.0) ||
+ * (TF[INSIDE_V] > 1.0) )
+ * {
+ * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
+ * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
+ * }"
+ *
+ * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
+ * that the above workaround fails to fix certain GL/ES CTS tests which
+ * have inside tessellation factors of -1.0. This can be explained by
+ * a quote from the ARB_tessellation_shader specification:
+ *
+ * "If "equal_spacing" is used, the floating-point tessellation level is
+ * first clamped to the range [1,<max>], where <max> is implementation-
+ * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
+ *
+ * In other words, the actual inner tessellation factor used is
+ * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
+ * value against 1.0. To accomplish this, we change the comparison from
+ * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
+ */
+
+static inline nir_ssa_def *
+load_output(nir_builder *b, int num_components, int offset)
+{
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
+ load->num_components = num_components;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_intrinsic_set_base(load, offset);
+
+ nir_builder_instr_insert(b, &load->instr);
+
+ return &load->dest.ssa;
+}
+
+static inline void
+store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
+{
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ store->num_components = comps;
+ nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
+ store->src[0] = nir_src_for_ssa(value);
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+emit_quads_workaround(nir_builder *b, nir_block *block)
+{
+ /* We're going to insert a new if-statement in a predecessor of the end
+ * block. This would normally create a new block (after the if) which
+ * would then become the predecessor of the end block, causing our set
+ * walking to get screwed up. To avoid this, just emit a constant at
+ * the end of our current block, and insert the if before that.
+ */
+ b->cursor = nir_after_block_before_jump(block);
+ b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr);
+
+ nir_ssa_def *inner = load_output(b, 2, 0);
+ nir_ssa_def *outer = load_output(b, 4, 1);
+
+ nir_ssa_def *any_greater_than_1 =
+ nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
+ nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
+
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
+ nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+ /* Fill out the new then-block */
+ b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+ store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
+ nir_imm_float(b, 2.0f), inner), 0, 2);
+}
+
+void
+brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
+{
+ assert(nir->stage == MESA_SHADER_TESS_CTRL);
+
+ nir_foreach_function(func, nir) {
+ if (!func->impl)
+ continue;
+
+ nir_builder b;
+ nir_builder_init(&b, func->impl);
+
+ struct set_entry *entry;
+ set_foreach(func->impl->end_block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ emit_quads_workaround(&b, pred);
+ }
+
+ nir_metadata_preserve(func->impl, 0);
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index a91c6e2..a42a322 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -686,12 +686,12 @@ stop_oa_counters(struct brw_context *brw)
* The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
* including the required PIPE_CONTROL flushes.
*
- * Sandybridge is the worst case scenario: brw_emit_mi_flush
- * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush
- * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add
- * the 3 DWords for MI_REPORT_PERF_COUNT itself.
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four
+ * PIPE_CONTROLs which are 5 DWords each. We have to flush before and after
+ * MI_REPORT_PERF_COUNT, so multiply by two. Finally, add the 3 DWords for
+ * MI_REPORT_PERF_COUNT itself.
*/
-#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)
+#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3)
/**
* Emit an MI_REPORT_PERF_COUNT command packet.
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 4672efd..d51cf1b 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -96,10 +96,38 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
+ if (brw->gen >= 6 &&
+ (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
+ (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
+ /* A pipe control command with flush and invalidate bits set
+ * simultaneously is an inherently racy operation on Gen6+ if the
+ * contents of the flushed caches were intended to become visible from
+ * any of the invalidated caches. Split it in two PIPE_CONTROLs, the
+ * first one should stall the pipeline to make sure that the flushed R/W
+ * caches are coherent with memory once the specified R/O caches are
+ * invalidated. On pre-Gen6 hardware the (implicit) R/O cache
+ * invalidation seems to happen at the bottom of the pipeline together
+ * with any write cache flush, so this shouldn't be a concern.
+ */
+ brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
+ PIPE_CONTROL_CS_STALL);
+ flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
+ }
+
if (brw->gen >= 8) {
if (brw->gen == 8)
gen8_add_cs_stall_workaround_bits(&flags);
+ if (brw->gen == 9 &&
+ (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+ /* Hardware workaround: SKL
+ *
+ * Emit Pipe Control with all bits set to zero before emitting
+ * a Pipe Control with VF Cache Invalidate set.
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
OUT_BATCH(flags);
@@ -311,15 +339,6 @@ brw_emit_mi_flush(struct brw_context *brw)
} else {
int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (brw->gen >= 6) {
- if (brw->gen == 9) {
- /* Hardware workaround: SKL
- *
- * Emit Pipe Control with all bits set to zero before emitting
- * a Pipe Control with VF Cache Invalidate set.
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 8a5dd7e..6b7fde2 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw,
key->patch_outputs_written);
found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
key->tes_primitive_mode);
+ found |= key_debug(brw, "quads and equal_spacing workaround",
+ old_key->quads_workaround, key->quads_workaround);
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
if (!found) {
@@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw,
* based on the domain the DS is expecting to tessellate.
*/
key.tes_primitive_mode = tep->program.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ tep->program.PrimitiveMode == GL_QUADS &&
+ tep->program.Spacing == GL_EQUAL;
if (tcp) {
key.program_string_id = tcp->id;
@@ -383,6 +388,8 @@ brw_tcs_precompile(struct gl_context *ctx,
struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+ const struct gl_shader *tes =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
memset(&key, 0, sizeof(key));
@@ -393,9 +400,14 @@ brw_tcs_precompile(struct gl_context *ctx,
if (brw->gen < 8)
key.input_vertices = shader_prog->TessCtrl.VerticesOut;
- key.tes_primitive_mode =
- shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ?
- shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES;
+ if (tes) {
+ key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ shader_prog->TessEval.PrimitiveMode == GL_QUADS &&
+ shader_prog->TessEval.Spacing == GL_EQUAL;
+ } else {
+ key.tes_primitive_mode = GL_TRIANGLES;
+ }
key.outputs_written = prog->OutputsWritten;
key.patch_outputs_written = prog->PatchOutputsWritten;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 162b481..a7398a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -354,95 +354,97 @@ vec4_visitor::opt_vector_float()
{
bool progress = false;
- int last_reg = -1, last_reg_offset = -1;
- enum brw_reg_file last_reg_file = BAD_FILE;
+ foreach_block(block, cfg) {
+ int last_reg = -1, last_reg_offset = -1;
+ enum brw_reg_file last_reg_file = BAD_FILE;
+
+ uint8_t imm[4] = { 0 };
+ int inst_count = 0;
+ vec4_instruction *imm_inst[4];
+ unsigned writemask = 0;
+ enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+
+ foreach_inst_in_block_safe(vec4_instruction, inst, block) {
+ int vf = -1;
+ enum brw_reg_type need_type;
+
+ /* Look for unconditional MOVs from an immediate with a partial
+ * writemask. Skip type-conversion MOVs other than integer 0,
+ * where the type doesn't matter. See if the immediate can be
+ * represented as a VF.
+ */
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->src[0].file == IMM &&
+ inst->predicate == BRW_PREDICATE_NONE &&
+ inst->dst.writemask != WRITEMASK_XYZW &&
+ (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
+
+ vf = brw_float_to_vf(inst->src[0].d);
+ need_type = BRW_REGISTER_TYPE_D;
+
+ if (vf == -1) {
+ vf = brw_float_to_vf(inst->src[0].f);
+ need_type = BRW_REGISTER_TYPE_F;
+ }
+ } else {
+ last_reg = -1;
+ }
- uint8_t imm[4] = { 0 };
- int inst_count = 0;
- vec4_instruction *imm_inst[4];
- unsigned writemask = 0;
- enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+ /* If this wasn't a MOV, or the destination register doesn't match,
+ * or we have to switch destination types, then this breaks our
+ * sequence. Combine anything we've accumulated so far.
+ */
+ if (last_reg != inst->dst.nr ||
+ last_reg_offset != inst->dst.reg_offset ||
+ last_reg_file != inst->dst.file ||
+ (vf > 0 && dest_type != need_type)) {
+
+ if (inst_count > 1) {
+ unsigned vf;
+ memcpy(&vf, imm, sizeof(vf));
+ vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
+ mov->dst.type = dest_type;
+ mov->dst.writemask = writemask;
+ inst->insert_before(block, mov);
+
+ for (int i = 0; i < inst_count; i++) {
+ imm_inst[i]->remove(block);
+ }
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- int vf = -1;
- enum brw_reg_type need_type;
+ progress = true;
+ }
- /* Look for unconditional MOVs from an immediate with a partial
- * writemask. Skip type-conversion MOVs other than integer 0,
- * where the type doesn't matter. See if the immediate can be
- * represented as a VF.
- */
- if (inst->opcode == BRW_OPCODE_MOV &&
- inst->src[0].file == IMM &&
- inst->predicate == BRW_PREDICATE_NONE &&
- inst->dst.writemask != WRITEMASK_XYZW &&
- (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
-
- vf = brw_float_to_vf(inst->src[0].d);
- need_type = BRW_REGISTER_TYPE_D;
-
- if (vf == -1) {
- vf = brw_float_to_vf(inst->src[0].f);
- need_type = BRW_REGISTER_TYPE_F;
- }
- } else {
- last_reg = -1;
- }
+ inst_count = 0;
+ last_reg = -1;
+ writemask = 0;
+ dest_type = BRW_REGISTER_TYPE_F;
- /* If this wasn't a MOV, or the destination register doesn't match,
- * or we have to switch destination types, then this breaks our
- * sequence. Combine anything we've accumulated so far.
- */
- if (last_reg != inst->dst.nr ||
- last_reg_offset != inst->dst.reg_offset ||
- last_reg_file != inst->dst.file ||
- (vf > 0 && dest_type != need_type)) {
-
- if (inst_count > 1) {
- unsigned vf;
- memcpy(&vf, imm, sizeof(vf));
- vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
- mov->dst.type = dest_type;
- mov->dst.writemask = writemask;
- inst->insert_before(block, mov);
-
- for (int i = 0; i < inst_count; i++) {
- imm_inst[i]->remove(block);
+ for (int i = 0; i < 4; i++) {
+ imm[i] = 0;
}
-
- progress = true;
}
- inst_count = 0;
- last_reg = -1;
- writemask = 0;
- dest_type = BRW_REGISTER_TYPE_F;
-
- for (int i = 0; i < 4; i++) {
- imm[i] = 0;
+ /* Record this instruction's value (if it was representable). */
+ if (vf != -1) {
+ if ((inst->dst.writemask & WRITEMASK_X) != 0)
+ imm[0] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Y) != 0)
+ imm[1] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Z) != 0)
+ imm[2] = vf;
+ if ((inst->dst.writemask & WRITEMASK_W) != 0)
+ imm[3] = vf;
+
+ writemask |= inst->dst.writemask;
+ imm_inst[inst_count++] = inst;
+
+ last_reg = inst->dst.nr;
+ last_reg_offset = inst->dst.reg_offset;
+ last_reg_file = inst->dst.file;
+ if (vf > 0)
+ dest_type = need_type;
}
}
-
- /* Record this instruction's value (if it was representable). */
- if (vf != -1) {
- if ((inst->dst.writemask & WRITEMASK_X) != 0)
- imm[0] = vf;
- if ((inst->dst.writemask & WRITEMASK_Y) != 0)
- imm[1] = vf;
- if ((inst->dst.writemask & WRITEMASK_Z) != 0)
- imm[2] = vf;
- if ((inst->dst.writemask & WRITEMASK_W) != 0)
- imm[3] = vf;
-
- writemask |= inst->dst.writemask;
- imm_inst[inst_count++] = inst;
-
- last_reg = inst->dst.nr;
- last_reg_offset = inst->dst.reg_offset;
- last_reg_file = inst->dst.file;
- if (vf > 0)
- dest_type = need_type;
- }
}
if (progress)
@@ -1109,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
/* Can't coalesce this GRF if someone else was going to
* read it later.
*/
- if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip)
+ if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
continue;
/* We need to check interference with the final destination between this
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 0c1f0c3..10898a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == VGRF) {
- if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
+ if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
entry->remove();
ralloc_free(entry);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 927438f..26a910c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -59,7 +59,10 @@ vec4_gs_visitor::make_reg_for_system_value(int location)
switch (location) {
case SYSTEM_VALUE_INVOCATION_ID:
this->current_annotation = "initialize gl_InvocationID";
- emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
+ if (gs_prog_data->invocations > 1)
+ emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
+ else
+ emit(MOV(*reg, brw_imm_ud(0)));
break;
default:
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index f61c612..5440dba 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -451,6 +451,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
+ if (key->quads_workaround)
+ brw_nir_apply_tcs_quads_workaround(nir);
+
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
if (is_scalar)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 609285e..61ada53 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1443,10 +1443,12 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
/* _NEW_PROGRAM */
struct gl_shader_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
- if (prog && brw->cs.prog_data->uses_num_work_groups) {
+ if (prog && cs_prog_data->uses_num_work_groups) {
const unsigned surf_idx =
- brw->cs.prog_data->binding_table.work_groups_start;
+ cs_prog_data->binding_table.work_groups_start;
uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
drm_intel_bo *bo;
uint32_t bo_offset;
@@ -1475,6 +1477,7 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
const struct brw_tracked_state brw_cs_work_groups_surface = {
.dirty = {
.brw = BRW_NEW_BLORP |
+ BRW_NEW_CS_PROG_DATA |
BRW_NEW_CS_WORK_GROUPS
},
.emit = brw_upload_cs_work_groups_surface,
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 26de633..64ccdb6 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -50,6 +50,7 @@ upload_clip_state(struct brw_context *brw)
dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
+ /* BRW_NEW_VS_PROG_DATA */
dw1 |= brw->vs.prog_data->base.cull_distance_mask;
if (brw->gen >= 7)
@@ -224,6 +225,7 @@ const struct brw_tracked_state gen7_clip_state = {
BRW_NEW_CONTEXT |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
+ BRW_NEW_VS_PROG_DATA |
BRW_NEW_META_IN_PROGRESS |
BRW_NEW_PRIMITIVE |
BRW_NEW_RASTERIZER_DISCARD |
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 5427fa5..b245226 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -283,7 +283,7 @@ gen7_upload_cs_push_constants(struct brw_context *brw)
(struct brw_compute_program *) brw->compute_program;
if (cp) {
- /* CACHE_NEW_CS_PROG */
+ /* BRW_NEW_CS_PROG_DATA */
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
@@ -297,6 +297,7 @@ const struct brw_tracked_state gen7_cs_push_constants = {
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
},
.emit = gen7_upload_cs_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c
index 6f01abb..3b79b55 100644
--- a/src/mesa/drivers/dri/i965/gen8_ds_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c
@@ -69,6 +69,7 @@ gen8_upload_ds_state(struct brw_context *brw)
GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) |
(tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
+ /* _NEW_TRANSFORM */
OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled,
GEN8_DS_USER_CLIP_DISTANCE) |
SET_FIELD(vue_prog_data->cull_distance_mask,
@@ -106,7 +107,7 @@ gen8_upload_ds_state(struct brw_context *brw)
const struct brw_tracked_state gen8_ds_state = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_TESS_PROGRAMS |
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8a904fe..f916d99 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -124,6 +124,7 @@ const struct brw_tracked_state gen8_ps_extra = {
.mesa = _NEW_BUFFERS | _NEW_COLOR,
.brw = BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA,
},
.emit = upload_ps_extra,
@@ -283,7 +284,6 @@ const struct brw_tracked_state gen8_ps_state = {
.mesa = _NEW_MULTISAMPLE,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA,
},
.emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index aa1dc38..67e8e8f 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -21,13 +21,13 @@ extern "C" {
* - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
* - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
* - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
- * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
+ * - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB,
+ * which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes
* - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes.
* On Ironlake, it's 6 DWords, but we have some slack due to the lack of
* Sandybridge PIPE_CONTROL madness.
- * - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
- * - 5 dwords for initial mi_flush
+ * - CC_STATE workaround on HSW (17 * 4 = 68 bytes)
+ * - 10 dwords for initial mi_flush
* - 2 dwords for CC state setup
* - 5 dwords for the required pipe control at the end
* - Restoring L3 configuration: (24 dwords = 96 bytes)
@@ -35,7 +35,7 @@ extern "C" {
* - 7 dwords for L3 configuration set-up.
* - 5 dwords for L3 atomic set-up (on HSW).
*/
-#define BATCH_RESERVED 248
+#define BATCH_RESERVED 308
struct intel_batchbuffer;
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 939f9a0..8a0d2ad 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -374,6 +374,19 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
if (!irb->mt)
return;
+ /* Adjust the miptree's upper-left coordinate.
+ *
+ * FIXME: Adjusting the miptree's layout outside of
+ * intel_miptree_create_layout() is fragile. Plumb the adjustment through
+ * intel_miptree_create_layout() and brw_tex_layout().
+ */
+ irb->mt->level[0].level_x = image->tile_x;
+ irb->mt->level[0].level_y = image->tile_y;
+ irb->mt->level[0].slice[0].x_offset = image->tile_x;
+ irb->mt->level[0].slice[0].y_offset = image->tile_y;
+ irb->mt->total_width += image->tile_x;
+ irb->mt->total_height += image->tile_y;
+
rb->InternalFormat = image->internal_format;
rb->Width = image->width;
rb->Height = image->height;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6265dc..e74a2dc 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -366,25 +366,8 @@ intel_miptree_create_layout(struct brw_context *brw,
_mesa_get_format_name(format),
first_level, last_level, depth0, mt);
- if (target == GL_TEXTURE_1D_ARRAY) {
- /* For a 1D Array texture the OpenGL API will treat the height0
- * parameter as the number of array slices. For Intel hardware, we treat
- * the 1D array as a 2D Array with a height of 1.
- *
- * So, when we first come through this path to create a 1D Array
- * texture, height0 stores the number of slices, and depth0 is 1. In
- * this case, we want to swap height0 and depth0.
- *
- * Since some miptrees will be created based on the base miptree, we may
- * come through this path and see height0 as 1 and depth0 being the
- * number of slices. In this case we don't need to do the swap.
- */
- assert(height0 == 1 || depth0 == 1);
- if (height0 > 1) {
- depth0 = height0;
- height0 = 1;
- }
- }
+ if (target == GL_TEXTURE_1D_ARRAY)
+ assert(height0 == 1);
mt->target = target;
mt->format = format;
@@ -1050,6 +1033,7 @@ intel_get_image_dims(struct gl_texture_image *image,
* as a 2D Array with a height of 1. So, here we want to swap image
* height and depth.
*/
+ assert(image->Depth == 1);
*width = image->Width;
*height = 1;
*depth = image->Height;
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index a486d6e..cacd7e2 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -110,22 +110,6 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
if (ctx->_ImageTransferState)
return false;
- /* This renderbuffer can come from a texture. In this case, we impose
- * some of the same restrictions we have for textures and adjust for
- * miplevels.
- */
- if (rb->TexImage) {
- if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D &&
- rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE)
- return false;
-
- int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel;
-
- /* Adjust x and y offset based on miplevel */
- xoffset += irb->mt->level[level].level_x;
- yoffset += irb->mt->level[level].level_y;
- }
-
/* It is possible that the renderbuffer (or underlying texture) is
* multisampled. Since ReadPixels from a multisampled buffer requires a
* multisample resolve, we can't handle this here
@@ -169,6 +153,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
return false;
}
+ xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset;
+ yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset;
+
dst_pitch = _mesa_image_row_stride(pack, width, format, type);
/* For a window-system renderbuffer, the buffer is actually flipped
@@ -201,7 +188,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
- bo->virtual,
+ bo->virtual + irb->mt->offset,
dst_pitch, irb->mt->pitch,
brw->has_swizzling,
irb->mt->tiling,
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 95365fe..7a82be4 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -134,6 +134,15 @@
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+ PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+ (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
/** @} */
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
index 3e359a5..39c9636 100644
--- a/src/mesa/drivers/dri/i965/intel_syncobj.c
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -49,6 +49,7 @@ struct brw_fence {
/** The fence waits for completion of this batch. */
drm_intel_bo *batch_bo;
+ mtx_t mutex;
bool signalled;
};
@@ -58,10 +59,20 @@ struct intel_gl_sync_object {
};
static void
+brw_fence_init(struct brw_context *brw, struct brw_fence *fence)
+{
+ fence->brw = brw;
+ fence->batch_bo = NULL;
+ mtx_init(&fence->mutex, mtx_plain);
+}
+
+static void
brw_fence_finish(struct brw_fence *fence)
{
if (fence->batch_bo)
drm_intel_bo_unreference(fence->batch_bo);
+
+ mtx_destroy(&fence->mutex);
}
static void
@@ -77,7 +88,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
}
static bool
-brw_fence_has_completed(struct brw_fence *fence)
+brw_fence_has_completed_locked(struct brw_fence *fence)
{
if (fence->signalled)
return true;
@@ -92,13 +103,21 @@ brw_fence_has_completed(struct brw_fence *fence)
return false;
}
-/**
- * Return true if the function successfully signals or has already signalled.
- * (This matches the behavior expected from __DRI2fence::client_wait_sync).
- */
static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
- uint64_t timeout)
+brw_fence_has_completed(struct brw_fence *fence)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_has_completed_locked(fence);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
+static bool
+brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
{
if (fence->signalled)
return true;
@@ -123,6 +142,23 @@ brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
return true;
}
+/**
+ * Return true if the function successfully signals or has already signalled.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_client_wait_locked(brw, fence, timeout);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
@@ -161,6 +197,7 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
struct brw_context *brw = brw_context(ctx);
struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
+ brw_fence_init(brw, &sync->fence);
brw_fence_insert(brw, &sync->fence);
}
@@ -215,7 +252,7 @@ intel_dri_create_fence(__DRIcontext *ctx)
if (!fence)
return NULL;
- fence->brw = brw;
+ brw_fence_init(brw, fence);
brw_fence_insert(brw, fence);
return fence;
@@ -244,6 +281,12 @@ intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags
{
struct brw_fence *fence = driver_fence;
+ /* We might be called here with a NULL fence as a result of WaitSyncKHR
+ * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case.
+ */
+ if (!fence)
+ return;
+
brw_fence_server_wait(fence->brw, fence);
}
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index cac33ac..a1364b9 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -140,6 +140,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
!intel_miptree_match_image(intel_texobj->mt, first_image) ||
intel_texobj->mt->last_level != levels - 1) {
intel_miptree_release(&intel_texobj->mt);
+
+ intel_get_image_dims(first_image, &width, &height, &depth);
intel_texobj->mt = intel_miptree_create(brw, texobj->Target,
first_image->TexFormat,
0, levels - 1,
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
index 492ecdc..2c5c2db 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
@@ -67,5 +67,5 @@ nv20_emit_frag(struct gl_context *ctx, int emit)
PUSH_DATA (push, in >> 32);
BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
- PUSH_DATA (push, n);
+ PUSH_DATA (push, MAX2(1, n));
}
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
index 2d4bb70..6e006f8 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -484,14 +484,14 @@ swrast_map_renderbuffer(struct gl_context *ctx,
xrb->map_mode = mode;
xrb->map_x = x;
- xrb->map_y = y;
+ xrb->map_y = rb->Height - y - h;
xrb->map_w = w;
xrb->map_h = h;
stride = w * cpp;
xrb->Base.Buffer = malloc(h * stride);
- sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h,
+ sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h,
(char *) xrb->Base.Buffer,
dPriv->loaderPrivate);