summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWolfgang Wiedmeyer <wolfgit@wiedmeyer.de>2016-12-17 03:40:28 +0100
committerWolfgang Wiedmeyer <wolfgit@wiedmeyer.de>2016-12-17 03:40:28 +0100
commitef9a82038acd73936830671dbe43205c28a2151d (patch)
tree90be2cdd9f48750c18b669ca2ab9553575d9f822
parentf84f60446aebaeee8a1df741328cbd4a30dd24ea (diff)
parent743c2327b167b95046e02af4c7b2f7a282a0943d (diff)
downloadexternal_mesa3d-replicant-6.0-old.zip
external_mesa3d-replicant-6.0-old.tar.gz
external_mesa3d-replicant-6.0-old.tar.bz2
Merge remote-tracking branch 'androidx86/marshmallow-x86' into replicant-6.0replicant-6.0-beta-0001replicant-6.0-alpha-0006replicant-6.0-old
-rw-r--r--.travis.yml28
-rw-r--r--Makefile.am1
-rw-r--r--VERSION2
-rw-r--r--appveyor.yml6
-rw-r--r--bin/.cherry-ignore23
-rwxr-xr-xbin/get-pick-list.sh2
-rw-r--r--configure.ac90
-rw-r--r--docs/relnotes/12.0.2.html403
-rw-r--r--docs/relnotes/12.0.3.html71
-rw-r--r--docs/relnotes/12.0.4.html321
-rw-r--r--include/GL/mesa_glinterop.h12
-rw-r--r--install-gallium-links.mk4
-rw-r--r--src/Makefile.am27
-rw-r--r--src/compiler/glsl/glcpp/glcpp-parse.y42
-rw-r--r--src/compiler/glsl/glcpp/glcpp.h2
-rw-r--r--src/compiler/glsl/glcpp/tests/120-undef-builtin.c1
-rw-r--r--src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected3
-rw-r--r--src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c4
-rw-r--r--src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected4
-rw-r--r--src/compiler/glsl/glsl_lexer.ll8
-rw-r--r--src/compiler/glsl/glsl_parser.yy4
-rw-r--r--src/compiler/glsl/ir.h7
-rw-r--r--src/compiler/glsl/ir_hv_accept.cpp2
-rw-r--r--src/compiler/glsl/ir_set_program_inouts.cpp10
-rw-r--r--src/compiler/glsl/link_varyings.cpp23
-rw-r--r--src/compiler/glsl/linker.cpp15
-rw-r--r--src/compiler/glsl/lower_packed_varyings.cpp11
-rw-r--r--src/compiler/glsl/opt_conditional_discard.cpp9
-rw-r--r--src/compiler/glsl_types.cpp18
-rw-r--r--src/compiler/nir/nir.c116
-rw-r--r--src/compiler/nir/nir.h4
-rw-r--r--src/compiler/nir/nir_builder.h19
-rw-r--r--src/compiler/nir/nir_inline_functions.c42
-rw-r--r--src/compiler/nir/nir_intrinsics.h22
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c3
-rw-r--r--src/compiler/nir/nir_lower_vars_to_ssa.c209
-rw-r--r--src/compiler/nir/nir_opcodes.py2
-rw-r--r--src/compiler/nir/nir_phi_builder.h3
-rw-r--r--src/compiler/spirv/spirv_to_nir.c8
-rw-r--r--src/compiler/spirv/vtn_cfg.c23
-rw-r--r--src/compiler/spirv/vtn_private.h4
-rw-r--r--src/compiler/spirv/vtn_variables.c171
-rw-r--r--src/egl/drivers/dri2/egl_dri2.c141
-rw-r--r--src/egl/drivers/dri2/egl_dri2.h6
-rw-r--r--src/egl/drivers/dri2/platform_android.c20
-rw-r--r--src/egl/drivers/dri2/platform_drm.c1
-rw-r--r--src/egl/drivers/dri2/platform_surfaceless.c1
-rw-r--r--src/egl/drivers/dri2/platform_wayland.c17
-rw-r--r--src/egl/drivers/dri2/platform_x11.c17
-rw-r--r--src/egl/drivers/dri2/platform_x11_dri3.c12
-rw-r--r--src/egl/main/eglapi.c8
-rw-r--r--src/egl/main/egldefines.h3
-rw-r--r--src/egl/main/eglglobals.c8
-rw-r--r--src/egl/main/eglsurface.c6
-rw-r--r--src/egl/main/eglsync.c7
-rw-r--r--src/egl/wayland/wayland-egl/wayland-egl-priv.h1
-rw-r--r--src/egl/wayland/wayland-egl/wayland-egl.c3
-rw-r--r--src/gallium/auxiliary/Makefile.sources3
-rw-r--r--src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c9
-rw-r--r--src/gallium/auxiliary/util/u_box.h31
-rw-r--r--src/gallium/auxiliary/util/u_format_r11g11b10f.h2
-rw-r--r--src/gallium/auxiliary/util/u_math.h2
-rw-r--r--src/gallium/auxiliary/util/u_viewport.h59
-rw-r--r--src/gallium/auxiliary/vl/vl_winsys_dri3.c1
-rw-r--r--src/gallium/auxiliary/vl/vl_winsys_drm.c3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_draw.c3
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c78
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c15
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.h3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h4
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c29
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c3
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c6
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h1
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp9
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp6
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp13
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp15
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp147
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp8
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp19
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp4
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_draw.c20
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_fragprog.c4
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_state.c5
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_transfer.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c22
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h1
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c7
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c22
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c20
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c14
-rw-r--r--src/gallium/drivers/r300/r300_context.c10
-rw-r--r--src/gallium/drivers/radeon/cayman_msaa.c7
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c5
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c70
-rw-r--r--src/gallium/drivers/radeonsi/cik_sdma.c6
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c26
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c23
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c13
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h7
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c45
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c76
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h5
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c19
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c14
-rw-r--r--src/gallium/drivers/radeonsi/sid.h1
-rw-r--r--src/gallium/drivers/svga/svga_shader.c15
-rw-r--r--src/gallium/drivers/swr/Makefile.am8
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.c20
-rw-r--r--src/gallium/drivers/vc4/vc4_bufmgr.h21
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_emit.c6
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_vpm.c9
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c19
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.h3
-rw-r--r--src/gallium/include/pipe/p_state.h12
-rw-r--r--src/gallium/state_trackers/clover/llvm/invocation.cpp2
-rw-r--r--src/gallium/state_trackers/dri/dri2.c11
-rw-r--r--src/gallium/state_trackers/nine/nine_shader.c2
-rw-r--r--src/gallium/state_trackers/nine/surface9.c1
-rw-r--r--src/gallium/state_trackers/nine/volume9.c2
-rw-r--r--src/gallium/state_trackers/omx/vid_enc.c4
-rw-r--r--src/gallium/state_trackers/va/surface.c36
-rw-r--r--src/gallium/state_trackers/vdpau/output.c2
-rw-r--r--src/gallium/state_trackers/xa/xa_tracker.c3
-rw-r--r--src/gallium/targets/dri/Makefile.am7
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_surface.c20
-rw-r--r--src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c3
-rw-r--r--src/gallium/winsys/radeon/drm/radeon_drm_winsys.c3
-rw-r--r--src/gallium/winsys/svga/drm/vmw_screen.c10
-rw-r--r--src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c56
-rw-r--r--src/gallium/winsys/vc4/drm/vc4_drm_winsys.c3
-rw-r--r--src/gallium/winsys/virgl/drm/virgl_drm_winsys.c2
-rw-r--r--src/gbm/backends/dri/gbm_dri.c8
-rw-r--r--src/gbm/main/gbm.c10
-rw-r--r--src/glx/dri3_glx.c11
-rw-r--r--src/glx/glx_error.c4
-rw-r--r--src/glx/glx_pbuffer.c2
-rw-r--r--src/glx/glxcmds.c37
-rw-r--r--src/glx/glxglvnd.c10
-rw-r--r--src/glx/query_renderer.c4
-rw-r--r--src/glx/tests/fake_glx_screen.cpp15
-rw-r--r--src/intel/genxml/Makefile.am1
-rw-r--r--src/intel/genxml/gen6.xml9
-rw-r--r--src/intel/genxml/gen7.xml9
-rw-r--r--src/intel/genxml/gen75.xml9
-rw-r--r--src/intel/genxml/gen8.xml11
-rw-r--r--src/intel/genxml/gen9.xml11
-rw-r--r--src/intel/isl/Makefile.am12
-rw-r--r--src/intel/isl/isl.c8
-rw-r--r--src/intel/isl/isl.h2
-rw-r--r--src/intel/isl/isl_gen6.c2
-rw-r--r--src/intel/isl/isl_surface_state.c9
-rw-r--r--src/intel/vulkan/Makefile.am5
-rw-r--r--src/intel/vulkan/anv_cmd_buffer.c32
-rw-r--r--src/intel/vulkan/anv_descriptor_set.c32
-rw-r--r--src/intel/vulkan/anv_device.c40
-rw-r--r--src/intel/vulkan/anv_entrypoints_gen.py57
-rw-r--r--src/intel/vulkan/anv_image.c82
-rw-r--r--src/intel/vulkan/anv_meta_blit.c2
-rw-r--r--src/intel/vulkan/anv_meta_blit2d.c229
-rw-r--r--src/intel/vulkan/anv_meta_clear.c26
-rw-r--r--src/intel/vulkan/anv_pipeline.c210
-rw-r--r--src/intel/vulkan/anv_pipeline_cache.c594
-rw-r--r--src/intel/vulkan/anv_private.h133
-rw-r--r--src/intel/vulkan/anv_wsi_wayland.c1
-rw-r--r--src/intel/vulkan/anv_wsi_x11.c3
-rw-r--r--src/intel/vulkan/gen7_pipeline.c97
-rw-r--r--src/intel/vulkan/gen8_pipeline.c142
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c13
-rw-r--r--src/intel/vulkan/genX_l3.c8
-rw-r--r--src/intel/vulkan/genX_pipeline.c13
-rw-r--r--src/intel/vulkan/genX_pipeline_util.h179
-rw-r--r--src/loader/loader.c7
-rw-r--r--src/loader/loader_dri3_helper.c26
-rw-r--r--src/loader/loader_dri3_helper.h1
-rw-r--r--src/mapi/Makefile.am3
-rw-r--r--src/mapi/entry_x86-64_tls.h9
-rw-r--r--src/mapi/entry_x86_tls.h10
-rw-r--r--src/mapi/entry_x86_tsd.h9
-rw-r--r--src/mesa/Android.gen.mk2
-rw-r--r--src/mesa/Makefile.am10
-rw-r--r--src/mesa/Makefile.sources6
-rw-r--r--src/mesa/SConscript6
-rw-r--r--src/mesa/drivers/dri/i915/intel_context.c15
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_blit.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/brw_blorp_clear.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp27
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c152
-rw-r--r--src/mesa/drivers/dri/i965/brw_performance_monitor.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_pipe_control.c37
-rw-r--r--src/mesa/drivers/dri/i965/brw_tcs.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp164
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen7_cs_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ds_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_batchbuffer.h10
-rw-r--r--src/mesa/drivers/dri/i965/intel_fbo.c13
-rw-r--r--src/mesa/drivers/dri/i965/intel_mipmap_tree.c22
-rw-r--r--src/mesa/drivers/dri/i965/intel_pixel_read.c21
-rw-r--r--src/mesa/drivers/dri/i965/intel_reg.h9
-rw-r--r--src/mesa/drivers/dri/i965/intel_syncobj.c59
-rw-r--r--src/mesa/drivers/dri/i965/intel_tex.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_frag.c2
-rw-r--r--src/mesa/drivers/dri/swrast/swrast.c4
-rw-r--r--src/mesa/main/buffers.c71
-rw-r--r--src/mesa/main/fbobject.c16
-rw-r--r--src/mesa/main/ffvertex_prog.c17
-rw-r--r--src/mesa/main/formatquery.c15
-rw-r--r--src/mesa/main/genmipmap.c10
-rw-r--r--src/mesa/main/get.c8
-rw-r--r--src/mesa/main/get_hash_params.py6
-rw-r--r--src/mesa/main/glformats.c31
-rw-r--r--src/mesa/main/glformats.h3
-rw-r--r--src/mesa/main/shader_query.cpp98
-rw-r--r--src/mesa/main/texgetimage.c14
-rw-r--r--src/mesa/main/teximage.c9
-rw-r--r--src/mesa/main/texstorage.c4
-rw-r--r--src/mesa/state_tracker/st_atom_array.c98
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c10
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c10
-rw-r--r--src/mesa/state_tracker/st_draw.c15
-rw-r--r--src/mesa/state_tracker/st_format.c7
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp122
-rw-r--r--src/mesa/state_tracker/st_vdpau.c10
-rw-r--r--src/mesa/vbo/vbo_exec_array.c1
-rw-r--r--src/mesa/vbo/vbo_save_api.c18
-rw-r--r--src/mesa/vbo/vbo_split_copy.c2
248 files changed, 4655 insertions, 2004 deletions
diff --git a/.travis.yml b/.travis.yml
index da1d81e..5f489a4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,7 @@
language: c
-sudo: false
+sudo: true
+dist: trusty
cache:
directories:
@@ -15,7 +16,11 @@ addons:
- libexpat1-dev
- libxcb-dri2-0-dev
- libx11-xcb-dev
- - llvm-3.4-dev
+ - llvm-3.5-dev
+ # llvm-config is not in the dev package?
+ - llvm-3.5
+ # LLVM packaging is broken and misses this dep.
+ - libedit-dev
- scons
env:
@@ -41,6 +46,16 @@ install:
- export PATH="/usr/lib/ccache:$PATH"
- pip install --user mako
+ # Since libdrm gets updated in configure.ac regularly, try to pick up the
+ # latest version from there.
+ - for line in `grep "^LIBDRM_.*_REQUIRED=" configure.ac`; do
+ old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`;
+ new_ver=`echo $line | sed 's/.*REQUIRED=//'`;
+ if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then
+ export LIBDRM_VERSION="libdrm-$new_ver";
+ fi;
+ done
+
# Install dependencies where we require specific versions (or where
# disallowed by Travis CI's package whitelisting).
@@ -78,22 +93,19 @@ install:
- wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
- tar -jxvf $LIBDRM_VERSION.tar.bz2
- - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix && make install)
+ - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && make install)
- wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
- tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
- (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
-# Disabled LLVM (and therefore r300 and r600) because the build fails
-# with "undefined reference to `clock_gettime'" and "undefined
-# reference to `setupterm'" in llvmpipe.
script:
- if test "x$BUILD" = xmake; then
./autogen.sh --enable-debug
- --disable-gallium-llvm
--with-egl-platforms=x11,drm
--with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
- --with-gallium-drivers=svga,swrast,vc4,virgl
+ --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
+ --disable-llvm-shared-libs
;
make && make check;
elif test x$BUILD = xscons; then
diff --git a/Makefile.am b/Makefile.am
index b0fbed6..2027a28 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -62,6 +62,7 @@ noinst_HEADERS = \
include/c99_math.h \
include/c11 \
include/D3D9 \
+ include/GL/wglext.h \
include/HaikuGL \
include/no_extern_c.h \
include/pci_ids
diff --git a/VERSION b/VERSION
index b700dc1..f0ad792 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.1
+12.0.4
diff --git a/appveyor.yml b/appveyor.yml
index 2e9b9d6..6e69cbf 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -37,6 +37,8 @@ cache:
- win_flex_bison-2.4.5.zip
- llvm-3.3.1-msvc2013-mtd.7z
+os: Visual Studio 2013
+
environment:
WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip
LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
@@ -47,11 +49,13 @@ install:
- python -m pip --version
# Install Mako
- python -m pip install --egg Mako
+# Install pywin32 extensions, needed by SCons
+- python -m pip install pypiwin32
# Install SCons
- python -m pip install --egg scons==2.4.1
- scons --version
# Install flex/bison
-- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "http://downloads.sourceforge.net/project/winflexbison/%WINFLEXBISON_ARCHIVE%"
+- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "https://downloads.sourceforge.net/project/winflexbison/old_versions/%WINFLEXBISON_ARCHIVE%"
- 7z x -y -owinflexbison\ "%WINFLEXBISON_ARCHIVE%" > nul
- set Path=%CD%\winflexbison;%Path%
- win_flex --version
diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore
index 5b0b89c..005e1e3 100644
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,2 +1,25 @@
# The offending commit that this patch (part) reverts isn't in 12.0
be32a2132785fbc119f17e62070e007ee7d17af7 i965/compiler: Bring back the INTEL_PRECISE_TRIG environment variable
+
+# The patch depends on the batch_cache work at least.
+89f00f749fda4c1beca38f362c7f86bdc6e32785 a4xx: make sure to actually clamp depth as requested
+
+# The patch depends on the 'generic' interoplation and location
+# implementation introduced with 2d6dd30a9b30
+114874b22beafb2d07006b197c62d717fc7f80cc i965/fs: Use sample interpolation for interpolateAtCentroid in persample mode
+
+# VAAPI encode landed after the branch point.
+a5993022275c20061ac025d9adc26c5f9d02afee st/va Avoid VBR bitrate calculation overflow v2
+
+# EGL_KHR_debug landed after the branch point.
+17084b6f9340f798111e53e08f5d35c7630cee48 egl: Fix missing unlock in eglGetSyncAttribKHR
+
+# Depends on update_renderbuffer_read_surfaces at least
+f2b9b0c730e345bcffa9eadabb25af3ab02642f2 i965: Add missing BRW_NEW_FS_PROG_DATA to render target reads.
+
+# The commit in question hasn't landed in branch
+1ef787339774bc7f1cc9c1615722f944005e070c Revert "egl/android: Set EGL_MAX_PBUFFER_WIDTH and EGL_MAX_PBUFFER_HEIGHT"
+
+# Patches depend on the fence_finish() gallium API change and corresponding driver work
+f240ad98bc05281ea7013d91973cb5f932ae9434 st/mesa: unduplicate st_check_sync code
+b687f766fddb7b39479cd9ee0427984029ea3559 st/mesa: allow multiple concurrent waiters in ClientWaitSync
diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh
index 0902fd0..4515837 100755
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*12\.0.*mesa-stable\)' HEAD..origin/master |\
while read sha
do
# Check to see whether the patch is on the ignore list.
diff --git a/configure.ac b/configure.ac
index 535a2e3..78cf178 100644
--- a/configure.ac
+++ b/configure.ac
@@ -225,6 +225,7 @@ AX_GCC_FUNC_ATTRIBUTE([packed])
AX_GCC_FUNC_ATTRIBUTE([pure])
AX_GCC_FUNC_ATTRIBUTE([returns_nonnull])
AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([visibility])
AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
AX_GCC_FUNC_ATTRIBUTE([weak])
@@ -783,6 +784,7 @@ if test "x$enable_asm" = xyes; then
esac
fi
+AC_HEADER_MAJOR
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
@@ -1639,7 +1641,7 @@ esac
AC_ARG_WITH([vulkan-icddir],
[AS_HELP_STRING([--with-vulkan-icddir=DIR],
- [directory for the Vulkan driver icd files @<:@${sysconfdir}/vulkan/icd.d@:>@])],
+ [directory for the Vulkan driver icd files @<:@${datarootdir}/vulkan/icd.d@:>@])],
[VULKAN_ICD_INSTALL_DIR="$withval"],
[VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
AC_SUBST([VULKAN_ICD_INSTALL_DIR])
@@ -1997,8 +1999,8 @@ if test "x$with_egl_platforms" != "x" -a "x$enable_egl" != xyes; then
AC_MSG_ERROR([cannot build egl state tracker without EGL library])
fi
-PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland_scanner],
- WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland_scanner`,
+PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
+ WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
WAYLAND_SCANNER='')
if test "x$WAYLAND_SCANNER" = x; then
AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner])
@@ -2182,6 +2184,10 @@ if test "x$enable_gallium_llvm" = xyes; then
LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler"
+ if $LLVM_CONFIG --components | grep -q inteljitevents ; then
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents"
+ fi
+
if test "x$enable_opencl" = xyes; then
llvm_check_version_for "3" "5" "0" "opencl"
@@ -2331,6 +2337,45 @@ swr_llvm_check() {
fi
}
+swr_require_cxx_feature_flags() {
+ feature_name="$1"
+ preprocessor_test="$2"
+ option_list="$3"
+ output_var="$4"
+
+ AC_MSG_CHECKING([whether $CXX supports $feature_name])
+ AC_LANG_PUSH([C++])
+ save_CXXFLAGS="$CXXFLAGS"
+ save_IFS="$IFS"
+ IFS=","
+ found=0
+ for opts in $option_list
+ do
+ unset IFS
+ CXXFLAGS="$opts $save_CXXFLAGS"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [ #if !($preprocessor_test)
+ #error
+ #endif
+ ])],
+ [found=1; break],
+ [])
+ IFS=","
+ done
+ IFS="$save_IFS"
+ CXXFLAGS="$save_CXXFLAGS"
+ AC_LANG_POP([C++])
+ if test $found -eq 1; then
+ AC_MSG_RESULT([$opts])
+ eval "$output_var=\$opts"
+ return 0
+ fi
+ AC_MSG_RESULT([no])
+ AC_MSG_ERROR([swr requires $feature_name support])
+ return 1
+}
+
dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block
if test -n "$with_gallium_drivers"; then
gallium_drivers=`IFS=', '; echo $with_gallium_drivers`
@@ -2400,29 +2445,20 @@ if test -n "$with_gallium_drivers"; then
xswr)
swr_llvm_check "swr"
- AC_MSG_CHECKING([whether $CXX supports c++11/AVX/AVX2])
- AVX_CXXFLAGS="-march=core-avx-i"
- AVX2_CXXFLAGS="-march=core-avx2"
-
- AC_LANG_PUSH([C++])
- save_CXXFLAGS="$CXXFLAGS"
- CXXFLAGS="-std=c++11 $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([c++11 compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
-
- save_CXXFLAGS="$CXXFLAGS"
- CXXFLAGS="$AVX_CXXFLAGS $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([AVX compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
-
- save_CFLAGS="$CXXFLAGS"
- CXXFLAGS="$AVX2_CXXFLAGS $CXXFLAGS"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
- [AC_MSG_ERROR([AVX2 compiler support not detected])])
- CXXFLAGS="$save_CXXFLAGS"
- AC_LANG_POP([C++])
+ swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+ ",-std=c++11" \
+ SWR_CXX11_CXXFLAGS
+ AC_SUBST([SWR_CXX11_CXXFLAGS])
+
+ swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
+ ",-mavx,-march=core-avx" \
+ SWR_AVX_CXXFLAGS
+ AC_SUBST([SWR_AVX_CXXFLAGS])
+
+ swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
+ ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+ SWR_AVX2_CXXFLAGS
+ AC_SUBST([SWR_AVX2_CXXFLAGS])
HAVE_GALLIUM_SWR=yes
;;
@@ -2560,6 +2596,8 @@ fi
AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_OSMESA, test "x$enable_gallium_osmesa" = xyes)
+AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \
+ "x$enable_gallium_osmesa" = xyes)
AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
diff --git a/docs/relnotes/12.0.2.html b/docs/relnotes/12.0.2.html
new file mode 100644
index 0000000..385ef08
--- /dev/null
+++ b/docs/relnotes/12.0.2.html
@@ -0,0 +1,403 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.2 Release Notes / September 2, 2016</h1>
+
+<p>
+Mesa 12.0.2 is a bug fix release which fixes bugs found since the 12.0.1 release.
+</p>
+<p>
+Mesa 12.0.2 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3. OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a08565ab1273751ebe2ffa928cbf785056594c803077c9719d0763da780f2918 mesa-12.0.2.tar.gz
+d957a5cc371dcd7ff2aa0d87492f263aece46f79352f4520039b58b1f32552cb mesa-12.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96381">Bug 96381</a> - Texture artifacts with immutable texture storage and mipmaps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97567">Bug 97567</a> - [SNB, ILK] ctl, piglit regressions in mesa 12.0.2rc1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andreas Boll (1):</p>
+<ul>
+ <li>configure.ac: Use ${datarootdir} for --with-vulkan-icddir help string too</li>
+</ul>
+
+<p>Bernard Kilarski (1):</p>
+<ul>
+ <li>glx: fix error code when there is no context bound</li>
+</ul>
+
+<p>Brian Paul (4):</p>
+<ul>
+ <li>svga: handle mismatched number of samplers, sampler views</li>
+ <li>mesa: use _mesa_clear_texture_image() in clear_texture_fields()</li>
+ <li>swrast: fix incorrectly positioned putImage() in swrast driver</li>
+ <li>mesa: fix format conversion bug in get_tex_rgba_uncompressed()</li>
+</ul>
+
+<p>Chad Versace (2):</p>
+<ul>
+ <li>i965: Fix miptree layout for EGLImage-based renderbuffers</li>
+ <li>i965: Respect miptree offsets in intel_readpixels_tiled_memcpy()</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+ <li>st/mesa: fix reference counting bug in st_vdpau</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+ <li>swr: Refactor checks for compiler feature flags</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+ <li>mesa: Fix fixed function spot lighting on newer hardware (again)</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+ <li>anv: fix writemask on blit fragment shader.</li>
+ <li>st/glsl_to_tgsi: fix st_src_reg_for_double constant.</li>
+</ul>
+
+<p>Emil Velikov (15):</p>
+<ul>
+ <li>docs: add sha256 checksums for 12.0.1</li>
+ <li>mesa: automake: list builddir before srcdir</li>
+ <li>mesa: scons: list builddir before srcdir</li>
+ <li>i965: store reference to the context within struct brw_fence (v2)</li>
+ <li>anv: remove internal 'validate' layer</li>
+ <li>anv: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+ <li>anv: automake: build with -Bsymbolic</li>
+ <li>anv: do not export the Vulkan API</li>
+ <li>anv: remove dummy VK_DEBUG_MARKER_EXT entry points</li>
+ <li>isl: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+ <li>cherry-ignore: temporary(?) drop "a4xx: make sure to actually clamp depth"</li>
+ <li>i915: Check return value of screen-&gt;image.loader-&gt;getBuffers</li>
+ <li>Revert "i965/miptree: Set logical_depth0 == 6 for cube maps"</li>
+ <li>glx/glvnd: list the strcmp arguments in correct order</li>
+ <li>Update version to 12.0.2</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+ <li>vc4: Close our screen's fd on screen close.</li>
+ <li>vc4: Disable early Z with computed depth.</li>
+ <li>vc4: Fix a leak of the src[] array of VPM reads in optimization.</li>
+ <li>vc4: Fix leak of the bo_handles table.</li>
+</ul>
+
+<p>Francisco Jerez (3):</p>
+<ul>
+ <li>i965: Emit SKL VF cache invalidation W/A from brw_emit_pipe_control_flush.</li>
+ <li>i965: Make room in the batch epilogue for three more pipe controls.</li>
+ <li>i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+ <li>platform_android: prevent deadlock in droid_swap_buffers</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+ <li>mesa: Strip arrayness from interface block names in some IO validation</li>
+ <li>glsl: Pack integer and double varyings as flat even if interpolation mode is none</li>
+ <li>glcpp: Track the actual version instead of just the version_resolved flag</li>
+ <li>glcpp: Only disallow #undef of pre-defined macros on GLSL ES &gt;= 3.00 shaders</li>
+ <li>glsl: Mark cube map array sampler types as reserved in GLSL ES 3.10</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+ <li>mesa: etc2 online compression is unsupported, don't attempt it</li>
+ <li>st/mesa: return appropriate mesa format for ETC texture formats</li>
+ <li>mesa: set _NEW_BUFFERS when updating texture bound to current buffers</li>
+ <li>nv50,nvc0: srgb rendering is only available for rgba/bgra</li>
+ <li>vbo: allow DrawElementsBaseVertex in display lists</li>
+ <li>gallium/util: add helper to compute zmin/zmax for a viewport state</li>
+ <li>nv50,nvc0: fix depth range when halfz is enabled</li>
+ <li>nv50/ir: fix bb positions after exit instructions</li>
+ <li>vbo: add basevertex when looking up elements for vbo splitting</li>
+ <li>a4xx: only disable depth clipping, not all clipping, when requested</li>
+ <li>nv50/ir: make sure cfg iterator always hits all blocks</li>
+ <li>main: add missing EXTRA_END in OES_sample_variables get check</li>
+ <li>nouveau: always enable at least one RC</li>
+ <li>nv30: only bail on color/depth bpp mismatch when surfaces are swizzled</li>
+ <li>a4xx: make sure to actually clamp depth as requested</li>
+ <li>gk110/ir: fix quadop dall emission</li>
+</ul>
+
+<p>Jan Ziak (2):</p>
+<ul>
+ <li>egl/x11: avoid using freed memory if dri2 init fails</li>
+ <li>loader: fix memory leak in loader_dri3_open</li>
+</ul>
+
+<p>Jason Ekstrand (31):</p>
+<ul>
+ <li>nir/spirv: Don't multiply the push constant block size by 4</li>
+ <li>anv: Add a stub for CmdCopyQueryPoolResults on Ivy Bridge</li>
+ <li>glsl/types: Fix function type comparison function</li>
+ <li>glsl/types: Use _mesa_hash_data for hashing function types</li>
+ <li>genxml: Make gen6-7 blending look more like gen8</li>
+ <li>anv/pipeline: Unify blend state setup between gen7 and gen8</li>
+ <li>anv: Enable independentBlend on gen7</li>
+ <li>anv: Add an align_down_npot_u32 helper</li>
+ <li>anv: Handle VK_WHOLE_SIZE properly for buffer views</li>
+ <li>i965/miptree: Enforce that height == 1 for 1-D array textures</li>
+ <li>i965/miptree: Set logical_depth0 == 6 for cube maps</li>
+ <li>nir: Add a nir_deref_foreach_leaf helper</li>
+ <li>nir/inline: Constant-initialize local variables in the callee if needed</li>
+ <li>anv/pipeline: Set up point coord enables</li>
+ <li>i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations</li>
+ <li>i965/vec4: Make opt_vector_float reset at the top of each block</li>
+ <li>anv/blit2d: Add a format parameter to bind_dst and create_iview</li>
+ <li>anv/blit2d: Add support for RGB destinations</li>
+ <li>anv/clear: Make cmd_clear_image take an actual VkClearValue</li>
+ <li>anv/clear: Clear E5B9G9R9 images as R32_UINT</li>
+ <li>anv: Include the pipeline layout in the shader hash</li>
+ <li>isl: Allow multisampled array textures</li>
+ <li>anv/descriptor_set: memset anv_descriptor_set_layout</li>
+ <li>anv/pipeline: Fix bind maps for fragment output arrays</li>
+ <li>anv/allocator: Correctly set the number of buckets</li>
+ <li>anv/pipeline: Properly handle OOM during shader compilation</li>
+ <li>anv: Remove unused fields from anv_pipeline_bind_map</li>
+ <li>anv: Add pipeline_has_stage guards a few places</li>
+ <li>anv: Add a struct for storing a compiled shader</li>
+ <li>anv/pipeline: Add support for caching the push constant map</li>
+ <li>anv: Rework pipeline caching</li>
+</ul>
+
+<p>José Fonseca (2):</p>
+<ul>
+ <li>appveyor: Install pywin32 extensions.</li>
+ <li>appveyor: Force Visual Studio 2013 image.</li>
+</ul>
+
+<p>Kenneth Graunke (21):</p>
+<ul>
+ <li>genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.</li>
+ <li>genxml: Add APIMODE_D3D missing enum values and improve consistency.</li>
+ <li>anv: Fix near plane clipping on Gen7/7.5.</li>
+ <li>anv: Enable early culling on Gen7.</li>
+ <li>anv: Unify 3DSTATE_CLIP code across generations.</li>
+ <li>genxml: Rename "API Rendering Disable" to "Rendering Disable".</li>
+ <li>anv: Properly call gen75_emit_state_base_address on Haswell.</li>
+ <li>i965: Include VUE handles for GS with invocations &gt; 1.</li>
+ <li>nir: Add a base const_index to shared atomic intrinsics.</li>
+ <li>i965: Fix shared atomic intrinsics to pay attention to base.</li>
+ <li>mesa: Add GL_BGRA_EXT to the list of GenerateMipmap internal formats.</li>
+ <li>mesa: Don't call GenerateMipmap if Width or Height == 0.</li>
+ <li>glsl: Delete bogus ir_set_program_inouts assert.</li>
+ <li>glsl: Fix the program resource names of gl_TessLevelOuter/Inner[].</li>
+ <li>glsl: Fix location bias for patch variables.</li>
+ <li>glsl: Fix invariant matching in GLSL 4.30 and GLSL ES 1.00.</li>
+ <li>mesa: Fix uf10_to_f32() scale factor in the E == 0 and M != 0 case.</li>
+ <li>nir/builder: Add bany_inequal and bany helpers.</li>
+ <li>i965: Implement the WaPreventHSTessLevelsInterference workaround.</li>
+ <li>i965: Fix execution size of scalar TCS barrier setup code.</li>
+ <li>i965: Fix barrier count shift in scalar TCS backend.</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+ <li>st/omx/enc: check uninitialized list from task release</li>
+ <li>vl/dri3: fix a memory leak from front buffer</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+ <li>glsl_to_tgsi: don't use the negate modifier in integer ops after bitcast</li>
+ <li>radeonsi: add a workaround for a compute VGPR-usage LLVM bug</li>
+ <li>winsys/amdgpu: disallow DCC with mipmaps</li>
+ <li>gallium/util: fix align64</li>
+ <li>radeonsi: only set dual source blending for MRT0</li>
+ <li>radeonsi: fix VM faults due NULL internal const buffers on CIK</li>
+ <li>radeonsi: disable SDMA texture copying on Carrizo</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+ <li>mapi: Massage code to allow clang to compile.</li>
+ <li>i965/vec4: Ignore swizzle of VGRF for use by var_range_end().</li>
+ <li>mesa: Use AC_HEADER_MAJOR to include correct header for major().</li>
+ <li>nir: Walk blocks in source code order in lower_vars_to_ssa.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+ <li>glx: Don't use current context in __glXSendError</li>
+</ul>
+
+<p>Miklós Máté (1):</p>
+<ul>
+ <li>vbo: set draw_id</li>
+</ul>
+
+<p>Nanley Chery (5):</p>
+<ul>
+ <li>anv/descriptor_set: Fix binding partly undefined descriptor sets</li>
+ <li>isl: Fix assert on raw buffer surface state size</li>
+ <li>anv/device: Fix max buffer range limits</li>
+ <li>isl: Fix isl_tiling_is_any_y()</li>
+ <li>anv/gen7_pipeline: Set PixelShaderKillPixel for discards</li>
+</ul>
+
+<p>Nicolai Hähnle (7):</p>
+<ul>
+ <li>radeonsi: explicitly choose center locations for 1xAA on Polaris</li>
+ <li>radeonsi: fix Polaris MSAA regression</li>
+ <li>radeonsi: ensure sample locations are set for line and polygon smoothing</li>
+ <li>st_glsl_to_tgsi: only skip over slots of an input array that are present</li>
+ <li>glsl: fix optimization of discard nested multiple levels</li>
+ <li>radeonsi: flush TC L2 cache for indirect draw data</li>
+ <li>radeonsi: add si_set_rw_buffer to be used for internal descriptors</li>
+</ul>
+
+<p>Nicolas Boichat (6):</p>
+<ul>
+ <li>egl/dri2: dri2_make_current: Set EGL error if bindContext fails</li>
+ <li>egl/wayland: Set disp-&gt;DriverData to NULL on error</li>
+ <li>egl/surfaceless: Set disp-&gt;DriverData to NULL on error</li>
+ <li>egl/drm: Set disp-&gt;DriverData to NULL on error</li>
+ <li>egl/android: Set dpy-&gt;DriverData to NULL on error</li>
+ <li>egl/dri2: Add reference count for dri2_egl_display</li>
+</ul>
+
+<p>Rob Herring (3):</p>
+<ul>
+ <li>Android: add missing u_math.h include path for libmesa_isl</li>
+ <li>vc4: fix vc4_resource_from_handle() stride calculation</li>
+ <li>vc4: add hash table look-up for exported dmabufs</li>
+</ul>
+
+<p>Samuel Pitoiset (7):</p>
+<ul>
+ <li>nvc0/ir: fix images indirect access on Fermi</li>
+ <li>nvc0: fix the driver cb size when draw parameters are used</li>
+ <li>gm107/ir: add missing NEG modifier for IADD32I</li>
+ <li>gm107/ir: make use of ADD32I for all immediates</li>
+ <li>nvc0: upload sample locations on GM20x</li>
+ <li>nvc0: invalidate textures/samplers on GK104+</li>
+ <li>nv50/ir: always emit the NDV bit for OP_QUADOP</li>
+</ul>
+
+<p>Stefan Dirsch (1):</p>
+<ul>
+ <li>Avoid overflow in 'last' variable of FindGLXFunction(...)</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+ <li>egl/wayland-egl: Fix for segfault in dri2_wl_destroy_surface.</li>
+</ul>
+
+<p>Tim Rowley (2):</p>
+<ul>
+ <li>Revert "gallium: Force blend color to 16-byte alignment"</li>
+ <li>swr: switch from overriding -march to selecting features</li>
+</ul>
+
+<p>Tomasz Figa (8):</p>
+<ul>
+ <li>gallium/dri: Add shared glapi to LIBADD on Android</li>
+ <li>egl/android: Remove unused variables</li>
+ <li>egl/android: Check return value of dri2_get_dri_config()</li>
+ <li>egl/android: Stop leaking DRI images</li>
+ <li>gallium/winsys/kms: Fix double refcount when importing from prime FD (v2)</li>
+ <li>gallium/winsys/kms: Fully initialize kms_sw_dt at prime import time (v2)</li>
+ <li>gallium/winsys/kms: Move display target handle lookup to separate function</li>
+ <li>gallium/winsys/kms: Look up the GEM handle after importing a prime FD</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/12.0.3.html b/docs/relnotes/12.0.3.html
new file mode 100644
index 0000000..70e704b
--- /dev/null
+++ b/docs/relnotes/12.0.3.html
@@ -0,0 +1,71 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.3 Release Notes / September 15, 2016</h1>
+
+<p>
+Mesa 12.0.3 is a bug fix release which fixes bugs found since the 12.0.2 release.
+</p>
+<p>
+Mesa 12.0.3 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3. OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+79abcfab3de30dbd416d1582a3cf6b1be308466231488775f1b7bb43be353602 mesa-12.0.3.tar.gz
+1dc86dd9b51272eee1fad3df65e18cda2e556ef1bc0b6e07cd750b9757f493b1 mesa-12.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97781">Bug 97781</a> - [HSW, BYT, IVB] es2-cts.gtf.gl2extensiontests.depth_texture_cube_map.depth_texture_cube_map</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Emil Velikov (3):</p>
+<ul>
+ <li>docs: add sha256 checksums for 12.0.2</li>
+ <li>Revert "i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations"</li>
+ <li>Update version to 12.0.3</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+ <li>appveyor: Update winflexbison download URL.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/12.0.4.html b/docs/relnotes/12.0.4.html
new file mode 100644
index 0000000..eaa9ba5
--- /dev/null
+++ b/docs/relnotes/12.0.4.html
@@ -0,0 +1,321 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Mesa Release Notes</title>
+ <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+ <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.4 Release Notes / November 10, 2016</h1>
+
+<p>
+Mesa 12.0.4 is a bug fix release which fixes bugs found since the 12.0.3 release.
+</p>
+<p>
+Mesa 12.0.4 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3. OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+22026ce4f1c6a7908b0d10ff057decec0a5633afe7f38a0cef5c08d0689f02a6 mesa-12.0.4.tar.gz
+5d6003da867d3f54e5000b4acdfc37e6cce5b6a4459274fdad73e24bd2f0065e mesa-12.0.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Axel Davy (4):</p>
+<ul>
+ <li>gallium/util: Really allow aliasing of dst for u_box_union_*</li>
+ <li>st/nine: Fix the calculation of the number of vs inputs</li>
+ <li>st/nine: Fix mistake in Volume9 UnlockBox</li>
+ <li>st/nine: Fix locking CubeTexture surfaces.</li>
+</ul>
+
+<p>Brendan King (1):</p>
+<ul>
+ <li>configure.ac: fix the name of the Wayland Scanner pc file</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+ <li>st/mesa: fix swizzle issue in st_create_sampler_view_from_stobj()</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+ <li>egl: Fix truncation error in _eglParseSyncAttribList64</li>
+ <li>i965/sync: Fix uninitialized usage and leak of mutex</li>
+ <li>egl: Don't advertise unsupported platform extensions</li>
+</ul>
+
+<p>Chuanbo Weng (1):</p>
+<ul>
+ <li>gbm: fix potential NULL deref of mapImage/unmapImage.</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+ <li>autoconf: Make header install distinct for various APIs (v2)</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+ <li>anv: initialise and increment send_sbc</li>
+ <li>anv/wsi: fix apps that acquire multiple images up front</li>
+ <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+</ul>
+
+<p>Emil Velikov (12):</p>
+<ul>
+ <li>docs: add sha256 checksums for 12.0.3</li>
+ <li>cherry-ignore: add non-applicable i965 commit</li>
+ <li>cherry-ignore: add vaapi encode fix</li>
+ <li>cherry-ignore: add EGL_KHR_debug fix</li>
+ <li>cherry-ignore: add update_renderbuffer_read_surfaces()</li>
+ <li>isl/gen6: correctly check msaa layout samples count</li>
+ <li>egl/x11: don't crash if dri2_dpy-&gt;conn is NULL</li>
+ <li>get-pick-list.sh: Require explicit "12.0" for nominating stable patches</li>
+ <li>automake: don't forget to pick wglext.h in the tarball</li>
+ <li>cherry-ignore: add N/A EGL revert</li>
+ <li>cherry-ignore: add ClientWaitSync fixes</li>
+ <li>Update version to 12.0.4</li>
+</ul>
+
+<p>Eric Anholt (5):</p>
+<ul>
+ <li>travis: Parse configure.ac to pick an updated LIBDRM_VERSION.</li>
+ <li>travis: Update to the Ubuntu Trusty image.</li>
+ <li>travis: Enable vc4 in libdrm to satisfy vc4 test build dependency.</li>
+ <li>travis: Upgrade LLVM dependency to 3.5 and enable LLVM drivers.</li>
+ <li>gallium: Fix install-gallium-links.mk on non-bash /bin/sh</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+ <li>pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+ <li>glsl: Fix cut-and-paste bug in hierarchical visitor ir_expression::accept</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+ <li>nv30: set usage to staging so that the buffer is allocated in GART</li>
+ <li>a3xx: make sure to actually clamp depth as requested</li>
+ <li>a3xx: make use of software clipping when hw can't handle it</li>
+ <li>a3xx: use window scissor to simulate viewport xy clip</li>
+ <li>main: GL_RGB10_A2UI does not come with GL 3.0/EXT_texture_integer</li>
+ <li>mesa/formatquery: limit ES target support, fix core context support</li>
+ <li>nir: fix definition of pack_uvec2_to_uint</li>
+ <li>gm107/ir: AL2P writes to a predicate register</li>
+ <li>st/mesa: fix is_scissor_enabled when X/Y are negative</li>
+ <li>nvc0/ir: fix overwriting of value backing non-constant gather offset</li>
+ <li>nv50/ir: copy over value's register id when resolving merge of a phi</li>
+ <li>nvc0/ir: fix textureGather with a single offset</li>
+ <li>gm107/ir: fix texturing with indirect samplers</li>
+ <li>gm107/ir: fix bit offset of tex lod setting for indirect texturing</li>
+ <li>nv50,nvc0: avoid reading out of bounds when getting bogus so info</li>
+ <li>nv50/ir: process texture offset sources as regular sources</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+ <li>radeonsi: Fix primitive restart when index changes</li>
+</ul>
+
+<p>Jason Ekstrand (9):</p>
+<ul>
+ <li>nir/spirv: Swap the argument order for AtomicCompareExchange</li>
+ <li>nir/spirv: Use the correct sources for CompareExchange on images</li>
+ <li>nir/spirv: Break variable decoration handling into a helper</li>
+ <li>nir/spirv: Refactor variable decoration handling</li>
+ <li>nir/spirv/cfg: Handle switches whose break block is a loop continue</li>
+ <li>nir/spirv/cfg: Detect switch_break after loop_break/continue</li>
+ <li>nir: Add a nop intrinsic</li>
+ <li>nir/spirv/cfg: Use a nop intrinsic for tagging the ends of blocks</li>
+ <li>intel/blorp: Rework our usage of ralloc when compiling shaders</li>
+</ul>
+
+<p>Jonathan Gray (3):</p>
+<ul>
+ <li>genxml: add generated headers to EXTRA_DIST</li>
+ <li>mapi: automake: set VISIBILITY_CFLAGS for shared glapi</li>
+ <li>mesa: automake: include mesa_glinterop.h in distfile</li>
+</ul>
+
+<p>Julien Isorce (1):</p>
+<ul>
+ <li>st/va: also honors interlaced preference when providing a video format</li>
+</ul>
+
+<p>Kenneth Graunke (8):</p>
+<ul>
+ <li>nir: Call nir_metadata_preserve from nir_lower_alu_to_scalar().</li>
+ <li>mesa: Expose RESET_NOTIFICATION_STRATEGY with KHR_robustness.</li>
+ <li>i965: Fix missing _NEW_TRANSFORM in Gen8+ 3DSTATE_DS atom.</li>
+ <li>i965: Add missing BRW_NEW_VS_PROG_DATA to 3DSTATE_CLIP.</li>
+ <li>i965: Move BRW_NEW_FRAGMENT_PROGRAM from 3DSTATE_PS to PS_EXTRA.</li>
+ <li>i965: Add missing BRW_NEW_CS_PROG_DATA to compute constant atom.</li>
+ <li>i965: Add missing BRW_CS_PROG_DATA to CS work group surface atom.</li>
+ <li>i965: Fix gl_InvocationID in dual object GS where invocations == 1.</li>
+</ul>
+
+<p>Marek Olšák (12):</p>
+<ul>
+ <li>radeonsi: fix cubemaps viewed as 2D</li>
+ <li>radeonsi: take compute shader and dispatch indirect memory usage into account</li>
+ <li>radeonsi: fix FP64 UBO loads with indirect uniform block indexing</li>
+ <li>mesa: fix glGetFramebufferAttachmentParameteriv w/ on-demand FRONT_BACK alloc</li>
+ <li>radeonsi: fix interpolateAt opcodes for .zw components</li>
+ <li>radeonsi: fix texture border colors for compute shaders</li>
+ <li>radeonsi: disable ReZ</li>
+ <li>gallium/radeon: make sure the address of separate CMASK is aligned properly</li>
+ <li>winsys/amdgpu: fix radeon_surf::macro_tile_index for imported textures</li>
+ <li>egl: use util/macros.h</li>
+ <li>egl: make interop ABI visible again</li>
+ <li>glx: make interop ABI visible again</li>
+</ul>
+
+<p>Mario Kleiner (1):</p>
+<ul>
+ <li>glx: Perform check for valid fbconfig against proper X-Screen.</li>
+</ul>
+
+<p>Martin Peres (2):</p>
+<ul>
+ <li>loader/dri3: add get_dri_screen() to the vtable</li>
+ <li>loader/dri3: import prime buffers in the currently-bound screen</li>
+</ul>
+
+<p>Matt Whitlock (5):</p>
+<ul>
+ <li>egl/android: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+ <li>gallium/auxiliary: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+ <li>st/dri: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+ <li>st/xa: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+ <li>gallium/winsys: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+</ul>
+
+<p>Max Staudt (1):</p>
+<ul>
+ <li>r300g: Set R300_VAP_CNTL on RSxxx to avoid triangle flickering</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+ <li>loader/dri3: Overhaul dri3_update_num_back</li>
+</ul>
+
+<p>Nicholas Bishop (2):</p>
+<ul>
+ <li>gbm: return appropriate error when queryImage() fails</li>
+ <li>st/dri: check pipe_screen-&gt;resource_get_handle() return value</li>
+</ul>
+
+<p>Nicolai Hähnle (10):</p>
+<ul>
+ <li>gallium/radeon: cleanup and fix branch emits</li>
+ <li>st/glsl_to_tgsi: disable on-the-fly peephole for 64-bit operations</li>
+ <li>st/glsl_to_tgsi: simplify translate_tex_offset</li>
+ <li>st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets</li>
+ <li>st/mesa: fix vertex elements setup for doubles</li>
+ <li>radeonsi: fix indirect loads of 64 bit constants</li>
+ <li>st/glsl_to_tgsi: fix atomic counter addressing</li>
+ <li>st/glsl_to_tgsi: fix block copies of arrays of doubles</li>
+ <li>st/mesa: only set primitive_restart when the restart index is in range</li>
+ <li>radeonsi: fix 64-bit loads from LDS</li>
+</ul>
+
+<p>Samuel Pitoiset (4):</p>
+<ul>
+ <li>nvc0/ir: fix subops for IMAD</li>
+ <li>gk110/ir: fix wrong emission of OP_NOT</li>
+ <li>nvc0: use correct bufctx when invalidating CP textures</li>
+ <li>nvc0/ir: fix emission of IMAD with NEG modifiers</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+ <li>egl/wayland: add missing destroy_window callback</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+ <li>egl: stop claiming support for pbuffer + msaa</li>
+ <li>egl/dri2: set max values for pbuffer width and height</li>
+ <li>egl: add check that eglCreateContext gets a valid config</li>
+ <li>mesa: fix error handling in DrawBuffers</li>
+ <li>egl: set preserved behavior for surface only if config supports it</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+ <li>configure.ac: add llvm inteljitevents component if enabled</li>
+</ul>
+
+<p>Vedran Miletić (1):</p>
+<ul>
+ <li>clover: Fix build against clang SVN &gt;= r273191</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+ <li>Revert "mesa_glinterop: remove inclusion of GLX header"</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h
index c0c20d6..0b373c1 100644
--- a/include/GL/mesa_glinterop.h
+++ b/include/GL/mesa_glinterop.h
@@ -58,8 +58,8 @@ extern "C" {
#endif
/* Forward declarations to avoid inclusion of GL/glx.h */
-typedef struct _XDisplay Display;
-typedef struct __GLXcontextRec *GLXContext;
+struct _XDisplay;
+struct __GLXcontextRec;
/* Forward declarations to avoid inclusion of EGL/egl.h */
typedef void *EGLDisplay;
@@ -246,7 +246,7 @@ struct mesa_glinterop_export_out {
* \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
*/
int
-MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
+MesaGLInteropGLXQueryDeviceInfo(struct _XDisplay *dpy, struct __GLXcontextRec *context,
struct mesa_glinterop_device_info *out);
@@ -271,7 +271,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
* \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
*/
int
-MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
+MesaGLInteropGLXExportObject(struct _XDisplay *dpy, struct __GLXcontextRec *context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out);
@@ -286,11 +286,11 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_export_out *out);
-typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext context,
+typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context,
struct mesa_glinterop_device_info *out);
typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_device_info *out);
-typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext context,
+typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out);
typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext context,
diff --git a/install-gallium-links.mk b/install-gallium-links.mk
index ac5a499..fc2f75d 100644
--- a/install-gallium-links.mk
+++ b/install-gallium-links.mk
@@ -13,8 +13,8 @@ all-local : .install-gallium-links
fi; \
$(MKDIR_P) $$link_dir; \
file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \
- file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
- file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
+ file_list="$$file_list$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
+ file_list="$$file_list$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
for f in $$file_list; do \
if test -h .libs/$$f; then \
cp -d $$f $$link_dir; \
diff --git a/src/Makefile.am b/src/Makefile.am
index b130f5b..c0aa115 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -47,6 +47,30 @@ CLEANFILES = $(BUILT_SOURCES)
SUBDIRS = . gtest util mapi/glapi/gen mapi
+if HAVE_OPENGL
+gldir = $(includedir)/GL
+gl_HEADERS = \
+ $(top_srcdir)/include/GL/gl.h \
+ $(top_srcdir)/include/GL/glext.h \
+ $(top_srcdir)/include/GL/glcorearb.h \
+ $(top_srcdir)/include/GL/gl_mangle.h
+endif
+
+if HAVE_GLX
+glxdir = $(includedir)/GL
+glx_HEADERS = \
+ $(top_srcdir)/include/GL/glx.h \
+ $(top_srcdir)/include/GL/glxext.h \
+ $(top_srcdir)/include/GL/glx_mangle.h
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = mesa/gl.pc
+endif
+
+if HAVE_COMMON_OSMESA
+osmesadir = $(includedir)/GL
+osmesa_HEADERS = $(top_srcdir)/include/GL/osmesa.h
+endif
+
# include only conditionally ?
SUBDIRS += compiler
@@ -93,7 +117,8 @@ SUBDIRS += gallium
endif
EXTRA_DIST = \
- getopt hgl SConscript
+ getopt hgl SConscript \
+ $(top_srcdir)/include/GL/mesa_glinterop.h
AM_CFLAGS = $(VISIBILITY_CFLAGS)
AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index 4022727..68544ae 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -278,10 +278,34 @@ control_line_success:
HASH_TOKEN DEFINE_TOKEN define
| HASH_TOKEN UNDEF IDENTIFIER NEWLINE {
macro_t *macro;
- if (strcmp("__LINE__", $3) == 0
- || strcmp("__FILE__", $3) == 0
- || strcmp("__VERSION__", $3) == 0
- || strncmp("GL_", $3, 3) == 0)
+
+ /* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:
+ *
+ * It is an error to undefine or to redefine a built-in
+ * (pre-defined) macro name.
+ *
+ * The GLSL ES 1.00 spec does not contain this text.
+ *
+ * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:
+ *
+ * #define and #undef functionality are defined as is
+ * standard for C++ preprocessors for macro definitions
+ * both with and without macro parameters.
+ *
+ * At least as far as I can tell GCC allows '#undef __FILE__'.
+ * Furthermore, there are desktop OpenGL conformance tests
+ * that expect '#undef __VERSION__' and '#undef
+ * GL_core_profile' to work.
+ *
+ * Only disallow #undef of pre-defined macros on GLSL ES >=
+ * 3.00 shaders.
+ */
+ if (parser->is_gles &&
+ parser->version >= 300 &&
+ (strcmp("__LINE__", $3) == 0
+ || strcmp("__FILE__", $3) == 0
+ || strcmp("__VERSION__", $3) == 0
+ || strncmp("GL_", $3, 3) == 0))
glcpp_error(& @1, parser, "Built-in (pre-defined)"
" macro names cannot be undefined.");
@@ -396,13 +420,13 @@ control_line_success:
_glcpp_parser_skip_stack_pop (parser, & @1);
} NEWLINE
| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
- if (parser->version_resolved) {
+ if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
}
| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
- if (parser->version_resolved) {
+ if (parser->version != 0) {
glcpp_error(& @1, parser, "#version must appear on the first line");
}
_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -1345,7 +1369,7 @@ glcpp_parser_create(const struct gl_extensions *extensions, gl_api api)
parser->extensions = extensions;
parser->api = api;
- parser->version_resolved = false;
+ parser->version = 0;
parser->has_new_line_number = 0;
parser->new_line_number = 1;
@@ -2281,10 +2305,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
{
const struct gl_extensions *extensions = parser->extensions;
- if (parser->version_resolved)
+ if (parser->version != 0)
return;
- parser->version_resolved = true;
+ parser->version = version;
add_builtin_define (parser, "__VERSION__", version);
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index d87e6b7..06f3521 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -196,7 +196,7 @@ struct glcpp_parser {
int error;
const struct gl_extensions *extensions;
gl_api api;
- bool version_resolved;
+ unsigned version;
bool has_new_line_number;
int new_line_number;
bool has_new_source_number;
diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c
index 49e7696..f8ade19 100644
--- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c
+++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c
@@ -1,3 +1,4 @@
+#version 300 es
#undef __LINE__
#undef __FILE__
#undef __VERSION__
diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
index 3b736df..498dc0f 100644
--- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
+++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
@@ -1,6 +1,7 @@
-0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+0:4(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+#version 300 es
diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c
new file mode 100644
index 0000000..e3af10d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c
@@ -0,0 +1,4 @@
+#version 110
+#undef __LINE__
+#undef __FILE__
+#undef __VERSION__
diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected
new file mode 100644
index 0000000..cd0071f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected
@@ -0,0 +1,4 @@
+#version 110
+
+
+
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index 11711ee..c31958b 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -348,10 +348,10 @@ isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_mul
usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);
/* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
-samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
-isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
-usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
-samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
+samplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
+isamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
+usamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
+samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
samplerExternalOES {
if (yyextra->OES_EGL_image_external_enable)
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index 3885688..c72f119 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1784,8 +1784,10 @@ type_qualifier:
* variables. As only outputs can be declared as invariant, an invariant
* output from one shader stage will still match an input of a subsequent
* stage without the input being declared as invariant."
+ *
+ * On the desktop side, this text first appears in GLSL 4.30.
*/
- if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+ if (state->is_version(430, 300) && $$.flags.q.in)
_mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
}
| interpolation_qualifier type_qualifier
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 93716c4..3809270 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -586,6 +586,13 @@ public:
return this->u.state_slots;
}
+ inline bool is_interpolation_flat() const
+ {
+ return this->data.interpolation == INTERP_QUALIFIER_FLAT ||
+ this->type->contains_integer() ||
+ this->type->contains_double();
+ }
+
inline bool is_name_ralloced() const
{
return this->name != ir_variable::tmp_name;
diff --git a/src/compiler/glsl/ir_hv_accept.cpp b/src/compiler/glsl/ir_hv_accept.cpp
index 213992a..5cc6a34 100644
--- a/src/compiler/glsl/ir_hv_accept.cpp
+++ b/src/compiler/glsl/ir_hv_accept.cpp
@@ -147,7 +147,7 @@ ir_expression::accept(ir_hierarchical_visitor *v)
goto done;
case visit_stop:
- return s;
+ return visit_stop;
}
}
diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp
index 183b13b..bca1e0a 100644
--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -260,15 +260,19 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
* lowering passes (do_vec_index_to_swizzle() gets rid of indexing into
* vectors, and lower_packed_varyings() gets rid of structs that occur in
* varyings).
+ *
+ * However, we don't use varying packing in all cases - tessellation
+ * shaders bypass it. This means we'll see varying structs and arrays
+ * of structs here. For now, we just give up so the caller marks the
+ * entire variable as used.
*/
if (!(type->is_matrix() ||
(type->is_array() &&
(type->fields.array->is_numeric() ||
type->fields.array->is_boolean())))) {
- assert(!"Unexpected indexing in ir_set_program_inouts");
- /* For safety in release builds, in case we ever encounter unexpected
- * indexing, give up and let the caller mark the whole variable as used.
+ /* If we don't know how to handle this case, give up and let the
+ * caller mark the whole variable as used.
*/
return false;
}
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index 5a5adc0..ddf6aa2 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -308,7 +308,25 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
return;
}
- if (!prog->IsES && input->data.invariant != output->data.invariant) {
+ /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+ *
+ * "As only outputs need be declared with invariant, an output from
+ * one shader stage will still match an input of a subsequent stage
+ * without the input being declared as invariant."
+ *
+ * while GLSL 4.20 says:
+ *
+ * "For variables leaving one shader and coming into another shader,
+ * the invariant keyword has to be used in both shaders, or a link
+ * error will result."
+ *
+ * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
+ *
+ * "The invariance of varyings that are declared in both the vertex
+ * and fragment shaders must match."
+ */
+ if (input->data.invariant != output->data.invariant &&
+ prog->Version < (prog->IsES ? 300 : 430)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
@@ -1610,7 +1628,8 @@ varying_matches::compute_packing_class(const ir_variable *var)
unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
(var->data.patch << 2);
packing_class *= 4;
- packing_class += var->data.interpolation;
+ packing_class += var->is_interpolation_flat()
+ ? unsigned(INTERP_QUALIFIER_FLAT) : var->data.interpolation;
return packing_class;
}
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 6379ed2..02b3e00 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3687,6 +3687,18 @@ create_shader_variable(struct gl_shader_program *shProg,
if (in->data.mode == ir_var_system_value &&
in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
out->name = ralloc_strdup(shProg, "gl_VertexID");
+ } else if ((in->data.mode == ir_var_shader_out &&
+ in->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) ||
+ (in->data.mode == ir_var_system_value &&
+ in->data.location == SYSTEM_VALUE_TESS_LEVEL_OUTER)) {
+ out->name = ralloc_strdup(shProg, "gl_TessLevelOuter");
+ type = glsl_type::get_array_instance(glsl_type::float_type, 4);
+ } else if ((in->data.mode == ir_var_shader_out &&
+ in->data.location == VARYING_SLOT_TESS_LEVEL_INNER) ||
+ (in->data.mode == ir_var_system_value &&
+ in->data.location == SYSTEM_VALUE_TESS_LEVEL_INNER)) {
+ out->name = ralloc_strdup(shProg, "gl_TessLevelInner");
+ type = glsl_type::get_array_instance(glsl_type::float_type, 2);
} else {
out->name = ralloc_strdup(shProg, name);
}
@@ -3839,6 +3851,9 @@ add_interface_variables(struct gl_shader_program *shProg,
continue;
};
+ if (var->data.patch)
+ loc_bias = int(VARYING_SLOT_PATCH0);
+
/* Skip packed varyings, packed varyings are handled separately
* by add_packed_varyings.
*/
diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp
index 41edada..1e7a8c2 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -273,11 +273,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
continue;
/* This lowering pass is only capable of packing floats and ints
- * together when their interpolation mode is "flat". Therefore, to be
- * safe, caller should ensure that integral varyings always use flat
- * interpolation, even when this is not required by GLSL.
+ * together when their interpolation mode is "flat". Treat integers as
+ * being flat when the interpolation mode is none.
*/
assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+ var->data.interpolation == INTERP_QUALIFIER_NONE ||
!var->type->contains_integer());
/* Clone the variable for program resource list before
@@ -607,7 +607,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
if (this->packed_varyings[slot] == NULL) {
char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
const glsl_type *packed_type;
- if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
+ if (unpacked_var->is_interpolation_flat())
packed_type = glsl_type::ivec4_type;
else
packed_type = glsl_type::vec4_type;
@@ -627,7 +627,8 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
packed_var->data.centroid = unpacked_var->data.centroid;
packed_var->data.sample = unpacked_var->data.sample;
packed_var->data.patch = unpacked_var->data.patch;
- packed_var->data.interpolation = unpacked_var->data.interpolation;
+ packed_var->data.interpolation = packed_type == glsl_type::ivec4_type
+ ? unsigned(INTERP_QUALIFIER_FLAT) : unpacked_var->data.interpolation;
packed_var->data.location = location;
packed_var->data.precision = unpacked_var->data.precision;
packed_var->data.always_active_io = unpacked_var->data.always_active_io;
diff --git a/src/compiler/glsl/opt_conditional_discard.cpp b/src/compiler/glsl/opt_conditional_discard.cpp
index 1ca8803..a27bead 100644
--- a/src/compiler/glsl/opt_conditional_discard.cpp
+++ b/src/compiler/glsl/opt_conditional_discard.cpp
@@ -72,7 +72,14 @@ opt_conditional_discard_visitor::visit_leave(ir_if *ir)
/* Move the condition and replace the ir_if with the ir_discard. */
ir_discard *discard = (ir_discard *) ir->then_instructions.head;
- discard->condition = ir->condition;
+ if (!discard->condition)
+ discard->condition = ir->condition;
+ else {
+ void *ctx = ralloc_parent(ir);
+ discard->condition = new(ctx) ir_expression(ir_binop_logic_and,
+ ir->condition,
+ discard->condition);
+ }
ir->replace_with(discard);
progress = true;
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 11f1e85..83ce35e 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -1079,7 +1079,7 @@ function_key_compare(const void *a, const void *b)
const glsl_type *const key2 = (glsl_type *) b;
if (key1->length != key2->length)
- return 1;
+ return false;
return memcmp(key1->fields.parameters, key2->fields.parameters,
(key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
@@ -1090,20 +1090,8 @@ static uint32_t
function_key_hash(const void *a)
{
const glsl_type *const key = (glsl_type *) a;
- char hash_key[128];
- unsigned size = 0;
-
- size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
-
- for (unsigned i = 0; i < key->length; i++) {
- if (size >= sizeof(hash_key))
- break;
-
- size += snprintf(& hash_key[size], sizeof(hash_key) - size,
- "%p", (void *) key->fields.structure[i].type);
- }
-
- return _mesa_hash_string(hash_key);
+ return _mesa_hash_data(key->fields.parameters,
+ (key->length + 1) * sizeof(*key->fields.parameters));
}
const glsl_type *
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 3c8b4e0..158ccc1 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -659,6 +659,122 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref)
return NULL;
}
+/* This is the second step in the recursion. We've found the tail and made a
+ * copy. Now we need to iterate over all possible leaves and call the
+ * callback on each one.
+ */
+static bool
+deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ unsigned length;
+ union {
+ nir_deref_array arr;
+ nir_deref_struct str;
+ } tmp;
+
+ assert(tail->child == NULL);
+ switch (glsl_get_base_type(tail->type)) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_BOOL:
+ if (glsl_type_is_vector_or_scalar(tail->type))
+ return cb(deref, state);
+ /* Fall Through */
+
+ case GLSL_TYPE_ARRAY:
+ tmp.arr.deref.deref_type = nir_deref_type_array;
+ tmp.arr.deref.type = glsl_get_array_element(tail->type);
+ tmp.arr.deref_array_type = nir_deref_array_type_direct;
+ tmp.arr.indirect = NIR_SRC_INIT;
+ tail->child = &tmp.arr.deref;
+
+ length = glsl_get_length(tail->type);
+ for (unsigned i = 0; i < length; i++) {
+ tmp.arr.deref.child = NULL;
+ tmp.arr.base_offset = i;
+ if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+ return false;
+ }
+ return true;
+
+ case GLSL_TYPE_STRUCT:
+ tmp.str.deref.deref_type = nir_deref_type_struct;
+ tail->child = &tmp.str.deref;
+
+ length = glsl_get_length(tail->type);
+ for (unsigned i = 0; i < length; i++) {
+ tmp.arr.deref.child = NULL;
+ tmp.str.deref.type = glsl_get_struct_field(tail->type, i);
+ tmp.str.index = i;
+ if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+ return false;
+ }
+ return true;
+
+ default:
+ unreachable("Invalid type for dereference");
+ }
+}
+
+/* This is the first step of the foreach_leaf recursion. In this step we are
+ * walking to the end of the deref chain and making a copy on the stack as we
+ * go. This is because we don't want to mutate the deref chain that was
+ * passed in by the caller. The downside is that this deref chain is on the
+ * stack and, if the caller wants to do anything with it, they will have to
+ * make their own copy because this one will go away.
+ */
+static bool
+deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ union {
+ nir_deref_array arr;
+ nir_deref_struct str;
+ } c;
+
+ if (tail->child) {
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array:
+ c.arr = *nir_deref_as_array(tail->child);
+ tail->child = &c.arr.deref;
+ return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state);
+
+ case nir_deref_type_struct:
+ c.str = *nir_deref_as_struct(tail->child);
+ tail->child = &c.str.deref;
+ return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state);
+
+ case nir_deref_type_var:
+ default:
+ unreachable("Invalid deref type for a child");
+ }
+ } else {
+ /* We've gotten to the end of the original deref. Time to start
+ * building our own derefs.
+ */
+ return deref_foreach_leaf_build_recur(deref, tail, cb, state);
+ }
+}
+
+/**
+ * This function iterates over all of the possible derefs that can be created
+ * with the given deref as the head. It then calls the provided callback with
+ * a full deref for each one.
+ *
+ * The deref passed to the callback will be allocated on the stack. You will
+ * need to make a copy if you want it to hang around.
+ */
+bool
+nir_deref_foreach_leaf(nir_deref_var *deref,
+ nir_deref_foreach_leaf_cb cb, void *state)
+{
+ nir_deref_var copy = *deref;
+ return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state);
+}
+
/* Returns a load_const instruction that represents the constant
* initializer for the given deref chain. The caller is responsible for
* ensuring that there actually is a constant initializer.
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9816ed6..dc03918 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1923,6 +1923,10 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state);
+bool nir_deref_foreach_leaf(nir_deref_var *deref,
+ nir_deref_foreach_leaf_cb cb, void *state);
+
nir_load_const_instr *
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 09cdf72..7497efc 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -318,6 +318,25 @@ nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
}
static inline nir_ssa_def *
+nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+ switch (src0->num_components) {
+ case 1: return nir_ine(b, src0, src1);
+ case 2: return nir_bany_inequal2(b, src0, src1);
+ case 3: return nir_bany_inequal3(b, src0, src1);
+ case 4: return nir_bany_inequal4(b, src0, src1);
+ default:
+ unreachable("bad component size");
+ }
+}
+
+static inline nir_ssa_def *
+nir_bany(nir_builder *b, nir_ssa_def *src)
+{
+ return nir_bany_inequal(b, src, nir_imm_int(b, 0));
+}
+
+static inline nir_ssa_def *
nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
{
unsigned swizzle[4] = {c, c, c, c};
diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c
index c36748d..cf31e14 100644
--- a/src/compiler/nir/nir_inline_functions.c
+++ b/src/compiler/nir/nir_inline_functions.c
@@ -25,6 +25,20 @@
#include "nir_builder.h"
#include "nir_control_flow.h"
+static bool
+deref_apply_constant_initializer(nir_deref_var *deref, void *state)
+{
+ struct nir_builder *b = state;
+
+ nir_load_const_instr *initializer =
+ nir_deref_get_const_initializer_load(b->shader, deref);
+ nir_builder_instr_insert(b, &initializer->instr);
+
+ nir_store_deref_var(b, deref, &initializer->def, 0xf);
+
+ return true;
+}
+
static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
static void
@@ -174,11 +188,35 @@ inline_functions_block(nir_block *block, nir_builder *b,
/* Add copies of all in parameters */
assert(call->num_params == callee_copy->num_params);
+ b->cursor = nir_before_instr(&call->instr);
+
+ /* Before we insert the copy of the function, we need to lower away
+ * constant initializers on local variables. This is because constant
+ * initializers happen (effectively) at the top of the function and,
+ * since these are about to become locals of the calling function,
+ * initialization will happen at the top of the caller rather than at
+ * the top of the callee. This isn't usually a problem, but if we are
+ * being inlined inside of a loop, it can result in the variable not
+ * getting re-initialized properly for all loop iterations.
+ */
+ nir_foreach_variable(local, &callee_copy->locals) {
+ if (!local->constant_initializer)
+ continue;
+
+ nir_deref_var deref;
+ deref.deref.deref_type = nir_deref_type_var,
+ deref.deref.child = NULL;
+ deref.deref.type = local->type,
+ deref.var = local;
+
+ nir_deref_foreach_leaf(&deref, deref_apply_constant_initializer, b);
+
+ local->constant_initializer = NULL;
+ }
+
exec_list_append(&b->impl->locals, &callee_copy->locals);
exec_list_append(&b->impl->registers, &callee_copy->registers);
- b->cursor = nir_before_instr(&call->instr);
-
/* We now need to tie the two functions together using the
* parameters. There are two ways we do this: One is to turn the
* parameter into a local variable and do a shadow-copy. The other
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
index 6f86c9f..9479060 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -41,6 +41,8 @@
#define ARR(...) { __VA_ARGS__ }
+INTRINSIC(nop, 0, ARR(0), false, 0, 0, 0, xx, xx, xx,
+ NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
@@ -266,16 +268,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx,
* in shared_atomic_add, etc).
* 2: For CompSwap only: the second data parameter.
*/
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 4f72cf7..a84fbdf 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -254,6 +254,9 @@ nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
lower_alu_instr_scalar(nir_instr_as_alu(instr), &builder);
}
}
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
}
void
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index d62cec0..937f8b3 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node,
return true;
}
-/* Performs variable renaming by doing a DFS of the dominance tree
+/* Performs variable renaming
*
* This algorithm is very similar to the one outlined in "Efficiently
* Computing Static Single Assignment Form and the Control Dependence
@@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
* SSA def on the stack per block.
*/
static bool
-rename_variables_block(nir_block *block, struct lower_variables_state *state)
+rename_variables(struct lower_variables_state *state)
{
nir_builder b;
nir_builder_init(&b, state->impl);
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
- switch (intrin->intrinsic) {
- case nir_intrinsic_load_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* If we hit this path then we are referencing an invalid
- * value. Most likely, we unrolled something and are
- * reading past the end of some array. In any case, this
- * should result in an undefined value.
- */
- nir_ssa_undef_instr *undef =
- nir_ssa_undef_instr_create(state->shader,
- intrin->num_components,
- intrin->dest.ssa.bit_size);
-
- nir_instr_insert_before(&intrin->instr, &undef->instr);
- nir_instr_remove(&intrin->instr);
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&undef->def));
+ nir_foreach_block(block, state->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
continue;
- }
- if (!node->lower_to_ssa)
- continue;
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
+
+ if (node == NULL) {
+ /* If we hit this path then we are referencing an invalid
+ * value. Most likely, we unrolled something and are
+ * reading past the end of some array. In any case, this
+ * should result in an undefined value.
+ */
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(state->shader,
+ intrin->num_components,
+ intrin->dest.ssa.bit_size);
+
+ nir_instr_insert_before(&intrin->instr, &undef->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&undef->def));
+ continue;
+ }
- nir_alu_instr *mov = nir_alu_instr_create(state->shader,
- nir_op_imov);
- mov->src[0].src = nir_src_for_ssa(
- nir_phi_builder_value_get_block_def(node->pb_value, block));
- for (unsigned i = intrin->num_components; i < 4; i++)
- mov->src[0].swizzle[i] = 0;
+ if (!node->lower_to_ssa)
+ continue;
- assert(intrin->dest.is_ssa);
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+ nir_op_imov);
+ mov->src[0].src = nir_src_for_ssa(
+ nir_phi_builder_value_get_block_def(node->pb_value, block));
+ for (unsigned i = intrin->num_components; i < 4; i++)
+ mov->src[0].swizzle[i] = 0;
- mov->dest.write_mask = (1 << intrin->num_components) - 1;
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components,
- intrin->dest.ssa.bit_size, NULL);
+ assert(intrin->dest.is_ssa);
- nir_instr_insert_before(&intrin->instr, &mov->instr);
- nir_instr_remove(&intrin->instr);
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- nir_src_for_ssa(&mov->dest.dest.ssa));
- break;
- }
-
- case nir_intrinsic_store_var: {
- struct deref_node *node =
- get_deref_node(intrin->variables[0], state);
-
- if (node == NULL) {
- /* Probably an out-of-bounds array store. That should be a
- * no-op. */
+ nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
- continue;
- }
- if (!node->lower_to_ssa)
- continue;
-
- assert(intrin->num_components ==
- glsl_get_vector_elements(node->type));
-
- assert(intrin->src[0].is_ssa);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&mov->dest.dest.ssa));
+ break;
+ }
- nir_ssa_def *new_def;
- b.cursor = nir_before_instr(&intrin->instr);
+ case nir_intrinsic_store_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
- unsigned wrmask = nir_intrinsic_write_mask(intrin);
- if (wrmask == (1 << intrin->num_components) - 1) {
- /* Whole variable store - just copy the source. Note that
- * intrin->num_components and intrin->src[0].ssa->num_components
- * may differ.
- */
- unsigned swiz[4];
- for (unsigned i = 0; i < 4; i++)
- swiz[i] = i < intrin->num_components ? i : 0;
+ if (node == NULL) {
+ /* Probably an out-of-bounds array store. That should be a
+ * no-op. */
+ nir_instr_remove(&intrin->instr);
+ continue;
+ }
- new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
- intrin->num_components, false);
- } else {
- nir_ssa_def *old_def =
- nir_phi_builder_value_get_block_def(node->pb_value, block);
- /* For writemasked store_var intrinsics, we combine the newly
- * written values with the existing contents of unwritten
- * channels, creating a new SSA value for the whole vector.
- */
- nir_ssa_def *srcs[4];
- for (unsigned i = 0; i < intrin->num_components; i++) {
- if (wrmask & (1 << i)) {
- srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
- } else {
- srcs[i] = nir_channel(&b, old_def, i);
+ if (!node->lower_to_ssa)
+ continue;
+
+ assert(intrin->num_components ==
+ glsl_get_vector_elements(node->type));
+
+ assert(intrin->src[0].is_ssa);
+
+ nir_ssa_def *new_def;
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ unsigned wrmask = nir_intrinsic_write_mask(intrin);
+ if (wrmask == (1 << intrin->num_components) - 1) {
+ /* Whole variable store - just copy the source. Note that
+ * intrin->num_components and intrin->src[0].ssa->num_components
+ * may differ.
+ */
+ unsigned swiz[4];
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = i < intrin->num_components ? i : 0;
+
+ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+ intrin->num_components, false);
+ } else {
+ nir_ssa_def *old_def =
+ nir_phi_builder_value_get_block_def(node->pb_value, block);
+ /* For writemasked store_var intrinsics, we combine the newly
+ * written values with the existing contents of unwritten
+ * channels, creating a new SSA value for the whole vector.
+ */
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < intrin->num_components; i++) {
+ if (wrmask & (1 << i)) {
+ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+ } else {
+ srcs[i] = nir_channel(&b, old_def, i);
+ }
}
+ new_def = nir_vec(&b, srcs, intrin->num_components);
}
- new_def = nir_vec(&b, srcs, intrin->num_components);
- }
- assert(new_def->num_components == intrin->num_components);
+ assert(new_def->num_components == intrin->num_components);
- nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
- nir_instr_remove(&intrin->instr);
- break;
- }
+ nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
- default:
- break;
+ default:
+ break;
+ }
}
}
- for (unsigned i = 0; i < block->num_dom_children; ++i)
- rename_variables_block(block->dom_children[i], state);
-
return true;
}
@@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
}
}
- rename_variables_block(nir_start_block(impl), &state);
+ rename_variables(&state);
nir_phi_builder_finish(state.phi_builder);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 15066c2..7045c95 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -257,7 +257,7 @@ unpack_4x8("unorm")
unpack_2x16("half")
unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
-dst.x = (src0.x & 0xffff) | (src0.y >> 16);
+dst.x = (src0.x & 0xffff) | (src0.y << 16);
""")
unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h
index edc5302..a4dc18a 100644
--- a/src/compiler/nir/nir_phi_builder.h
+++ b/src/compiler/nir/nir_phi_builder.h
@@ -44,7 +44,8 @@
* var.pb_val = nir_phi_builder_add_value(pb, var.defs)
*
* // Visit each block. This needs to visit dominators first;
- * // nir_for_each_block() will be ok.
+ * // nir_foreach_block() will be ok.
+ *
* foreach block:
* foreach instruction:
* foreach use of variable var:
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index f1bbfd5..0763cb8 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1718,8 +1718,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
break;
case SpvOpAtomicCompareExchange:
- intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
- intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+ intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+ intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
break;
case SpvOpAtomicISub:
@@ -1816,8 +1816,8 @@ fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
break;
case SpvOpAtomicCompareExchange:
- src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
- src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+ src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+ src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
break;
/* Fall through */
diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
index d9096f4..62b9056 100644
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -239,12 +239,12 @@ vtn_get_branch_type(struct vtn_block *block,
swcase->fallthrough == block->switch_case);
swcase->fallthrough = block->switch_case;
return vtn_branch_type_switch_fallthrough;
- } else if (block == switch_break) {
- return vtn_branch_type_switch_break;
} else if (block == loop_break) {
return vtn_branch_type_loop_break;
} else if (block == loop_cont) {
return vtn_branch_type_loop_continue;
+ } else if (block == switch_break) {
+ return vtn_branch_type_switch_break;
} else {
return vtn_branch_type_none;
}
@@ -443,6 +443,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
vtn_order_case(swtch, case_block->switch_case);
}
+ enum vtn_branch_type branch_type =
+ vtn_get_branch_type(break_block, switch_case, NULL,
+ loop_break, loop_cont);
+
+ if (branch_type != vtn_branch_type_none) {
+ /* It is possible that the break is actually the continue block
+ * for the containing loop. In this case, we need to bail and let
+ * the loop parsing code handle the continue properly.
+ */
+ assert(branch_type == vtn_branch_type_loop_continue);
+ return;
+ }
+
block = break_block;
continue;
}
@@ -518,7 +531,7 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
struct vtn_block *pred =
vtn_value(b, w[i + 1], vtn_value_type_block)->block;
- b->nb.cursor = nir_after_block_before_jump(pred->end_block);
+ b->nb.cursor = nir_after_instr(&pred->end_nop->instr);
vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
}
@@ -576,7 +589,9 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
vtn_foreach_instruction(b, block_start, block_end, handler);
- block->end_block = nir_cursor_current_block(b->nb.cursor);
+ block->end_nop = nir_intrinsic_instr_create(b->nb.shader,
+ nir_intrinsic_nop);
+ nir_builder_instr_insert(&b->nb, &block->end_nop->instr);
if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 7f5444e..6f34f09 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -149,8 +149,8 @@ struct vtn_block {
/** Points to the switch case started by this block (if any) */
struct vtn_case *switch_case;
- /** The last block in this SPIR-V block. */
- nir_block *end_block;
+ /** Every block ends in a nop intrinsic so that we can find it again */
+ nir_intrinsic_instr *end_nop;
};
struct vtn_function {
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index fe2494b..459e573 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -889,81 +889,9 @@ vtn_get_builtin_location(struct vtn_builder *b,
}
static void
-var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
- const struct vtn_decoration *dec, void *void_var)
+apply_var_decoration(struct vtn_builder *b, nir_variable *nir_var,
+ const struct vtn_decoration *dec)
{
- struct vtn_variable *vtn_var = void_var;
-
- /* Handle decorations that apply to a vtn_variable as a whole */
- switch (dec->decoration) {
- case SpvDecorationBinding:
- vtn_var->binding = dec->literals[0];
- return;
- case SpvDecorationDescriptorSet:
- vtn_var->descriptor_set = dec->literals[0];
- return;
- default:
- break;
- }
-
- /* Now we handle decorations that apply to a particular nir_variable */
- nir_variable *nir_var = vtn_var->var;
- if (val->value_type == vtn_value_type_access_chain) {
- assert(val->access_chain->length == 0);
- assert(val->access_chain->var == void_var);
- assert(member == -1);
- } else {
- assert(val->value_type == vtn_value_type_type);
- if (member != -1)
- nir_var = vtn_var->members[member];
- }
-
- /* Location is odd in that it can apply in three different cases: To a
- * non-split variable, to a whole split variable, or to one structure
- * member of a split variable.
- */
- if (dec->decoration == SpvDecorationLocation) {
- unsigned location = dec->literals[0];
- bool is_vertex_input;
- if (b->shader->stage == MESA_SHADER_FRAGMENT &&
- vtn_var->mode == vtn_variable_mode_output) {
- is_vertex_input = false;
- location += FRAG_RESULT_DATA0;
- } else if (b->shader->stage == MESA_SHADER_VERTEX &&
- vtn_var->mode == vtn_variable_mode_input) {
- is_vertex_input = true;
- location += VERT_ATTRIB_GENERIC0;
- } else if (vtn_var->mode == vtn_variable_mode_input ||
- vtn_var->mode == vtn_variable_mode_output) {
- is_vertex_input = false;
- location += VARYING_SLOT_VAR0;
- } else {
- assert(!"Location must be on input or output variable");
- }
-
- if (nir_var) {
- /* This handles the member and lone variable cases */
- nir_var->data.location = location;
- nir_var->data.explicit_location = true;
- } else {
- /* This handles the structure member case */
- assert(vtn_var->members);
- unsigned length =
- glsl_get_length(glsl_without_array(vtn_var->type->type));
- for (unsigned i = 0; i < length; i++) {
- vtn_var->members[i]->data.location = location;
- vtn_var->members[i]->data.explicit_location = true;
- location +=
- glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
- is_vertex_input);
- }
- }
- return;
- }
-
- if (nir_var == NULL)
- return;
-
switch (dec->decoration) {
case SpvDecorationRelaxedPrecision:
break; /* FIXME: Do nothing with this for now. */
@@ -1080,6 +1008,99 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
}
}
+static void
+var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
+ const struct vtn_decoration *dec, void *void_var)
+{
+ struct vtn_variable *vtn_var = void_var;
+
+ /* Handle decorations that apply to a vtn_variable as a whole */
+ switch (dec->decoration) {
+ case SpvDecorationBinding:
+ vtn_var->binding = dec->literals[0];
+ return;
+ case SpvDecorationDescriptorSet:
+ vtn_var->descriptor_set = dec->literals[0];
+ return;
+ default:
+ break;
+ }
+
+ if (val->value_type == vtn_value_type_access_chain) {
+ assert(val->access_chain->length == 0);
+ assert(val->access_chain->var == void_var);
+ assert(member == -1);
+ } else {
+ assert(val->value_type == vtn_value_type_type);
+ }
+
+ /* Location is odd. If applied to a split structure, we have to walk the
+ * whole thing and accumulate the location. It's easier to handle as a
+ * special case.
+ */
+ if (dec->decoration == SpvDecorationLocation) {
+ unsigned location = dec->literals[0];
+ bool is_vertex_input;
+ if (b->shader->stage == MESA_SHADER_FRAGMENT &&
+ vtn_var->mode == vtn_variable_mode_output) {
+ is_vertex_input = false;
+ location += FRAG_RESULT_DATA0;
+ } else if (b->shader->stage == MESA_SHADER_VERTEX &&
+ vtn_var->mode == vtn_variable_mode_input) {
+ is_vertex_input = true;
+ location += VERT_ATTRIB_GENERIC0;
+ } else if (vtn_var->mode == vtn_variable_mode_input ||
+ vtn_var->mode == vtn_variable_mode_output) {
+ is_vertex_input = false;
+ location += VARYING_SLOT_VAR0;
+ } else {
+ assert(!"Location must be on input or output variable");
+ }
+
+ if (vtn_var->var) {
+ /* This handles the member and lone variable cases */
+ vtn_var->var->data.location = location;
+ vtn_var->var->data.explicit_location = true;
+ } else {
+ /* This handles the structure member case */
+ assert(vtn_var->members);
+ unsigned length =
+ glsl_get_length(glsl_without_array(vtn_var->type->type));
+ for (unsigned i = 0; i < length; i++) {
+ vtn_var->members[i]->data.location = location;
+ vtn_var->members[i]->data.explicit_location = true;
+ location +=
+ glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
+ is_vertex_input);
+ }
+ }
+ return;
+ } else {
+ if (vtn_var->var) {
+ assert(member <= 0);
+ apply_var_decoration(b, vtn_var->var, dec);
+ } else if (vtn_var->members) {
+ if (member >= 0) {
+ assert(vtn_var->members);
+ apply_var_decoration(b, vtn_var->members[member], dec);
+ } else {
+ unsigned length =
+ glsl_get_length(glsl_without_array(vtn_var->type->type));
+ for (unsigned i = 0; i < length; i++)
+ apply_var_decoration(b, vtn_var->members[i], dec);
+ }
+ } else {
+ /* A few variables, those with external storage, have no actual
+ * nir_variables associated with them. Fortunately, all decorations
+ * we care about for those variables are on the type only.
+ */
+ assert(vtn_var->mode == vtn_variable_mode_ubo ||
+ vtn_var->mode == vtn_variable_mode_ssbo ||
+ vtn_var->mode == vtn_variable_mode_push_constant);
+ }
+ }
+}
+
/* Tries to compute the size of an interface block based on the strides and
* offsets that are provided to us in the SPIR-V source.
*/
@@ -1173,7 +1194,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
case SpvStorageClassPushConstant:
var->mode = vtn_variable_mode_push_constant;
assert(b->shader->num_uniforms == 0);
- b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+ b->shader->num_uniforms = vtn_type_block_size(var->type);
break;
case SpvStorageClassInput:
var->mode = vtn_variable_mode_input;
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index fe33ecd..c6d4fb5 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -242,6 +242,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
return NULL;
break;
+ case __DRI_ATTRIB_MAX_PBUFFER_WIDTH:
+ _eglSetConfigKey(&base, EGL_MAX_PBUFFER_WIDTH,
+ _EGL_MAX_PBUFFER_WIDTH);
+ break;
+ case __DRI_ATTRIB_MAX_PBUFFER_HEIGHT:
+ _eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT,
+ _EGL_MAX_PBUFFER_HEIGHT);
+ break;
+
default:
key = dri2_to_egl_attribute_map[attrib];
if (key != 0)
@@ -320,6 +329,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
surface_type &= ~EGL_PIXMAP_BIT;
}
+ /* No support for pbuffer + MSAA for now.
+ *
+ * XXX TODO: pbuffer + MSAA does not work and causes crashes.
+ * See QT bugreport: https://bugreports.qt.io/browse/QTBUG-47509
+ */
+ if (base.Samples) {
+ surface_type &= ~EGL_PBUFFER_BIT;
+ }
+
conf->base.SurfaceType |= surface_type;
return conf;
@@ -758,64 +776,99 @@ dri2_create_screen(_EGLDisplay *disp)
/**
* Called via eglInitialize(), GLX_drv->API.Initialize().
+ *
+ * This must be guaranteed to be called exactly once, even if eglInitialize is
+ * called many times (without an eglTerminate in between).
*/
static EGLBoolean
dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
{
+ EGLBoolean ret = EGL_FALSE;
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+ /* In the case where the application calls eglMakeCurrent(context1),
+ * eglTerminate, then eglInitialize again (without a call to eglReleaseThread
+ * or eglMakeCurrent(NULL) before that), dri2_dpy structure is still
+ * initialized, as we need it to be able to free context1 correctly.
+ *
+ * It would probably be safest to forcibly release the display with
+ * dri2_display_release, to make sure the display is reinitialized correctly.
+ * However, the EGL spec states that we need to keep a reference to the
+ * current context (so we cannot call dri2_make_current(NULL)), and therefore
+ * we would leak context1 as we would be missing the old display connection
+ * to free it up correctly.
+ */
+ if (dri2_dpy) {
+ dri2_dpy->ref_count++;
+ return EGL_TRUE;
+ }
+
/* not until swrast_dri is supported */
if (disp->Options.UseFallback)
return EGL_FALSE;
+ /* Nothing to initialize for a test only display */
+ if (disp->Options.TestOnly)
+ return EGL_TRUE;
+
switch (disp->Platform) {
#ifdef HAVE_SURFACELESS_PLATFORM
case _EGL_PLATFORM_SURFACELESS:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_surfaceless(drv, disp);
+ ret = dri2_initialize_surfaceless(drv, disp);
+ break;
#endif
-
#ifdef HAVE_X11_PLATFORM
case _EGL_PLATFORM_X11:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_x11(drv, disp);
+ ret = dri2_initialize_x11(drv, disp);
+ break;
#endif
-
#ifdef HAVE_DRM_PLATFORM
case _EGL_PLATFORM_DRM:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_drm(drv, disp);
+ ret = dri2_initialize_drm(drv, disp);
+ break;
#endif
#ifdef HAVE_WAYLAND_PLATFORM
case _EGL_PLATFORM_WAYLAND:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_wayland(drv, disp);
+ ret = dri2_initialize_wayland(drv, disp);
+ break;
#endif
#ifdef HAVE_ANDROID_PLATFORM
case _EGL_PLATFORM_ANDROID:
- if (disp->Options.TestOnly)
- return EGL_TRUE;
- return dri2_initialize_android(drv, disp);
+ ret = dri2_initialize_android(drv, disp);
+ break;
#endif
-
default:
_eglLog(_EGL_WARNING, "No EGL platform enabled.");
return EGL_FALSE;
}
+
+ if (ret) {
+ dri2_dpy = dri2_egl_display(disp);
+
+ if (!dri2_dpy) {
+ return EGL_FALSE;
+ }
+
+ dri2_dpy->ref_count++;
+ }
+
+ return ret;
}
/**
- * Called via eglTerminate(), drv->API.Terminate().
+ * Decrement display reference count, and free up display if necessary.
*/
-static EGLBoolean
-dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
-{
+static void
+dri2_display_release(_EGLDisplay *disp) {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
unsigned i;
- _eglReleaseDisplayResources(drv, disp);
+ assert(dri2_dpy->ref_count > 0);
+ dri2_dpy->ref_count--;
+
+ if (dri2_dpy->ref_count > 0)
+ return;
+
_eglCleanupDisplay(disp);
if (dri2_dpy->own_dri_screen)
@@ -870,6 +923,21 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
}
free(dri2_dpy);
disp->DriverData = NULL;
+}
+
+/**
+ * Called via eglTerminate(), drv->API.Terminate().
+ *
+ * This must be guaranteed to be called exactly once, even if eglTerminate is
+ * called many times (without an eglInitialize in between).
+ */
+static EGLBoolean
+dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
+{
+ /* Release all non-current Context/Surfaces. */
+ _eglReleaseDisplayResources(drv, disp);
+
+ dri2_display_release(disp);
return EGL_TRUE;
}
@@ -1189,10 +1257,16 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
_EGLSurface *tmp_dsurf, *tmp_rsurf;
__DRIdrawable *ddraw, *rdraw;
__DRIcontext *cctx;
+ EGLBoolean unbind;
+
+ if (!dri2_dpy)
+ return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");
/* make new bindings */
- if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf))
+ if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) {
+ /* _eglBindContext already sets the EGL error (in _eglCheckMakeCurrent) */
return EGL_FALSE;
+ }
/* flush before context switch */
if (old_ctx && dri2_drv->glFlush)
@@ -1207,14 +1281,21 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
dri2_dpy->core->unbindContext(old_cctx);
}
- if ((cctx == NULL && ddraw == NULL && rdraw == NULL) ||
- dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
+ unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
+
+ if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
if (old_dsurf)
drv->API.DestroySurface(drv, disp, old_dsurf);
if (old_rsurf)
drv->API.DestroySurface(drv, disp, old_rsurf);
- if (old_ctx)
+
+ if (!unbind)
+ dri2_dpy->ref_count++;
+ if (old_ctx) {
+ EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display);
drv->API.DestroyContext(drv, disp, old_ctx);
+ dri2_display_release(old_disp);
+ }
return EGL_TRUE;
} else {
@@ -1232,7 +1313,11 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
_eglPutSurface(old_rsurf);
_eglPutContext(old_ctx);
- return EGL_FALSE;
+ /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
+ * setting the error to EGL_BAD_MATCH is surely better than leaving it
+ * as EGL_SUCCESS.
+ */
+ return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
}
}
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 925294b..6099bc2 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -80,8 +80,6 @@
#include "eglimage.h"
#include "eglsync.h"
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
struct wl_buffer;
struct dri2_egl_driver
@@ -177,6 +175,10 @@ struct dri2_egl_display
const __DRI2interopExtension *interop;
int fd;
+ /* dri2_initialize/dri2_terminate increment/decrement this count, so does
+ * dri2_make_current (tracks if there are active contexts/surfaces). */
+ int ref_count;
+
int own_device;
int swap_available;
int invalidate_available;
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index 87bd19b..351fd0f 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -29,7 +29,7 @@
#include <errno.h>
#include <dlfcn.h>
-
+#include <fcntl.h>
#if 0
#include <xf86drm.h>
#endif
@@ -170,6 +170,8 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf)
static EGLBoolean
droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
/* To avoid blocking other EGL calls, release the display mutex before
* we enter droid_window_enqueue_buffer() and re-acquire the mutex upon
* return.
@@ -200,6 +202,12 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur
dri2_surf->buffer = NULL;
mtx_lock(&disp->Mutex);
+
+ if (dri2_surf->dri_image) {
+ dri2_dpy->image->destroyImage(dri2_surf->dri_image);
+ dri2_surf->dri_image = NULL;
+ }
+
return EGL_TRUE;
}
@@ -291,6 +299,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
dri2_surf->base.GLColorspace);
+ if (!config)
+ goto cleanup_surface;
if (dri2_dpy->dri2) {
dri2_surf->dri_drawable =
@@ -384,6 +394,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
int fourcc, pitch;
int offset = 0, fd;
+ if (dri2_surf->dri_image)
+ return 0;
+
if (!dri2_surf->buffer)
return -1;
@@ -442,10 +455,8 @@ droid_image_get_buffers(__DRIdrawable *driDrawable,
static EGLBoolean
droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
{
- struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
- _EGLContext *ctx;
if (dri2_surf->base.Type != EGL_WINDOW_BIT)
return EGL_TRUE;
@@ -986,7 +997,7 @@ droid_open_device(void)
fd = -1;
}
- return (fd >= 0) ? dup(fd) : -1;
+ return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1;
}
/* support versions < JellyBean */
@@ -1134,6 +1145,7 @@ cleanup_device:
close(dri2_dpy->fd);
cleanup_display:
free(dri2_dpy);
+ dpy->DriverData = NULL;
return _eglError(EGL_NOT_INITIALIZED, err);
}
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 9373496..1ce282f 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -726,5 +726,6 @@ cleanup:
close(fd);
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c
index e0ddc12..323a8d7 100644
--- a/src/egl/drivers/dri2/platform_surfaceless.c
+++ b/src/egl/drivers/dri2/platform_surfaceless.c
@@ -157,6 +157,7 @@ cleanup_driver:
close(dri2_dpy->fd);
cleanup_display:
free(dri2_dpy);
+ disp->DriverData = NULL;
return _eglError(EGL_NOT_INITIALIZED, err);
}
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index ff0d5c8..1a295d5 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
(*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
}
+static void
+destroy_window_callback(void *data)
+{
+ struct dri2_egl_surface *dri2_surf = data;
+ dri2_surf->wl_win = NULL;
+}
+
/**
* Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
*/
@@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
dri2_surf->wl_win->private = dri2_surf;
dri2_surf->wl_win->resize_callback = resize_callback;
+ dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
dri2_surf->base.Width = -1;
dri2_surf->base.Height = -1;
@@ -257,8 +265,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
if (dri2_surf->throttle_callback)
wl_callback_destroy(dri2_surf->throttle_callback);
- dri2_surf->wl_win->private = NULL;
- dri2_surf->wl_win->resize_callback = NULL;
+ if (dri2_surf->wl_win) {
+ dri2_surf->wl_win->private = NULL;
+ dri2_surf->wl_win->resize_callback = NULL;
+ dri2_surf->wl_win->destroy_window_callback = NULL;
+ }
free(surf);
@@ -1238,6 +1249,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
@@ -1883,6 +1895,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index c0a4005..792cabe 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1231,6 +1231,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
@@ -1302,15 +1303,13 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
dri2_dpy->screen = DefaultScreen(dpy);
}
- if (xcb_connection_has_error(dri2_dpy->conn)) {
+ if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
_eglLog(_EGL_WARNING, "DRI3: xcb_connect failed");
goto cleanup_dpy;
}
- if (dri2_dpy->conn) {
- if (!dri3_x11_connect(dri2_dpy))
- goto cleanup_conn;
- }
+ if (!dri3_x11_connect(dri2_dpy))
+ goto cleanup_conn;
if (!dri2_load_driver_dri3(disp))
goto cleanup_conn;
@@ -1338,10 +1337,8 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
#endif
- if (dri2_dpy->conn) {
- if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
- goto cleanup_configs;
- }
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
+ goto cleanup_configs;
dri2_dpy->loader_dri3_ext.core = dri2_dpy->core;
dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver;
@@ -1370,6 +1367,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
@@ -1467,6 +1465,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
xcb_disconnect(dri2_dpy->conn);
cleanup_dpy:
free(dri2_dpy);
+ disp->DriverData = NULL;
return EGL_FALSE;
}
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
index 9363a8a..69bfcd8 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -103,6 +103,17 @@ egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
return dri2_ctx->dri_context;
}
+static __DRIscreen *
+egl_dri3_get_dri_screen(struct loader_dri3_drawable *draw)
+{
+ _EGLContext *ctx = _eglGetCurrentContext();
+ struct dri2_egl_context *dri2_ctx;
+ if (!ctx)
+ return NULL;
+ dri2_ctx = dri2_egl_context(ctx);
+ return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen;
+}
+
static void
egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
{
@@ -119,6 +130,7 @@ static struct loader_dri3_vtable egl_dri3_vtable = {
.set_drawable_size = egl_dri3_set_drawable_size,
.in_current_context = egl_dri3_in_current_context,
.get_dri_context = egl_dri3_get_dri_context,
+ .get_dri_screen = egl_dri3_get_dri_screen,
.flush_drawable = egl_dri3_flush_drawable,
.show_fps = NULL,
};
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 4700dbe..127ca1e 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -627,7 +627,9 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,
_EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);
- if (!config && !disp->Extensions.MESA_configless_context)
+ if (config)
+ _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
+ else if (!disp->Extensions.MESA_configless_context)
RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);
if (!share && share_list != EGL_NO_CONTEXT)
@@ -1937,7 +1939,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context,
return MESA_GLINTEROP_SUCCESS;
}
-int
+PUBLIC int
MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_device_info *out)
{
@@ -1959,7 +1961,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
return ret;
}
-int
+PUBLIC int
MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out)
diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h
index 13a7563..6090fc3 100644
--- a/src/egl/main/egldefines.h
+++ b/src/egl/main/egldefines.h
@@ -34,6 +34,8 @@
#ifndef EGLDEFINES_INCLUDED
#define EGLDEFINES_INCLUDED
+#include "util/macros.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -48,7 +50,6 @@ extern "C" {
#define _EGL_VENDOR_STRING "Mesa Project"
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define MIN2(A, B) (((A) < (B)) ? (A) : (B))
#ifdef __cplusplus
diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c
index 938d953..1be6797 100644
--- a/src/egl/main/eglglobals.c
+++ b/src/egl/main/eglglobals.c
@@ -53,10 +53,16 @@ struct _egl_global _eglGlobal =
/* ClientExtensionsString */
"EGL_EXT_client_extensions"
" EGL_EXT_platform_base"
+#ifdef HAVE_WAYLAND_PLATFORM
" EGL_EXT_platform_wayland"
+#endif
+#ifdef HAVE_X11_PLATFORM
" EGL_EXT_platform_x11"
- " EGL_KHR_client_get_all_proc_addresses"
+#endif
+#ifdef HAVE_DRM_PLATFORM
" EGL_MESA_platform_gbm"
+#endif
+ " EGL_KHR_client_get_all_proc_addresses"
};
diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index 17d7907..e8ee49c 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -262,9 +262,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
{
const char *func;
EGLint renderBuffer = EGL_BACK_BUFFER;
- EGLint swapBehavior = EGL_BUFFER_PRESERVED;
+ EGLint swapBehavior = EGL_BUFFER_DESTROYED;
EGLint err;
+ /* Swap behavior can be preserved only if config supports this. */
+ if (conf->SurfaceType & EGL_SWAP_BEHAVIOR_PRESERVED_BIT)
+ swapBehavior = EGL_BUFFER_PRESERVED;
+
switch (type) {
case EGL_WINDOW_BIT:
func = "eglCreateWindowSurface";
diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c
index 33625e9..f325031 100644
--- a/src/egl/main/eglsync.c
+++ b/src/egl/main/eglsync.c
@@ -26,6 +26,7 @@
**************************************************************************/
+#include <inttypes.h>
#include <string.h>
#include "eglsync.h"
@@ -75,8 +76,8 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list)
return EGL_SUCCESS;
for (i = 0; attrib_list[i] != EGL_NONE; i++) {
- EGLint attr = attrib_list[i++];
- EGLint val = attrib_list[i];
+ EGLAttrib attr = attrib_list[i++];
+ EGLAttrib val = attrib_list[i];
switch (attr) {
case EGL_CL_EVENT_HANDLE_KHR:
@@ -92,7 +93,7 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list)
}
if (err != EGL_SUCCESS) {
- _eglLog(_EGL_DEBUG, "bad sync attribute 0x%04x", attr);
+ _eglLog(_EGL_DEBUG, "bad sync attribute 0x%" PRIxPTR, attr);
break;
}
}
diff --git a/src/egl/wayland/wayland-egl/wayland-egl-priv.h b/src/egl/wayland/wayland-egl/wayland-egl-priv.h
index f1e3ba2..c91f9cd 100644
--- a/src/egl/wayland/wayland-egl/wayland-egl-priv.h
+++ b/src/egl/wayland/wayland-egl/wayland-egl-priv.h
@@ -27,6 +27,7 @@ struct wl_egl_window {
void *private;
void (*resize_callback)(struct wl_egl_window *, void *);
+ void (*destroy_window_callback)(void *);
};
#ifdef __cplusplus
diff --git a/src/egl/wayland/wayland-egl/wayland-egl.c b/src/egl/wayland/wayland-egl/wayland-egl.c
index 80a5be5..4a4701a 100644
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -66,6 +66,7 @@ wl_egl_window_create(struct wl_surface *surface,
egl_window->surface = surface;
egl_window->private = NULL;
egl_window->resize_callback = NULL;
+ egl_window->destroy_window_callback = NULL;
wl_egl_window_resize(egl_window, width, height, 0, 0);
egl_window->attached_width = 0;
egl_window->attached_height = 0;
@@ -76,6 +77,8 @@ wl_egl_window_create(struct wl_surface *surface,
WL_EGL_EXPORT void
wl_egl_window_destroy(struct wl_egl_window *egl_window)
{
+ if (egl_window->destroy_window_callback)
+ egl_window->destroy_window_callback(egl_window->private);
free(egl_window);
}
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 6077976..013bc88 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -310,7 +310,8 @@ C_SOURCES := \
util/u_upload_mgr.h \
util/u_vbuf.c \
util/u_vbuf.h \
- util/u_video.h
+ util/u_video.h \
+ util/u_viewport.h
NIR_SOURCES := \
nir/tgsi_to_nir.c \
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index c8e1f13..0fbc78e 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -45,6 +45,7 @@ struct pipe_loader_sw_device {
struct util_dl_library *lib;
#endif
struct sw_winsys *ws;
+ int fd;
};
#define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
@@ -92,6 +93,7 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
sdev->base.driver_name = "swrast";
sdev->base.ops = &pipe_loader_sw_ops;
+ sdev->fd = -1;
#ifdef GALLIUM_STATIC_TARGETS
sdev->dd = &driver_descriptors;
@@ -169,6 +171,8 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
if (!pipe_loader_sw_probe_init_common(sdev))
goto fail;
+ sdev->fd = fd;
+
for (i = 0; sdev->dd->winsys[i].name; i++) {
if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
@@ -273,6 +277,11 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
util_dl_close(sdev->lib);
#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+ if (sdev->fd != -1)
+ close(sdev->fd);
+#endif
+
FREE(sdev);
*dev = NULL;
}
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 00f231d..55da21f 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -140,11 +140,15 @@ static inline void
u_box_union_2d(struct pipe_box *dst,
const struct pipe_box *a, const struct pipe_box *b)
{
- dst->x = MIN2(a->x, b->x);
- dst->y = MIN2(a->y, b->y);
+ int x, y;
- dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x;
- dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y;
+ x = MIN2(a->x, b->x);
+ y = MIN2(a->y, b->y);
+
+ dst->width = MAX2(a->x + a->width, b->x + b->width) - x;
+ dst->height = MAX2(a->y + a->height, b->y + b->height) - y;
+ dst->x = x;
+ dst->y = y;
}
/* Aliasing of @dst permitted. */
@@ -152,13 +156,18 @@ static inline void
u_box_union_3d(struct pipe_box *dst,
const struct pipe_box *a, const struct pipe_box *b)
{
- dst->x = MIN2(a->x, b->x);
- dst->y = MIN2(a->y, b->y);
- dst->z = MIN2(a->z, b->z);
-
- dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x;
- dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y;
- dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z;
+ int x, y, z;
+
+ x = MIN2(a->x, b->x);
+ y = MIN2(a->y, b->y);
+ z = MIN2(a->z, b->z);
+
+ dst->width = MAX2(a->x + a->width, b->x + b->width) - x;
+ dst->height = MAX2(a->y + a->height, b->y + b->height) - y;
+ dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - z;
+ dst->x = x;
+ dst->y = y;
+ dst->z = z;
}
static inline boolean
diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
index 218822b..074783a 100644
--- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h
+++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
@@ -194,7 +194,7 @@ static inline float uf10_to_f32(uint16_t val)
if (exponent == 0) {
if (mantissa != 0) {
- const float scale = 1.0 / (1 << 20);
+ const float scale = 1.0 / (1 << 19);
f32.f = scale * mantissa;
}
}
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 8916a96..55343e8 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -831,7 +831,7 @@ align(int value, int alignment)
static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
- return (value + alignment - 1) & ~(alignment - 1);
+ return (value + alignment - 1) & ~((uint64_t)alignment - 1); /* rounds value up to a multiple of alignment (assumes a power-of-two alignment -- TODO confirm); alignment is widened before ~ so the mask is a full 64-bit value and the high bits of value survive the & */
}
/**
diff --git a/src/gallium/auxiliary/util/u_viewport.h b/src/gallium/auxiliary/util/u_viewport.h
new file mode 100644
index 0000000..a731b34
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_viewport.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VIEWPORT_H
+#define U_VIEWPORT_H
+
+#include "c99_compat.h"
+#include "pipe/p_state.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * Compute the depth range spanned by a viewport transform.
+ *
+ * Only vp->translate[2] and vp->scale[2] are read. When halfz is true the
+ * two candidate bounds are translate and translate + scale; otherwise they
+ * are translate - scale and translate + scale.
+ *
+ * \param vp    viewport state supplying the Z translate and scale terms
+ * \param halfz selects which of the two formulas above is used
+ * \param zmin  receives the smaller of the two bounds
+ * \param zmax  receives the larger of the two bounds
+ */
+static inline void
+util_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+ float *zmin, float *zmax)
+{
+ float a, b;
+ if (halfz) {
+ a = vp->translate[2];
+ b = vp->translate[2] + vp->scale[2];
+ } else {
+ a = vp->translate[2] - vp->scale[2];
+ b = vp->translate[2] + vp->scale[2];
+ }
+ /* A negative scale[2] puts a above b, so order the pair explicitly. */
+ *zmin = a < b ? a : b;
+ *zmax = a < b ? b : a;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index f7f572e..493e645 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -89,6 +89,7 @@ dri3_free_front_buffer(struct vl_dri3_screen *scrn,
{
xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
xshmfence_unmap_shm(buffer->shm_fence);
+ pipe_resource_reference(&buffer->texture, NULL);
FREE(buffer);
}
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index 6a759ae..df8809c 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include <assert.h>
+#include <fcntl.h>
#include "pipe/p_screen.h"
#include "pipe-loader/pipe_loader.h"
@@ -47,7 +48,7 @@ vl_drm_screen_create(int fd)
if (!vscreen)
return NULL;
- if (fd < 0 || (new_fd = dup(fd)) < 0)
+ if (fd < 0 || (new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3)) < 0)
goto free_screen;
if (pipe_loader_drm_probe_fd(&vscreen->dev, new_fd))
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index dcb6dfb..bf787d1 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
{
return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
}
-#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080
#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 6723941..51fe4bc 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -158,6 +158,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
+ if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer))
+ emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
fixup_shader_state(ctx, &emit.key);
unsigned dirty = ctx->dirty;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 4a5242a..45bab58 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -31,6 +31,7 @@
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
+#include "util/u_viewport.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
@@ -529,7 +530,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
if (fp->writes_pos) {
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
@@ -538,6 +539,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (fp->has_kill) {
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
+ if (!ctx->rasterizer->depth_clip) {
+ val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
+ }
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, val);
}
@@ -561,20 +565,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
+ uint8_t planes = ctx->rasterizer->clip_plane_enable;
val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
- /* TODO only use if prog doesn't use clipvertex/clipdist */
- val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
- MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
+ if (!emit->key.ucp_enables)
+ val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+ MIN2(util_bitcount(planes), 6));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
- if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
uint32_t planes = ctx->rasterizer->clip_plane_enable;
int count = 0;
+ if (emit->key.ucp_enables)
+ planes = 0;
+
while (planes && count < 6) {
int i = ffs(planes) - 1;
@@ -615,19 +623,35 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, val);
}
- if (dirty & FD_DIRTY_SCISSOR) {
+ if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+ int minx = scissor->minx;
+ int miny = scissor->miny;
+ int maxx = scissor->maxx;
+ int maxy = scissor->maxy;
+
+ /* Unfortunately there is no separate depth clip disable, only an all
+ * or nothing deal. So when we disable clipping, we must handle the
+ * viewport clip via scissors.
+ */
+ if (!ctx->rasterizer->depth_clip) {
+ struct pipe_viewport_state *vp = &ctx->viewport;
+ minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1])));
+ maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0])));
+ maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1])));
+ }
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
- OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
- A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
-
- ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
- ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
- ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
- ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
+ OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
+ A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));
+
+ ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, minx);
+ ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, miny);
+ ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, maxx);
+ ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, maxy);
}
if (dirty & FD_DIRTY_VIEWPORT) {
@@ -641,6 +665,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
+ if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ float zmin, zmax;
+ int depth = 24;
+ if (ctx->framebuffer.zsbuf) {
+ depth = util_format_get_component_bits(
+ pipe_surface_format(ctx->framebuffer.zsbuf),
+ UTIL_FORMAT_COLORSPACE_ZS, 0);
+ }
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
+ if (depth == 32) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
+ } else if (depth == 16) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+ } else {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+ }
+ }
+
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
int nr_cbufs = pfb->nr_cbufs;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 8152f8f..e9059ce 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -28,6 +28,7 @@
#include "pipe/p_state.h"
#include "util/u_string.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
@@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
delete_shader_stateobj(so);
}
+bool
+fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so,
+ const struct pipe_rasterizer_state *rast)
+{
+ uint64_t outputs = ir3_shader_outputs(so->shader);
+ /* True when any of these holds:
+ *  - the rasterizer state has depth_clip turned off,
+ *  - more than six bits are set in clip_plane_enable,
+ *  - the shader's output mask contains CLIP_VERTEX, CLIP_DIST0 or
+ *    CLIP_DIST1.
+ * NOTE(review): presumably these are the cases the hardware user clip
+ * planes cannot cover, so clipping must be done another way -- confirm.
+ */
+ return (!rast->depth_clip ||
+ util_bitcount(rast->clip_plane_enable) > 6 ||
+ outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
+ (1ULL << VARYING_SLOT_CLIP_DIST0) |
+ (1ULL << VARYING_SLOT_CLIP_DIST1)));
+}
+
+
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index b3fcc0c..b95df4c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
void fd3_prog_init(struct pipe_context *pctx);
+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *,
+ const struct pipe_rasterizer_state *);
+
#endif /* FD3_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index d9a7bb5..aeb61e7 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -1376,7 +1376,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
{
return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
}
-#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
+#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080
#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000
#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000
#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
@@ -3145,6 +3145,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000
#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 00e985d..8b350ae 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -31,6 +31,7 @@
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
+#include "util/u_viewport.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
@@ -544,12 +545,14 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
bool fragz = fp->has_kill | fp->writes_pos;
+ bool clamp = !ctx->rasterizer->depth_clip;
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, zsa->rb_depth_control |
+ COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
@@ -636,6 +639,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
+ if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+ float zmin, zmax;
+ int depth = 24;
+ if (ctx->framebuffer.zsbuf) {
+ depth = util_format_get_component_bits(
+ pipe_surface_format(ctx->framebuffer.zsbuf),
+ UTIL_FORMAT_COLORSPACE_ZS, 0);
+ }
+ util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+ &zmin, &zmax);
+
+ OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+ if (depth == 32) {
+ OUT_RING(ring, fui(zmin));
+ OUT_RING(ring, fui(zmax));
+ } else if (depth == 16) {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+ } else {
+ OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+ OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+ }
+ }
+
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
unsigned n = pfb->nr_cbufs;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index 7456c63..b3a4292 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -98,7 +98,8 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip)
- so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+ so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+ A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
if (cso->clip_halfz)
so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index ee0018f..fc423ec 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -469,6 +469,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
debug_printf("\n");
}
+uint64_t
+ir3_shader_outputs(const struct ir3_shader *so)
+{
+ return so->nir->info.outputs_written; /* mask stored in the shader's NIR info, returned unmodified; fd3_needs_manual_clipping tests it against 1ULL << VARYING_SLOT_* bits */
+}
+
/* This has to reach into the fd_context a bit more than the rest of
* ir3, but it needs to be aligned with the compiler, so both agree
* on which const regs hold what. And the logic is identical between
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index c17a76b..f430b6b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -272,6 +272,7 @@ void ir3_shader_destroy(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key, struct pipe_debug_callback *debug);
void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+uint64_t ir3_shader_outputs(const struct ir3_shader *so);
struct fd_ringbuffer;
struct fd_context;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 80e0990..3a23a9a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -726,7 +726,7 @@ void
CodeEmitterGK110::emitIMAD(const Instruction *i)
{
uint8_t addOp =
- (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+ i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
emitForm_21(i, 0x100, 0xa00);
@@ -773,7 +773,7 @@ CodeEmitterGK110::emitNOT(const Instruction *i)
break;
case FILE_MEMORY_CONST:
code[1] |= 0x4 << 28;
- setCAddress14(i->src(1));
+ setCAddress14(i->src(0));
break;
default:
assert(0);
@@ -1321,15 +1321,12 @@ void
CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
code[0] = 0x00000002 | ((qOp & 1) << 31);
- code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
+ code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall
defId(i->def(0), 2);
srcId(i->src(0), 10);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);
- if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
- code[1] |= 1 << 9; // dall
-
emitPredicate(i);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e62d807..d3e1708 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1682,7 +1682,7 @@ CodeEmitterGM107::emitNOT()
void
CodeEmitterGM107::emitIADD()
{
- if (!longIMMD(insn->src(1))) {
+ if (insn->src(1).getFile() != FILE_IMMEDIATE) {
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5c100000);
@@ -1707,6 +1707,7 @@ CodeEmitterGM107::emitIADD()
emitX (0x2b);
} else {
emitInsn(0x1c000000);
+ emitNEG (0x38, insn->src(0));
emitSAT (0x36);
emitX (0x35);
emitCC (0x34);
@@ -2300,6 +2301,7 @@ CodeEmitterGM107::emitAL2P()
{
emitInsn (0xefa00000);
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
+ emitPRED (0x2c);
emitO (0x20);
emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
emitGPR (0x08, insn->src(0).getIndirect(0));
@@ -2523,7 +2525,7 @@ CodeEmitterGM107::emitTEX()
if (insn->tex.rIndirectSrc >= 0) {
emitInsn (0xdeb80000);
- emitField(0x35, 2, lodm);
+ emitField(0x25, 2, lodm);
emitField(0x24, 1, insn->tex.useOffsets == 1);
} else {
emitInsn (0xc0380000);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 5d68e99..ca10848 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -2112,7 +2112,7 @@ makeInstructionLong(Instruction *insn)
insn->encSize = 8;
for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
- fn->bbArray[i]->binPos += 4;
+ fn->bbArray[i]->binPos += adj;
}
fn->binSize += adj;
insn->bb->binSize += adj;
@@ -2164,9 +2164,16 @@ replaceExitWithModifier(Function *func)
return;
}
}
- epilogue->binSize -= 8;
- func->binSize -= 8;
+
+ int adj = epilogue->getExit()->encSize;
+ epilogue->binSize -= adj;
+ func->binSize -= adj;
delete_Instruction(func->getProgram(), epilogue->getExit());
+
+ // There may be BB's that are laid out after the exit block
+ for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
+ func->bbArray[i]->binPos -= adj;
+ }
}
void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index bc94285..be5ee4f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -736,9 +736,15 @@ CodeEmitterNVC0::emitUADD(const Instruction *i)
void
CodeEmitterNVC0::emitIMAD(const Instruction *i)
{
+ uint8_t addOp =
+ i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
+
assert(i->encSize == 8);
emitForm_A(i, HEX64(20000000, 00000003));
+ assert(addOp != 3);
+ code[0] |= addOp << 8;
+
if (isSignedType(i->dType))
code[0] |= 1 << 7;
if (isSignedType(i->sType))
@@ -749,10 +755,6 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
if (i->flagsDef >= 0) code[1] |= 1 << 16;
if (i->flagsSrc >= 0) code[1] |= 1 << 23;
- if (i->src(2).mod.neg()) code[0] |= 0x10;
- if (i->src(1).mod.neg() ^
- i->src(0).mod.neg()) code[0] |= 0x20;
-
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
code[0] |= 1 << 6;
}
@@ -1356,16 +1358,13 @@ CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
void
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
{
- code[0] = 0x00000000 | (laneMask << 6);
+ code[0] = 0x00000200 | (laneMask << 6); // dall
code[1] = 0x48000000 | qOp;
defId(i->def(0), 14);
srcId(i->src(0), 20);
srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
- if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
- code[0] |= 1 << 9; // dall
-
emitPredicate(i);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index beb7b53..899a5cd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -182,6 +182,7 @@ public:
// mask of used components of source s
unsigned int srcMask(unsigned int s) const;
+ unsigned int texOffsetMask() const;
SrcRegister getSrc(unsigned int s) const
{
@@ -234,6 +235,35 @@ private:
const struct tgsi_full_instruction *insn;
};
+unsigned int Instruction::texOffsetMask() const // component mask scanInstruction passes along with each texture offset operand
+{
+ const struct tgsi_instruction_texture *tex = &insn->Texture;
+ assert(insn->Instruction.Texture);
+ // The mask depends only on the texture target; bit c stands for component c of the offset.
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_BUFFER:
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ return 0x1; // component 0 only
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_MSAA:
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return 0x3; // components 0 and 1
+ case TGSI_TEXTURE_3D:
+ return 0x7; // components 0, 1 and 2
+ default:
+ assert(!"Unexpected texture target");
+ return 0xf; // any other target trips the assert above; if execution continues, all four components are reported
+ }
+}
+
unsigned int Instruction::srcMask(unsigned int s) const
{
unsigned int mask = insn->Dst[0].Register.WriteMask;
@@ -942,6 +972,9 @@ private:
int inferSysValDirection(unsigned sn) const;
bool scanDeclaration(const struct tgsi_full_declaration *);
bool scanInstruction(const struct tgsi_full_instruction *);
+ void scanInstructionSrc(const Instruction& insn,
+ const Instruction::SrcRegister& src,
+ unsigned mask);
void scanProperty(const struct tgsi_full_property *);
void scanImmediate(const struct tgsi_full_immediate *);
@@ -1351,6 +1384,61 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
}
+void Source::scanInstructionSrc(const Instruction& insn, // records, in info and indirectTempArrays, what one source operand of insn touches
+ const Instruction::SrcRegister& src,
+ unsigned mask) // bit c set => component c of src is considered when marking inputs
+{
+ if (src.getFile() == TGSI_FILE_TEMPORARY) {
+ if (src.isIndirect(0))
+ indirectTempArrays.insert(src.getArrayId()); // remember temp arrays that are indexed indirectly
+ } else
+ if (src.getFile() == TGSI_FILE_BUFFER ||
+ src.getFile() == TGSI_FILE_IMAGE ||
+ (src.getFile() == TGSI_FILE_MEMORY &&
+ memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ 0x1 : 0x2; // bit 0x1 for a LOAD opcode, bit 0x2 for every other opcode
+ } else
+ if (src.getFile() == TGSI_FILE_OUTPUT) {
+ if (src.isIndirect(0)) {
+ // We don't know which one is accessed, just mark everything for
+ // reading. This is an extremely unlikely occurrence.
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].oread = 1;
+ } else {
+ info->out[src.getIndex(0)].oread = 1; // only the directly addressed output is flagged as read
+ }
+ }
+ if (src.getFile() != TGSI_FILE_INPUT) // the remainder only updates per-input component masks
+ return;
+
+ if (src.isIndirect(0)) {
+ for (unsigned i = 0; i < info->numInputs; ++i)
+ info->in[i].mask = 0xf; // index not known here: all four components of every input are marked
+ } else {
+ const int i = src.getIndex(0);
+ for (unsigned c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c))) // component c is not requested by the caller's mask
+ continue;
+ int k = src.getSwizzle(c);
+ if (k <= TGSI_SWIZZLE_W)
+ info->in[i].mask |= 1 << k; // mark the input component that the swizzle selects
+ }
+ switch (info->in[i].sn) { // some semantics keep only their low one or two mask bits
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_PRIMID:
+ case TGSI_SEMANTIC_FOG:
+ info->in[i].mask &= 0x1;
+ break;
+ case TGSI_SEMANTIC_PCOORD:
+ info->in[i].mask &= 0x3;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
{
Instruction insn(inst);
@@ -1383,66 +1471,19 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
indirectTempArrays.insert(dst.getArrayId());
} else
if (dst.getFile() == TGSI_FILE_BUFFER ||
- dst.getFile() == TGSI_FILE_IMAGE ||
+ dst.getFile() == TGSI_FILE_IMAGE ||
(dst.getFile() == TGSI_FILE_MEMORY &&
memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
info->io.globalAccess |= 0x2;
}
}
- for (unsigned s = 0; s < insn.srcCount(); ++s) {
- Instruction::SrcRegister src = insn.getSrc(s);
- if (src.getFile() == TGSI_FILE_TEMPORARY) {
- if (src.isIndirect(0))
- indirectTempArrays.insert(src.getArrayId());
- } else
- if (src.getFile() == TGSI_FILE_BUFFER ||
- src.getFile() == TGSI_FILE_IMAGE ||
- (src.getFile() == TGSI_FILE_MEMORY &&
- memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
- 0x1 : 0x2;
- } else
- if (src.getFile() == TGSI_FILE_OUTPUT) {
- if (src.isIndirect(0)) {
- // We don't know which one is accessed, just mark everything for
- // reading. This is an extremely unlikely occurrence.
- for (unsigned i = 0; i < info->numOutputs; ++i)
- info->out[i].oread = 1;
- } else {
- info->out[src.getIndex(0)].oread = 1;
- }
- }
- if (src.getFile() != TGSI_FILE_INPUT)
- continue;
- unsigned mask = insn.srcMask(s);
+ for (unsigned s = 0; s < insn.srcCount(); ++s)
+ scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
+
+ for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
+ scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());
- if (src.isIndirect(0)) {
- for (unsigned i = 0; i < info->numInputs; ++i)
- info->in[i].mask = 0xf;
- } else {
- const int i = src.getIndex(0);
- for (unsigned c = 0; c < 4; ++c) {
- if (!(mask & (1 << c)))
- continue;
- int k = src.getSwizzle(c);
- if (k <= TGSI_SWIZZLE_W)
- info->in[i].mask |= 1 << k;
- }
- switch (info->in[i].sn) {
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_PRIMID:
- case TGSI_SEMANTIC_FOG:
- info->in[i].mask &= 0x1;
- break;
- case TGSI_SEMANTIC_PCOORD:
- info->in[i].mask &= 0x3;
- break;
- default:
- break;
- }
- }
- }
return true;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
index 23414d5..b1076cf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
@@ -287,7 +287,10 @@ private:
bb.push(node);
- while (bb.getSize()) {
+ while (bb.getSize() || cross.getSize()) {
+ if (bb.getSize() == 0)
+ cross.moveTo(bb);
+
node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
assert(node);
if (!node->visit(sequence))
@@ -314,9 +317,6 @@ private:
}
}
nodes[count++] = node;
-
- if (bb.getSize() == 0)
- cross.moveTo(bb);
}
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 6a6b44c..4e60b1c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -750,6 +750,16 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->tex.rIndirectSrc = 0;
i->tex.sIndirectSrc = -1;
}
+ // Move the indirect reference to right after the coords
+ else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) {
+ Value *hnd = i->getIndirectR();
+
+ i->setIndirectR(NULL);
+ i->moveSources(arg, 1);
+ i->setSrc(arg, hnd);
+ i->tex.rIndirectSrc = 0;
+ i->tex.sIndirectSrc = -1;
+ }
} else
// (nvc0) generate and move the tsc/tic/array source to the front
if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
@@ -823,7 +833,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
for (n = 0; n < i->tex.useOffsets; n++) {
for (c = 0; c < 2; ++c) {
if ((n % 2) == 0 && c == 0)
- offs[n / 2] = i->offset[n][c].get();
+ bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get());
else
bld.mkOp3(OP_INSBF, TYPE_U32,
offs[n / 2],
@@ -2056,6 +2066,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
base = 0;
}
+ if (ind) {
+ Value *ptr;
+ ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r));
+ ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+ su->setIndirectR(ptr);
+ }
+
// get surface coordinates
for (c = 0; c < arg; ++c)
src[c] = su->getSrc(c);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6b52d7b..bf260bb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1902,8 +1902,10 @@ GCRA::resolveSplitsAndMerges()
// their registers should be identical.
if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
Instruction *phi = v->getInsn();
- for (int phis = 0; phi->srcExists(phis); ++phis)
+ for (int phis = 0; phi->srcExists(phis); ++phis) {
phi->getSrc(phis)->join = v;
+ phi->getSrc(phis)->reg.data.id = v->reg.data.id;
+ }
}
reg += v->reg.size;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index 7b0d074..1c71534 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -127,6 +127,8 @@ nv30_render_draw_elements(struct vbuf_render *render,
struct nouveau_pushbuf *push = nv30->screen->base.pushbuf;
unsigned i;
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs);
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
@@ -134,8 +136,10 @@ nv30_render_draw_elements(struct vbuf_render *render,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, false))
+ if (!nv30_state_validate(nv30, ~0, false)) {
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
return;
+ }
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, r->prim);
@@ -160,6 +164,8 @@ nv30_render_draw_elements(struct vbuf_render *render,
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
PUSH_RESET(push, BUFCTX_VTXTMP);
+
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
}
static void
@@ -172,6 +178,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
unsigned ps = fn + (pn ? 1 : 0);
unsigned i;
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs);
for (i = 0; i < r->vertex_info.num_attribs; i++) {
PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
@@ -179,8 +187,10 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, false))
+ if (!nv30_state_validate(nv30, ~0, false)) {
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
return;
+ }
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, r->prim);
@@ -197,6 +207,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
PUSH_RESET(push, BUFCTX_VTXTMP);
+
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
}
static void
@@ -383,6 +395,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv30_render_validate(nv30);
+ pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
if (nv30->draw_dirty & NV30_NEW_VIEWPORT)
draw_set_viewport_states(draw, 0, 1, &nv30->viewport);
if (nv30->draw_dirty & NV30_NEW_RASTERIZER)
@@ -448,6 +462,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (transfer[i])
pipe_buffer_unmap(pipe, transfer[i]);
+ pipe_mutex_lock(nv30->screen->base.push_mutex);
+
nv30->draw_dirty = 0;
nv30_state_release(nv30);
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index 6de61bc..fd21f99 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -38,6 +38,8 @@ nv30_fragprog_upload(struct nv30_context *nv30)
struct nv30_fragprog *fp = nv30->fragprog.program;
struct pipe_context *pipe = &nv30->base.pipe;
+ pipe_mutex_unlock(nv->screen->push_mutex);
+
if (unlikely(!fp->buffer))
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
@@ -60,6 +62,8 @@ nv30_fragprog_upload(struct nv30_context *nv30)
if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
+
+ pipe_mutex_lock(nv->screen->push_mutex);
}
void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.c b/src/gallium/drivers/nouveau/nv30/nv30_state.c
index fd604c2..43ecaac 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c
@@ -379,8 +379,9 @@ nv30_set_framebuffer_state(struct pipe_context *pipe,
struct nv30_miptree *zeta_mt = nv30_miptree(fb->zsbuf->texture);
if (color_mt->swizzled != zeta_mt->swizzled ||
- (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
- (util_format_get_blocksize(fb->cbufs[0]->format) > 2)) {
+ (color_mt->swizzled &&
+ (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
+ (util_format_get_blocksize(fb->cbufs[0]->format) > 2))) {
nv30->framebuffer.zsbuf = NULL;
debug_printf("Mismatched color and zeta formats, ignoring zeta.\n");
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
index 9ecbcd1..24fa3bb 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -115,7 +115,8 @@ nv30_transfer_rect_fragprog(struct nv30_context *nv30)
struct pipe_context *pipe = &nv30->base.pipe;
if (!fp) {
- nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4);
+ nv30->blit_fp =
+ pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4);
if (nv30->blit_fp) {
struct pipe_transfer *transfer;
u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 34d32d1..6ea5a47 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -161,7 +161,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
- F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+ F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T),
F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
@@ -203,7 +203,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
- C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+ C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index c764f5c..383bee3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -307,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
const unsigned r = pso->output[i].register_index;
b = pso->output[i].output_buffer;
+ if (r >= info->numOutputs)
+ continue;
+
for (c = 0; c < pso->output[i].num_components; ++c)
so->map[base[b] + p + c] = info->out[r].slot[s + c];
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 65f7338..a67a390 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -1,5 +1,6 @@
#include "util/u_format.h"
+#include "util/u_viewport.h"
#include "nv50/nv50_context.h"
@@ -265,8 +266,12 @@ nv50_validate_viewport(struct nv50_context *nv50)
PUSH_DATAf(push, vpt->scale[1]);
PUSH_DATAf(push, vpt->scale[2]);
- zmin = vpt->translate[2] - fabsf(vpt->scale[2]);
- zmax = vpt->translate[2] + fabsf(vpt->scale[2]);
+ /* If the halfz setting ever changes, the viewports will also get
+ * updated. The rast will get updated before the validate function has a
+ * chance to hit, so we can just use it directly without an atom
+ * dependency.
+ */
+ util_viewport_zmin_zmax(vpt, nv50->rast->pipe.clip_halfz, &zmin, &zmax);
#ifdef NV50_SCISSORS_CLIPPING
BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 28d6fec..12d5b0e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -514,10 +514,8 @@ nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
}
-static void
-nvc0_context_get_sample_position(struct pipe_context *pipe,
- unsigned sample_count, unsigned sample_index,
- float *xy)
+const void *
+nvc0_get_sample_locations(unsigned sample_count)
{
static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
static const uint8_t ms2[2][2] = {
@@ -549,8 +547,22 @@ nvc0_context_get_sample_position(struct pipe_context *pipe,
case 8: ptr = ms8; break;
default:
assert(0);
- return; /* bad sample count -> undefined locations */
+ return NULL; /* bad sample count -> undefined locations */
}
+ return ptr;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+ unsigned sample_count, unsigned sample_index,
+ float *xy)
+{
+ const uint8_t (*ptr)[2];
+
+ ptr = nvc0_get_sample_locations(sample_count);
+ if (!ptr)
+ return;
+
xy[0] = ptr[sample_index][0] * 0.0625f;
xy[1] = ptr[sample_index][1] * 0.0625f;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 8d27300..ff5467c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -278,6 +278,7 @@ struct pipe_context *nvc0_create(struct pipe_screen *, void *, unsigned flags);
void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
bool on_flush);
void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+const void *nvc0_get_sample_locations(unsigned);
/* nvc0_draw.c */
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index aba9511..e1ff3b7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -500,11 +500,14 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
for (i = 0; i < pso->num_outputs; ++i) {
unsigned s = pso->output[i].start_component;
unsigned p = pso->output[i].dst_offset;
+ const unsigned r = pso->output[i].register_index;
b = pso->output[i].output_buffer;
+ if (r >= info->numOutputs)
+ continue;
+
for (c = 0; c < pso->output[i].num_components; ++c)
- tfb->varying_index[b][p++] =
- info->out[pso->output[i].register_index].slot[s + c];
+ tfb->varying_index[b][p++] = info->out[r].slot[s + c];
tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
tfb->stream[b] = pso->output[i].stream;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index ad44e85..ce59484 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -2,6 +2,7 @@
#include "util/u_format.h"
#include "util/u_framebuffer.h"
#include "util/u_math.h"
+#include "util/u_viewport.h"
#include "nvc0/nvc0_context.h"
@@ -211,6 +212,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATAf(push, xy[1]);
}
+ if (screen->base.class_3d >= GM200_3D_CLASS) {
+ const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+ uint32_t val[4] = {};
+
+ for (i = 0; i < 16; i++) {
+ val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
+ val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
+ }
+
+ BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+ PUSH_DATAp(push, val, 4);
+ }
+
if (serialize)
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
@@ -316,8 +330,12 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
PUSH_DATA (push, (w << 16) | x);
PUSH_DATA (push, (h << 16) | y);
- zmin = vp->translate[2] - fabsf(vp->scale[2]);
- zmax = vp->translate[2] + fabsf(vp->scale[2]);
+ /* If the halfz setting ever changes, the viewports will also get
+ * updated. The rast will get updated before the validate function has a
+ * chance to hit, so we can just use it directly without an atom
+ * dependency.
+ */
+ util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);
BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
PUSH_DATAf(push, zmin);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 1a5d8ec..efd90de 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -589,13 +589,11 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
PUSH_DATA (nvc0->base.pushbuf, 0);
}
- if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
- /* Invalidate all CP textures because they are aliased. */
- for (int i = 0; i < nvc0->num_textures[5]; i++)
- nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
- nvc0->textures_dirty[5] = ~0;
- nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
- }
+ /* Invalidate all CP textures because they are aliased. */
+ for (int i = 0; i < nvc0->num_textures[5]; i++)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
+ nvc0->textures_dirty[5] = ~0;
+ nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
}
bool
@@ -709,11 +707,9 @@ void nvc0_validate_samplers(struct nvc0_context *nvc0)
PUSH_DATA (nvc0->base.pushbuf, 0);
}
- if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
- /* Invalidate all CP samplers because they are aliased. */
- nvc0->samplers_dirty[5] = ~0;
- nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
- }
+ /* Invalidate all CP samplers because they are aliased. */
+ nvc0->samplers_dirty[5] = ~0;
+ nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
}
/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index cae621c..3d20c68 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -835,7 +835,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
@@ -981,7 +981,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nvc0->vertprog->vp.need_draw_parameters) {
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 2048);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
if (!info->indirect) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index a1d1d3e..d172d73 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -237,7 +237,13 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
+
+ /* Invalidate all 3D samplers because they are aliased. */
+ for (int s = 0; s < 5; s++)
+ nvc0->samplers_dirty[s] = ~0;
+ nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}
+
/* (Code duplicated at bottom for various non-convincing reasons.
* E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
* entries to avoid a subchannel switch.
@@ -690,6 +696,14 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
}
nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ /* Invalidate all 3D textures because they are aliased. */
+ for (int s = 0; s < 5; s++) {
+ for (int i = 0; i < nvc0->num_textures[s]; i++)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+ nvc0->textures_dirty[s] = ~0;
+ }
+ nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index d100a9d..341f406 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -190,7 +190,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
/* VAP. */
R300_INIT_ATOM(viewport_state, 9);
R300_INIT_ATOM(pvs_flush, 2);
- R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9);
+ R300_INIT_ATOM(vap_invariant_state, is_r500 || !has_tcl ? 11 : 9);
R300_INIT_ATOM(vertex_stream_state, 0);
R300_INIT_ATOM(vs_state, 0);
R300_INIT_ATOM(vs_constants, 0);
@@ -314,6 +314,14 @@ static void r300_init_states(struct pipe_context *pipe)
if (r300->screen->caps.is_r500) {
OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0);
+ } else if (!r300->screen->caps.has_tcl) {
+ /* RSxxx:
+ * Static VAP setup since r300_emit_vs_state() is never called.
+ */
+ OUT_CB_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
+ R300_PVS_NUM_CNTLRS(5) |
+ R300_PVS_NUM_FPUS(2) |
+ R300_PVS_VF_MAX_VTX_NUM(5));
}
END_CB;
}
diff --git a/src/gallium/drivers/radeon/cayman_msaa.c b/src/gallium/drivers/radeon/cayman_msaa.c
index 9412e89..6d6998e 100644
--- a/src/gallium/drivers/radeon/cayman_msaa.c
+++ b/src/gallium/drivers/radeon/cayman_msaa.c
@@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx)
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
+ default:
+ case 1:
+ radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+ radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+ break;
case 2:
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 23ddff4..8de3b18 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -703,8 +703,9 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
}
rtex->cmask_buffer = (struct r600_resource *)
- pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT, rtex->cmask.size);
+ r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+ rtex->cmask.size,
+ rtex->cmask.alignment);
if (rtex->cmask_buffer == NULL) {
rtex->cmask.size = 0;
return;
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 74b36ec..9ab5af9 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -513,6 +513,16 @@ void radeon_llvm_emit_store(
}
}
+/* Emit a branch to the given default target for the current block if
+ * applicable -- that is, if the current block does not already contain a
+ * branch from a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
+{
+ if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
+ LLVMBuildBr(builder, target);
+}
+
static void bgnloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -577,28 +587,8 @@ static void else_emit(
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
-
- /* We need to add a terminator to the current block if the previous
- * instruction was an ENDIF.Example:
- * IF
- * [code]
- * IF
- * [code]
- * ELSE
- * [code]
- * ENDIF <--
- * ELSE<--
- * [code]
- * ENDIF
- */
- if (current_block != current_branch->if_block) {
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
- if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
+ emit_default_branch(gallivm->builder, current_branch->endif_block);
current_branch->has_else = 1;
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
}
@@ -611,26 +601,15 @@ static void endif_emit(
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
- /* If we have consecutive ENDIF instructions, then the first ENDIF
- * will not have a terminator, so we need to add one. */
- if (current_block != current_branch->if_block
- && current_block != current_branch->else_block
- && !LLVMGetBasicBlockTerminator(current_block)) {
+ emit_default_branch(gallivm->builder, current_branch->endif_block);
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
+ /* Need to fixup an empty else block if there was no ELSE opcode. */
if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
- if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
-
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
ctx->branch_depth--;
}
@@ -644,9 +623,7 @@ static void endloop_emit(
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
- if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
- LLVMBuildBr(gallivm->builder, current_loop->loop_block);
- }
+ emit_default_branch(gallivm->builder, current_loop->loop_block);
LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
ctx->loop_depth--;
@@ -1326,23 +1303,32 @@ static void emit_lsb(const struct lp_build_tgsi_action * action,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef args[2] = {
emit_data->args[0],
/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
* add special code to check for x=0. The reason is that
* the LLVM behavior for x=0 is different from what we
- * need here.
- *
- * The hardware already implements the correct behavior.
+ * need here. However, LLVM also assumes that ffs(x) is
+ * in [0, 31], but GLSL expects that ffs(0) = -1, so
+ * a conditional assignment to handle 0 is still required.
*/
- lp_build_const_int32(gallivm, 1)
+ LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
};
- emit_data->output[emit_data->chan] =
+ LLVMValueRef lsb =
lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
emit_data->dst_type, args, ARRAY_SIZE(args),
LLVMReadNoneAttribute);
+
+ /* TODO: We need an intrinsic to skip this conditional. */
+ /* Check for zero: */
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSelect(builder,
+ LLVMBuildICmp(builder, LLVMIntEQ, args[0],
+ bld_base->uint_bld.zero, ""),
+ lp_build_const_int32(gallivm, -1), lsb, "");
}
/* Find the last bit set. */
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index d8ec2a3..8e23b61 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -519,6 +519,12 @@ static void cik_sdma_copy(struct pipe_context *ctx,
return;
}
+ /* Carrizo SDMA texture copying is very broken for some users.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=97029
+ */
+ if (sctx->b.family == CHIP_CARRIZO)
+ goto fallback;
+
if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box))
return;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 484b252..c279c77 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -158,6 +158,7 @@ static void si_set_global_binding(
static void si_initialize_compute(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+ uint64_t bc_va;
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
radeon_emit(cs, 0);
@@ -193,6 +194,17 @@ static void si_initialize_compute(struct si_context *sctx)
0x190 /* Default value */);
}
+ /* Set the pointer to border colors. */
+ bc_va = sctx->border_color_buffer->gpu_address;
+
+ if (sctx->b.chip_class >= CIK) {
+ radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
+ radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */
+ radeon_emit(cs, bc_va >> 40); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */
+ } else {
+ radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
+ }
+
sctx->cs_shader_state.emitted_program = NULL;
sctx->cs_shader_state.initialized = true;
}
@@ -459,6 +471,20 @@ static void si_launch_grid(
si_decompress_compute_textures(sctx);
+ /* Add buffer sizes for memory checking in need_cs_space. */
+ r600_context_add_resource_size(ctx, &program->shader.bo->b.b);
+ /* TODO: add the scratch buffer */
+
+ if (info->indirect) {
+ r600_context_add_resource_size(ctx, info->indirect);
+
+ /* The hw doesn't read the indirect buffer via TC L2. */
+ if (r600_resource(info->indirect)->TC_L2_dirty) {
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ r600_resource(info->indirect)->TC_L2_dirty = false;
+ }
+ }
+
si_need_cs_space(sctx);
if (!sctx->cs_shader_state.initialized)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index bbd02e9..fe4cb29 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -819,9 +819,9 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
util_memcpy_cpu_to_le32(tmp, ptr, size);
}
-void si_set_constant_buffer(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- uint slot, struct pipe_constant_buffer *input)
+static void si_set_constant_buffer(struct si_context *sctx,
+ struct si_buffer_resources *buffers,
+ uint slot, struct pipe_constant_buffer *input)
{
assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
@@ -881,6 +881,12 @@ void si_set_constant_buffer(struct si_context *sctx,
buffers->desc.dirty_mask |= 1u << slot;
}
+void si_set_rw_buffer(struct si_context *sctx,
+ uint slot, struct pipe_constant_buffer *input)
+{
+ si_set_constant_buffer(sctx, &sctx->rw_buffers, slot, input);
+}
+
static void si_pipe_set_constant_buffer(struct pipe_context *ctx,
uint shader, uint slot,
struct pipe_constant_buffer *input)
@@ -1052,10 +1058,10 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
* and most other clients can use TC L2 as well, we don't need
* to flush it.
*
- * The only case which requires flushing it is VGT DMA index
- * fetching, which is a rare case. Thus, flag the TC L2
- * dirtiness in the resource and handle it when index fetching
- * is used.
+ * The only cases which require flushing it are VGT DMA index
+ * fetching (on <= CIK) and indirect draw data, which are rare
+ * cases. Thus, flag the TC L2 dirtiness in the resource and
+ * handle it at draw call time.
*/
for (i = 0; i < sctx->b.streamout.num_targets; i++)
if (sctx->b.streamout.targets[i])
@@ -1177,8 +1183,7 @@ static void si_set_polygon_stipple(struct pipe_context *ctx,
cb.user_buffer = stipple;
cb.buffer_size = sizeof(stipple);
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_PS_CONST_POLY_STIPPLE, &cb);
+ si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
}
/* TEXTURE METADATA ENABLE/DISABLE */
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index e8e0403..69478a8 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -216,7 +216,8 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->clip_regs);
si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
- si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+ ctx->msaa_sample_locs.nr_samples = 0;
+ si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
si_mark_atom_dirty(ctx, &ctx->cb_render_state);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 88f4f20..9dd9ef5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -212,8 +212,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
si_begin_new_cs(sctx);
r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */
- /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
- * with a NULL buffer). We need to use a dummy buffer instead. */
+ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
+ * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->b.chip_class == CIK) {
sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
PIPE_USAGE_DEFAULT, 16);
@@ -228,6 +228,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
}
}
+ si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
+ &sctx->null_const_buf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
+ &sctx->null_const_buf);
+
/* Clear the NULL constant buffer, because loads should return zeros. */
sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
sctx->null_const_buf.buffer->width0, 0,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index dbbf9b6..2661972 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -168,6 +168,11 @@ struct si_clip_state {
struct pipe_clip_state state;
};
+struct si_sample_locs {
+ struct r600_atom atom;
+ unsigned nr_samples;
+};
+
struct si_sample_mask {
struct r600_atom atom;
uint16_t sample_mask;
@@ -212,7 +217,7 @@ struct si_context {
/* Atom declarations. */
struct r600_atom cache_flush;
struct si_framebuffer framebuffer;
- struct r600_atom msaa_sample_locs;
+ struct si_sample_locs msaa_sample_locs;
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5e5bf68..5ead940 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1014,7 +1014,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
if (type == TGSI_TYPE_DOUBLE) {
LLVMValueRef value2;
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
- lp_build_const_int32(gallivm, swizzle + 1));
+ lp_build_const_int32(gallivm, 1));
value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false);
return radeon_llvm_emit_fetch_double(bld_base, value, value2);
}
@@ -1846,13 +1846,13 @@ static LLVMValueRef fetch_constant(
result = bitcast(bld_base, type, result);
else {
LLVMValueRef addr2, result2;
- addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+ addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
addr2 = lp_build_add(&bld_base->uint_bld, addr2,
- lp_build_const_int32(base->gallivm, idx * 4));
+ lp_build_const_int32(base->gallivm, (idx + 1) * 4));
- result2 = buffer_load_const(base->gallivm->builder, ctx->const_buffers[buf],
+ result2 = buffer_load_const(base->gallivm->builder, bufp,
addr2, ctx->f32);
result = radeon_llvm_emit_fetch_double(bld_base,
@@ -5072,7 +5072,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
}
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
- for (chan = 0; chan < 2; chan++) {
+ for (chan = 0; chan < 4; chan++) {
LLVMValueRef args[4];
LLVMValueRef llvm_chan;
unsigned schan;
@@ -6567,6 +6567,41 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
radeon_llvm_dispose(&ctx.radeon_bld);
+ /* Validate SGPR and VGPR usage for compute to detect compiler bugs.
+ * LLVM 3.9svn has this bug.
+ */
+ if (sel->type == PIPE_SHADER_COMPUTE) {
+ unsigned *props = sel->info.properties;
+ unsigned wave_size = 64;
+ unsigned max_vgprs = 256;
+ unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+ unsigned max_sgprs_per_wave = 128;
+ unsigned min_waves_per_cu =
+ DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+ props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH],
+ wave_size);
+ unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
+
+ max_vgprs = max_vgprs / min_waves_per_simd;
+ max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
+
+ if (shader->config.num_sgprs > max_sgprs ||
+ shader->config.num_vgprs > max_vgprs) {
+ fprintf(stderr, "LLVM failed to compile a shader correctly: "
+ "SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n",
+ shader->config.num_sgprs, shader->config.num_vgprs,
+ max_sgprs, max_vgprs);
+
+ /* Just terminate the process, because dependent
+ * shaders can hang due to bad input data, but use
+ * the env var to allow shader-db to work.
+ */
+ if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false))
+ abort();
+ }
+ }
+
/* Add the scratch offset to input SGPRs. */
if (shader->config.scratch_bytes_per_wave)
shader->info.num_input_sgprs += 1; /* scratch byte offset */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3bbb81a..a641b5d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -460,6 +460,10 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ /* Only set dual source blending for MRT0 to avoid a hang. */
+ if (i >= 1 && blend->dual_src_blend)
+ continue;
+
if (!state->rt[j].colormask)
continue;
@@ -620,8 +624,7 @@ static void si_set_clip_state(struct pipe_context *ctx,
cb.user_buffer = state->ucp;
cb.buffer_offset = 0;
cb.buffer_size = 4*4*8;
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_VS_CONST_CLIP_PLANES, &cb);
+ si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
@@ -847,9 +850,13 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
return;
if (sctx->framebuffer.nr_samples > 1 &&
- (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
+ (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) {
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+ if (sctx->b.family >= CHIP_POLARIS10)
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
+ }
+
r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
si_pm4_bind_state(sctx, rasterizer, rs);
@@ -1586,6 +1593,10 @@ static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
if (view_target == PIPE_TEXTURE_CUBE ||
view_target == PIPE_TEXTURE_CUBE_ARRAY)
res_target = view_target;
+ /* If interpreting cubemaps as something else, set 2D_ARRAY. */
+ else if (res_target == PIPE_TEXTURE_CUBE ||
+ res_target == PIPE_TEXTURE_CUBE_ARRAY)
+ res_target = PIPE_TEXTURE_2D_ARRAY;
switch (res_target) {
default:
@@ -2395,21 +2406,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
assert(0);
}
constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
+ si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
- /* Smoothing (only possible with nr_samples == 1) uses the same
- * sample locations as the MSAA it simulates.
- *
- * Therefore, don't update the sample locations when
- * transitioning from no AA to smoothing-equivalent AA, and
- * vice versa.
- */
- if ((sctx->framebuffer.nr_samples != 1 ||
- old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
- (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
- old_nr_samples != 1))
- si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}
}
@@ -2536,8 +2535,37 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
- cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
- SI_NUM_SMOOTH_AA_SAMPLES);
+ /* Smoothing (only possible with nr_samples == 1) uses the same
+ * sample locations as the MSAA it simulates.
+ */
+ if (nr_samples <= 1 && sctx->smoothing_enabled)
+ nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+
+ /* On Polaris, the small primitive filter uses the sample locations
+ * even when MSAA is off, so we need to make sure they're set to 0.
+ */
+ if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) &&
+ (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
+ sctx->msaa_sample_locs.nr_samples = nr_samples;
+ cayman_emit_msaa_sample_locs(cs, nr_samples);
+ }
+
+ if (sctx->b.family >= CHIP_POLARIS10) {
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ S_028830_LINE_FILTER_DISABLE(1); /* line bug */
+
+ /* The alternative of setting sample locations to 0 would
+ * require a DB flush to avoid Z errors, see
+ * https://bugs.freedesktop.org/show_bug.cgi?id=96908
+ */
+ if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
+ small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
+ small_prim_filter_cntl);
+ }
}
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
@@ -3246,8 +3274,7 @@ static void si_set_tess_state(struct pipe_context *ctx,
(void*)array, sizeof(array),
&cb.buffer_offset);
- si_set_constant_buffer(sctx, &sctx->rw_buffers,
- SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
+ si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
@@ -3337,7 +3364,7 @@ void si_init_state_functions(struct si_context *sctx)
si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush);
si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
- si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
+ si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
@@ -3810,11 +3837,6 @@ static void si_init_config(struct si_context *sctx)
if (sctx->b.family == CHIP_STONEY)
si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
- if (sctx->b.family >= CHIP_POLARIS10)
- si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- S_028830_LINE_FILTER_DISABLE(1)); /* line bug */
-
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
if (sctx->b.chip_class >= CIK)
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index aea98ae..97f6dfa 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -269,9 +269,8 @@ void si_update_compressed_colortex_masks(struct si_context *sctx);
void si_emit_graphics_shader_userdata(struct si_context *sctx,
struct r600_atom *atom);
void si_emit_compute_shader_userdata(struct si_context *sctx);
-void si_set_constant_buffer(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- uint slot, struct pipe_constant_buffer *input);
+void si_set_rw_buffer(struct si_context *sctx,
+ uint slot, struct pipe_constant_buffer *input);
/* si_state.c */
struct si_shader_selector;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3c11d14..4e1c599 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -494,13 +494,13 @@ static void si_emit_draw_registers(struct si_context *sctx,
radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
sctx->last_primitive_restart_en = info->primitive_restart;
- if (info->primitive_restart &&
- (info->restart_index != sctx->last_restart_index ||
- sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
- radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
- info->restart_index);
- sctx->last_restart_index = info->restart_index;
- }
+ }
+ if (info->primitive_restart &&
+ (info->restart_index != sctx->last_restart_index ||
+ sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
+ radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ info->restart_index);
+ sctx->last_restart_index = info->restart_index;
}
}
@@ -963,6 +963,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_resource(ib.buffer)->TC_L2_dirty = false;
}
+ if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ r600_resource(info->indirect)->TC_L2_dirty = false;
+ }
+
/* Check flush flags. */
if (sctx->b.flags)
si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a7af76d..932d017 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -759,10 +759,10 @@ static void si_shader_ps(struct si_shader *shader)
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
- /* Prefer RE_Z if the shader is complex enough. The requirement is either:
- * - the shader uses at least 2 VMEM instructions, or
- * - the code size is at least 50 2-dword instructions or 100 1-dword
- * instructions.
+ /* DON'T USE EARLY_Z_THEN_RE_Z !!!
+ *
+ * It decreases performance by 15% in DiRT: Showdown on Ultra settings.
+ * And it has pretty complex shaders.
*
* Shaders with side effects that must execute independently of the
* depth test require LATE_Z.
@@ -770,9 +770,6 @@ static void si_shader_ps(struct si_shader *shader)
if (info->writes_memory &&
!info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
shader->z_order = V_02880C_LATE_Z;
- else if (info->num_memory_instructions >= 2 ||
- shader->binary.code_size > 100*4)
- shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
else
shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
}
@@ -2052,6 +2049,9 @@ bool si_update_shaders(struct si_context *sctx)
if (sctx->b.chip_class == SI)
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+
+ if (sctx->framebuffer.nr_samples <= 1)
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}
}
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 8d49b9d..341ee12 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -7209,6 +7209,7 @@
/* */
#define R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL 0x028830 /* Polaris */
#define S_028830_SMALL_PRIM_FILTER_ENABLE(x) (((x) & 0x1) << 0)
+#define C_028830_SMALL_PRIM_FILTER_ENABLE 0xFFFFFFFE
#define S_028830_TRIANGLE_FILTER_DISABLE(x) (((x) & 0x1) << 1)
#define S_028830_LINE_FILTER_DISABLE(x) (((x) & 0x1) << 2)
#define S_028830_POINT_FILTER_DISABLE(x) (((x) & 0x1) << 3)
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index abfef0f..9e37e23 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -173,10 +173,16 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));
- for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
+ /* In case the number of samplers and sampler_views doesn't match,
+ * loop over the lower of the two counts.
+ */
+ key->num_textures = MIN2(svga->curr.num_sampler_views[shader],
+ svga->curr.num_samplers[shader]);
+
+ for (i = 0; i < key->num_textures; i++) {
struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
- if (view) {
- assert(svga->curr.sampler[shader][i]);
+ const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
+ if (view && sampler) {
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
@@ -195,7 +201,7 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
}
}
- if (!svga->curr.sampler[shader][i]->normalized_coords) {
+ if (!sampler->normalized_coords) {
assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
key->tex[i].width_height_idx = idx++;
key->tex[i].unnormalized = TRUE;
@@ -208,7 +214,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
key->tex[i].swizzle_a = view->swizzle_a;
}
}
- key->num_textures = svga->curr.num_sampler_views[shader];
}
diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am
index b67413a..3459af3 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -22,7 +22,7 @@
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
-AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) -std=c++11
+AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS)
noinst_LTLIBRARIES = libmesaswr.la
@@ -31,7 +31,7 @@ libmesaswr_la_SOURCES = $(LOADER_SOURCES)
COMMON_CXXFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CXXFLAGS) \
- -std=c++11 \
+ $(SWR_CXX11_CXXFLAGS) \
-I$(builddir)/rasterizer/scripts \
-I$(builddir)/rasterizer/jitter \
-I$(srcdir)/rasterizer \
@@ -148,7 +148,7 @@ distclean-local:
lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
libswrAVX_la_CXXFLAGS = \
- -march=core-avx-i \
+ $(SWR_AVX_CXXFLAGS) \
-DKNOB_ARCH=KNOB_ARCH_AVX \
$(COMMON_CXXFLAGS)
@@ -171,7 +171,7 @@ libswrAVX_la_LDFLAGS = \
$(COMMON_LDFLAGS)
libswrAVX2_la_CXXFLAGS = \
- -march=core-avx2 \
+ $(SWR_AVX2_CXXFLAGS) \
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
$(COMMON_CXXFLAGS)
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 21e3bde..f6bacfd 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -28,6 +28,7 @@
#include <xf86drm.h>
#include <xf86drmMode.h>
+#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/ralloc.h"
@@ -329,10 +330,19 @@ vc4_bo_open_handle(struct vc4_screen *screen,
uint32_t winsys_stride,
uint32_t handle, uint32_t size)
{
- struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+ struct vc4_bo *bo;
assert(size);
+ pipe_mutex_lock(screen->bo_handles_mutex);
+
+ bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+ if (bo) {
+ pipe_reference(NULL, &bo->reference);
+ goto done;
+ }
+
+ bo = CALLOC_STRUCT(vc4_bo);
pipe_reference_init(&bo->reference, 1);
bo->screen = screen;
bo->handle = handle;
@@ -347,6 +357,10 @@ vc4_bo_open_handle(struct vc4_screen *screen,
bo->map = malloc(bo->size);
#endif
+ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+ pipe_mutex_unlock(screen->bo_handles_mutex);
return bo;
}
@@ -399,7 +413,11 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
bo->handle);
return -1;
}
+
+ pipe_mutex_lock(bo->screen->bo_handles_mutex);
bo->private = false;
+ util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
+ pipe_mutex_unlock(bo->screen->bo_handles_mutex);
return fd;
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index b77506e..71a4426 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -25,6 +25,7 @@
#define VC4_BUFMGR_H
#include <stdint.h>
+#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "vc4_qir.h"
@@ -87,11 +88,27 @@ vc4_bo_reference(struct vc4_bo *bo)
static inline void
vc4_bo_unreference(struct vc4_bo **bo)
{
+ struct vc4_screen *screen;
if (!*bo)
return;
- if (pipe_reference(&(*bo)->reference, NULL))
- vc4_bo_last_unreference(*bo);
+ if ((*bo)->private) {
+ /* Avoid the mutex for private BOs */
+ if (pipe_reference(&(*bo)->reference, NULL))
+ vc4_bo_last_unreference(*bo);
+ } else {
+ screen = (*bo)->screen;
+ pipe_mutex_lock(screen->bo_handles_mutex);
+
+ if (pipe_reference(&(*bo)->reference, NULL)) {
+ util_hash_table_remove(screen->bo_handles,
+ (void *)(uintptr_t)(*bo)->handle);
+ vc4_bo_last_unreference(*bo);
+ }
+
+ pipe_mutex_unlock(screen->bo_handles_mutex);
+ }
+
*bo = NULL;
}
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index c271a95..77013d9 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -143,6 +143,8 @@ struct vc4_compiled_shader {
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs;
+ bool disable_early_z;
+
uint8_t num_inputs;
/* Byte offsets for the start of the vertex attributes 0-7, and the
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index 5d64797..8b192da 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -71,7 +71,9 @@ vc4_emit_state(struct pipe_context *pctx)
vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy);
}
- if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
+ if (vc4->dirty & (VC4_DIRTY_RASTERIZER |
+ VC4_DIRTY_ZSA |
+ VC4_DIRTY_COMPILED_FS)) {
uint8_t ez_enable_mask_out = ~0;
/* HW-2905: If the RCL ends up doing a full-res load when
@@ -83,7 +85,7 @@ vc4_emit_state(struct pipe_context *pctx)
* was seeing bad rendering on glxgears -samples 4 even in
* that case.
*/
- if (vc4->msaa)
+ if (vc4->msaa || vc4->prog.fs->disable_early_z)
ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z;
cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index d31b673..5df798a 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -110,11 +110,12 @@ qir_opt_vpm(struct vc4_compile *c)
* sources are independent of previous instructions
*/
if (temps == 1) {
- list_del(&inst->link);
inst->src[j] = mov->src[0];
- list_replace(&mov->link, &inst->link);
- c->defs[temp] = NULL;
- free(mov);
+
+ list_del(&inst->link);
+ list_addtail(&inst->link, &mov->link);
+ qir_remove_instruction(c, mov);
+
progress = true;
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 5d036eb..3572cf7 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2032,6 +2032,11 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
shader->input_slots[shader->num_inputs] = *slot;
shader->num_inputs++;
}
+
+ /* Note: the temporary clone in c->s has been freed. */
+ nir_shader *orig_shader = key->shader_state->base.ir.nir;
+ if (orig_shader->info.outputs_written & (1 << FRAG_RESULT_DEPTH))
+ shader->disable_early_z = true;
} else {
shader->num_inputs = c->num_inputs;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 20f137a..aabe593 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -534,8 +534,8 @@ vc4_resource_from_handle(struct pipe_screen *pscreen,
struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl);
struct pipe_resource *prsc = &rsc->base.b;
struct vc4_resource_slice *slice = &rsc->slices[0];
- uint32_t expected_stride = align(prsc->width0 / rsc->cpp,
- vc4_utile_width(rsc->cpp));
+ uint32_t expected_stride =
+ align(prsc->width0, vc4_utile_width(rsc->cpp)) * rsc->cpp;
if (!rsc)
return NULL;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 733275a..0fb2bbc 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -30,6 +30,7 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_format.h"
+#include "util/u_hash_table.h"
#include "util/ralloc.h"
#include "vc4_screen.h"
@@ -82,7 +83,11 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen)
static void
vc4_screen_destroy(struct pipe_screen *pscreen)
{
+ struct vc4_screen *screen = vc4_screen(pscreen);
+
+ util_hash_table_destroy(screen->bo_handles);
vc4_bufmgr_destroy(pscreen);
+ close(screen->fd);
ralloc_free(pscreen);
}
@@ -488,6 +493,18 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
return retval == usage;
}
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+ return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+ return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
struct pipe_screen *
vc4_screen_create(int fd)
{
@@ -505,6 +522,8 @@ vc4_screen_create(int fd)
screen->fd = fd;
list_inithead(&screen->bo_cache.time_list);
+ pipe_mutex_init(screen->bo_handles_mutex);
+ screen->bo_handles = util_hash_table_create(handle_hash, handle_compare);
vc4_fence_init(screen);
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 03f76b2..281d254 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -73,6 +73,9 @@ struct vc4_screen {
uint32_t bo_count;
} bo_cache;
+ struct util_hash_table *bo_handles;
+ pipe_mutex bo_handles_mutex;
+
uint32_t bo_size;
uint32_t bo_count;
};
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index ca5812ba..396f563 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -318,17 +318,7 @@ struct pipe_blend_state
struct pipe_blend_color
{
- /**
- * Making the color array explicitly 16-byte aligned provides a hint to
- * compilers to make more efficient auto-vectorization optimizations.
- * The actual performance gains from vectorizing the blend color array are
- * fairly minimal, if any, but the alignment is necessary to work around
- * buggy vectorization in some compilers which fail to generate the correct
- * unaligned accessors resulting in a segfault. Specifically several
- * versions of the Intel compiler are known to be affected but it's likely
- * others are as well.
- */
- PIPE_ALIGN_VAR(16) float color[4];
+ float color[4];
};
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index e2cadda..57eaaaa 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -207,7 +207,7 @@ namespace {
c.getDiagnosticOpts().ShowCarets = false;
c.getInvocation().setLangDefaults(c.getLangOpts(), clang::IK_OpenCL,
#if HAVE_LLVM >= 0x0309
- llvm::Triple(triple),
+ llvm::Triple(triple), c.getPreprocessorOpts(),
#endif
clang::LangStandard::lang_opencl11);
c.createDiagnostics(
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index e1afc4d..a07ecd1 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -30,6 +30,7 @@
#include <xf86drm.h>
#include <dlfcn.h>
+#include <fcntl.h>
#include "GL/mesa_glinterop.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
@@ -979,8 +980,10 @@ dri2_query_image(__DRIimage *image, int attrib, int *value)
return GL_TRUE;
case __DRI_IMAGE_ATTRIB_FD:
whandle.type= DRM_API_HANDLE_TYPE_FD;
- image->texture->screen->resource_get_handle(image->texture->screen,
- image->texture, &whandle, usage);
+ if (!image->texture->screen->resource_get_handle(image->texture->screen,
+ image->texture, &whandle, usage))
+ return GL_FALSE;
+
*value = whandle.handle;
return GL_TRUE;
case __DRI_IMAGE_ATTRIB_FORMAT:
@@ -1798,7 +1801,7 @@ dri2_init_screen(__DRIscreen * sPriv)
sPriv->driverPrivate = (void *)screen;
- if (screen->fd < 0 || (fd = dup(screen->fd)) < 0)
+ if (screen->fd < 0 || (fd = fcntl(screen->fd, F_DUPFD_CLOEXEC, 3)) < 0)
goto free_screen;
pscreen = load_pipe_screen(&screen->dev, screen->fd);
@@ -1879,7 +1882,7 @@ dri_kms_init_screen(__DRIscreen * sPriv)
sPriv->driverPrivate = (void *)screen;
- if (screen->fd < 0 || (fd = dup(screen->fd)) < 0)
+ if (screen->fd < 0 || (fd = fcntl(screen->fd, F_DUPFD_CLOEXEC, 3)) < 0)
goto free_screen;
if (pipe_loader_sw_probe_kms(&screen->dev, fd))
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index bd373d7..e2855d7 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2031,7 +2031,7 @@ DECL_SPECIAL(DCL)
ureg_DECL_vs_input(ureg, sem.reg.idx);
assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
- tx->info->num_inputs = sem.reg.idx + 1;
+ tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
/* NOTE: preserving order in case of indirect access */
} else
if (tx->version.major >= 3) {
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 2606dbf..aff4d4c 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -405,6 +405,7 @@ NineSurface9_LockRect( struct NineSurface9 *This,
} else {
u_box_origin_2d(This->desc.Width, This->desc.Height, &box);
}
+ box.z = This->layer;
user_warn(This->desc.Format == D3DFMT_NULL);
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index 1fdc638..faeeec1 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -374,7 +374,7 @@ NineVolume9_UnlockBox( struct NineVolume9 *This )
This->layer_stride_conversion,
0, 0, 0,
This->desc.Width, This->desc.Height,
- This->desc.Height);
+ This->desc.Depth);
if (!This->data)
pipe_transfer_unmap(This->pipe, transfer);
diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c
index d70439a..c9d9ab1 100644
--- a/src/gallium/state_trackers/omx/vid_enc.c
+++ b/src/gallium/state_trackers/omx/vid_enc.c
@@ -874,8 +874,8 @@ static void enc_ReleaseTasks(struct list_head *head)
{
struct encode_task *i, *next;
- if (!head)
- return;
+ if (!head || !head->next)
+ return;
LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) {
pipe_resource_reference(&i->bitstream, NULL);
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 8a6a397..1ad0d71 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -584,24 +584,26 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format,
memset(&templat, 0, sizeof(templat));
+ templat.buffer_format = pscreen->get_video_param(
+ pscreen,
+ PIPE_VIDEO_PROFILE_UNKNOWN,
+ PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+ PIPE_VIDEO_CAP_PREFERED_FORMAT
+ );
+ templat.interlaced = pscreen->get_video_param(
+ pscreen,
+ PIPE_VIDEO_PROFILE_UNKNOWN,
+ PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+ PIPE_VIDEO_CAP_PREFERS_INTERLACED
+ );
+
if (expected_fourcc) {
- templat.buffer_format = VaFourccToPipeFormat(expected_fourcc);
- templat.interlaced = 0;
- } else {
- templat.buffer_format = pscreen->get_video_param
- (
- pscreen,
- PIPE_VIDEO_PROFILE_UNKNOWN,
- PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
- PIPE_VIDEO_CAP_PREFERED_FORMAT
- );
- templat.interlaced = pscreen->get_video_param
- (
- pscreen,
- PIPE_VIDEO_PROFILE_UNKNOWN,
- PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
- PIPE_VIDEO_CAP_PREFERS_INTERLACED
- );
+ enum pipe_format expected_format = VaFourccToPipeFormat(expected_fourcc);
+
+ if (expected_format != templat.buffer_format || memory_attibute)
+ templat.interlaced = 0;
+
+ templat.buffer_format = expected_format;
}
templat.chroma_format = ChromaToPipe(format);
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index c644cc8..b278288 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
res_tmpl.depth0 = 1;
res_tmpl.array_size = 1;
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
- PIPE_BIND_LINEAR | PIPE_BIND_SHARED;
+ PIPE_BIND_SHARED;
res_tmpl.usage = PIPE_USAGE_DEFAULT;
pipe_mutex_lock(dev->mutex);
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index e091b083..c97c0ff 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -27,6 +27,7 @@
*/
#include <unistd.h>
+#include <fcntl.h>
#include "xa_tracker.h"
#include "xa_priv.h"
#include "pipe/p_state.h"
@@ -157,7 +158,7 @@ xa_tracker_create(int drm_fd)
if (!xa)
return NULL;
- if (drm_fd < 0 || (fd = dup(drm_fd)) < 0)
+ if (drm_fd < 0 || (fd = fcntl(drm_fd, F_DUPFD_CLOEXEC, 3)) < 0)
goto out_no_fd;
if (pipe_loader_drm_probe_fd(&xa->dev, fd))
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index f42dd25..06ade45 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -1,5 +1,11 @@
include $(top_srcdir)/src/gallium/Automake.inc
+if HAVE_ANDROID
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+endif
+
AM_CFLAGS = \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
@@ -49,6 +55,7 @@ gallium_dri_la_LIBADD = \
$(top_builddir)/src/gallium/drivers/noop/libnoop.la \
$(top_builddir)/src/gallium/drivers/rbug/librbug.la \
$(top_builddir)/src/gallium/drivers/trace/libtrace.la \
+ $(SHARED_GLAPI_LIB) \
$(SELINUX_LIBS) \
$(EXPAT_LIBS) \
$(LIBDRM_LIBS) \
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 0ce010e..3f6e280 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -253,6 +253,20 @@ static int compute_level(struct amdgpu_winsys *ws,
return 0;
}
+static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
+{
+ unsigned index, tileb;
+
+ tileb = 8 * 8 * surf->bpe;
+ tileb = MIN2(surf->tile_split, tileb);
+
+ for (index = 0; tileb > 64; index++)
+ tileb >>= 1;
+
+ assert(index < 16);
+ return index;
+}
+
static int amdgpu_surface_init(struct radeon_winsys *rws,
struct radeon_surf *surf)
{
@@ -345,7 +359,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
!(surf->flags & RADEON_SURF_SCANOUT) &&
!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
- !compressed && AddrDccIn.numSamples <= 1;
+ !compressed && AddrDccIn.numSamples <= 1 &&
+ surf->last_level == 0;
/* This disables incorrect calculations (hacks) in addrlib. */
AddrSurfInfoIn.flags.noStencil = 1;
@@ -380,6 +395,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
else
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
+
+ /* Addrlib doesn't set this if tileIndex is forced like above. */
+ AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
}
surf->bo_size = 0;
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 598ffcb..52b873f 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -1,5 +1,6 @@
#include <sys/stat.h>
#include <unistd.h>
+#include <fcntl.h>
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_format.h"
@@ -91,7 +92,7 @@ nouveau_drm_screen_create(int fd)
* nouveau_device_wrap does not close the fd in case of a device
* creation error.
*/
- dupfd = dup(fd);
+ dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
ret = nouveau_drm_new(dupfd, &drm);
if (ret)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 5c85c8f..7619873 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -44,6 +44,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
+#include <fcntl.h>
#include <radeon_surface.h>
#ifndef RADEON_INFO_ACTIVE_CU_COUNT
@@ -790,7 +791,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return NULL;
}
- ws->fd = dup(fd);
+ ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (!do_winsys_init(ws))
goto fail1;
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c
index 7fcb6d2..d0bfcd7 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen.c
@@ -31,9 +31,15 @@
#include "util/u_memory.h"
#include "pipe/p_compiler.h"
#include "util/u_hash_table.h"
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
#include <sys/stat.h>
#include <unistd.h>
+#include <fcntl.h>
static struct util_hash_table *dev_hash = NULL;
@@ -83,7 +89,7 @@ vmw_winsys_create( int fd )
vws->device = stat_buf.st_rdev;
vws->open_count = 1;
- vws->ioctl.drm_fd = dup(fd);
+ vws->ioctl.drm_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
vws->base.have_gb_dma = TRUE;
vws->base.need_to_rebind_resources = FALSE;
diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
index 21ac0d7..07eca99 100644
--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -211,7 +211,29 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
}
static struct kms_sw_displaytarget *
-kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
+kms_sw_displaytarget_find_and_ref(struct kms_sw_winsys *kms_sw,
+ unsigned int kms_handle)
+{
+ struct kms_sw_displaytarget *kms_sw_dt;
+
+ LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
+ if (kms_sw_dt->handle == kms_handle) {
+ kms_sw_dt->ref_count++;
+
+ DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n",
+ kms_sw_dt->handle, kms_sw_dt->size);
+
+ return kms_sw_dt;
+ }
+ }
+
+ return NULL;
+}
+
+static struct kms_sw_displaytarget *
+kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
+ unsigned width, unsigned height,
+ unsigned stride)
{
uint32_t handle = -1;
struct kms_sw_displaytarget * kms_sw_dt;
@@ -222,6 +244,10 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
if (ret)
return NULL;
+ kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, handle);
+ if (kms_sw_dt)
+ return kms_sw_dt;
+
kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
if (!kms_sw_dt)
return NULL;
@@ -229,6 +255,9 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
kms_sw_dt->ref_count = 1;
kms_sw_dt->handle = handle;
kms_sw_dt->size = lseek(fd, 0, SEEK_END);
+ kms_sw_dt->width = width;
+ kms_sw_dt->height = height;
+ kms_sw_dt->stride = stride;
if (kms_sw_dt->size == (off_t)-1) {
FREE(kms_sw_dt);
@@ -274,25 +303,18 @@ kms_sw_displaytarget_from_handle(struct sw_winsys *ws,
switch(whandle->type) {
case DRM_API_HANDLE_TYPE_FD:
- kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle);
- if (kms_sw_dt) {
- kms_sw_dt->ref_count++;
- kms_sw_dt->width = templ->width0;
- kms_sw_dt->height = templ->height0;
- kms_sw_dt->stride = whandle->stride;
+ kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle,
+ templ->width0,
+ templ->height0,
+ whandle->stride);
+ if (kms_sw_dt)
*stride = kms_sw_dt->stride;
- }
return (struct sw_displaytarget *)kms_sw_dt;
case DRM_API_HANDLE_TYPE_KMS:
- LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
- if (kms_sw_dt->handle == whandle->handle) {
- kms_sw_dt->ref_count++;
-
- DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
-
- *stride = kms_sw_dt->stride;
- return (struct sw_displaytarget *)kms_sw_dt;
- }
+ kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, whandle->handle);
+ if (kms_sw_dt) {
+ *stride = kms_sw_dt->stride;
+ return (struct sw_displaytarget *)kms_sw_dt;
}
/* fallthrough */
default:
diff --git a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
index c5434ad..23fe8e7 100644
--- a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
+++ b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
@@ -22,6 +22,7 @@
*/
#include <unistd.h>
+#include <fcntl.h>
#include "vc4_drm_public.h"
@@ -30,5 +31,5 @@
struct pipe_screen *
vc4_drm_screen_create(int fd)
{
- return vc4_screen_create(dup(fd));
+ return vc4_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3));
}
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index dc203cd..f866b24 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -903,7 +903,7 @@ virgl_drm_screen_create(int fd)
virgl_screen(pscreen)->refcnt++;
} else {
struct virgl_winsys *vws;
- int dup_fd = dup(fd);
+ int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
vws = virgl_drm_winsys_create(dup_fd);
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index c3626e3..2330f1b 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -589,7 +589,8 @@ gbm_dri_bo_get_fd(struct gbm_bo *_bo)
if (bo->image == NULL)
return -1;
- dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_FD, &fd);
+ if (!dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_FD, &fd))
+ return -1;
return fd;
}
@@ -941,7 +942,7 @@ gbm_dri_bo_map(struct gbm_bo *_bo,
return *map_data;
}
- if (!dri->image || dri->image->base.version < 12) {
+ if (!dri->image || dri->image->base.version < 12 || !dri->image->mapImage) {
errno = ENOSYS;
return NULL;
}
@@ -972,7 +973,8 @@ gbm_dri_bo_unmap(struct gbm_bo *_bo, void *map_data)
return;
}
- if (!dri->context || !dri->image || dri->image->base.version < 12)
+ if (!dri->context || !dri->image ||
+ dri->image->base.version < 12 || !dri->image->unmapImage)
return;
dri->image->unmapImage(dri->context, bo->image, map_data);
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c
index 0f4657a..c3a2ec33 100644
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -31,7 +31,12 @@
#include <string.h>
#include <stdint.h>
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
@@ -237,7 +242,8 @@ gbm_bo_get_handle(struct gbm_bo *bo)
* descriptor.
* \param bo The buffer object
- * \return Returns a file descriptor referring to the underlying buffer
+ * \return Returns a file descriptor referring to the underlying buffer or -1
+ * if an error occurs.
*/
GBM_EXPORT int
gbm_bo_get_fd(struct gbm_bo *bo)
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 90d7bba..51b6b1c 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -132,6 +132,16 @@ glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
return (gc != &dummyContext) ? dri3Ctx->driContext : NULL;
}
+static __DRIscreen *
+glx_dri3_get_dri_screen(struct loader_dri3_drawable *draw)
+{
+ struct glx_context *gc = __glXGetCurrentContext();
+ struct dri3_context *pcp = (struct dri3_context *) gc;
+ struct dri3_screen *psc = (struct dri3_screen *) pcp->base.psc;
+
+ return (gc != &dummyContext && psc) ? psc->driScreen : NULL;
+}
+
static void
glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
{
@@ -169,6 +179,7 @@ static struct loader_dri3_vtable glx_dri3_vtable = {
.set_drawable_size = glx_dri3_set_drawable_size,
.in_current_context = glx_dri3_in_current_context,
.get_dri_context = glx_dri3_get_dri_context,
+ .get_dri_screen = glx_dri3_get_dri_screen,
.flush_drawable = glx_dri3_flush_drawable,
.show_fps = glx_dri3_show_fps,
};
diff --git a/src/glx/glx_error.c b/src/glx/glx_error.c
index b3860db..653cbeb 100644
--- a/src/glx/glx_error.c
+++ b/src/glx/glx_error.c
@@ -39,11 +39,9 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID,
uint_fast16_t minorCode, bool coreX11error)
{
struct glx_display *glx_dpy = __glXInitialize(dpy);
- struct glx_context *gc = __glXGetCurrentContext();
xError error;
assert(glx_dpy);
- assert(gc);
LockDisplay(dpy);
@@ -59,7 +57,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID,
error.sequenceNumber = dpy->request;
error.resourceID = resourceID;
error.minorCode = minorCode;
- error.majorCode = gc ? gc->majorOpcode : 0;
+ error.majorCode = glx_dpy->majorOpcode;
_XError(dpy, &error);
diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c
index a0c1e3d..24c073c 100644
--- a/src/glx/glx_pbuffer.c
+++ b/src/glx/glx_pbuffer.c
@@ -328,7 +328,7 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable,
* the calling thread's current context a GLXBadDrawable error is
* generated."
*/
- if (pdraw == NULL || gc == NULL || gc->currentDpy != dpy ||
+ if (pdraw == NULL || gc == &dummyContext || gc->currentDpy != dpy ||
(gc->currentDrawable != drawable &&
gc->currentReadable != drawable)) {
__glXSendError(dpy, GLXBadDrawable, drawable,
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 3856032..5ff4dd3 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -524,7 +524,7 @@ glXWaitGL(void)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->wait_gl)
+ if (gc != &dummyContext && gc->vtable->wait_gl)
gc->vtable->wait_gl(gc);
}
@@ -537,7 +537,7 @@ glXWaitX(void)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->wait_x)
+ if (gc != &dummyContext && gc->vtable->wait_x)
gc->vtable->wait_x(gc);
}
@@ -546,7 +546,7 @@ glXUseXFont(Font font, int first, int count, int listBase)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc && gc->vtable->use_x_font)
+ if (gc != &dummyContext && gc->vtable->use_x_font)
gc->vtable->use_x_font(gc, font, first, count, listBase);
}
@@ -838,7 +838,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
__GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
if (pdraw != NULL) {
- Bool flush = gc && drawable == gc->currentDrawable;
+ Bool flush = gc != &dummyContext && drawable == gc->currentDrawable;
(*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0, flush);
return;
@@ -855,7 +855,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
** The calling thread may or may not have a current context. If it
** does, send the context tag so the server can do a flush.
*/
- if ((gc != NULL) && (dpy == gc->currentDpy) &&
+ if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
((drawable == gc->currentDrawable)
|| (drawable == gc->currentReadable))) {
tag = gc->currentContextTag;
@@ -1388,7 +1388,7 @@ _GLX_PUBLIC Display *
glXGetCurrentDisplay(void)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (NULL == gc)
+ if (gc == &dummyContext)
return NULL;
return gc->currentDpy;
}
@@ -1630,7 +1630,6 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig,
int renderType, GLXContext shareList, Bool allowDirect)
{
struct glx_config *config = (struct glx_config *) fbconfig;
- int screen = DefaultScreen(dpy);
struct glx_config **config_list;
int list_size;
unsigned i;
@@ -1641,7 +1640,7 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig,
}
config_list = (struct glx_config **)
- glXGetFBConfigs(dpy, screen, &list_size);
+ glXGetFBConfigs(dpy, config->screen, &list_size);
for (i = 0; i < list_size; i++) {
if (config_list[i] == config)
@@ -1751,7 +1750,7 @@ __glXSwapIntervalSGI(int interval)
CARD32 *interval_ptr;
CARD8 opcode;
- if (gc == NULL) {
+ if (gc == &dummyContext) {
return GLX_BAD_CONTEXT;
}
@@ -1805,7 +1804,7 @@ __glXSwapIntervalMESA(unsigned int interval)
#ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
- if (gc != NULL && gc->isDirect) {
+ if (gc != &dummyContext && gc->isDirect) {
struct glx_screen *psc;
psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
@@ -1827,7 +1826,7 @@ __glXGetSwapIntervalMESA(void)
#ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
- if (gc != NULL && gc->isDirect) {
+ if (gc != &dummyContext && gc->isDirect) {
struct glx_screen *psc;
psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
@@ -1857,7 +1856,7 @@ __glXGetVideoSyncSGI(unsigned int *count)
__GLXDRIdrawable *pdraw;
#endif
- if (!gc)
+ if (gc == &dummyContext)
return GLX_BAD_CONTEXT;
#ifdef GLX_DIRECT_RENDERING
@@ -1899,7 +1898,7 @@ __glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
if (divisor <= 0 || remainder < 0)
return GLX_BAD_VALUE;
- if (!gc)
+ if (gc == &dummyContext)
return GLX_BAD_CONTEXT;
#ifdef GLX_DIRECT_RENDERING
@@ -2212,7 +2211,7 @@ __glXSwapBuffersMscOML(Display * dpy, GLXDrawable drawable,
struct glx_screen *psc = pdraw ? pdraw->psc : NULL;
#endif
- if (!gc) /* no GLX for this */
+ if (gc == &dummyContext) /* no GLX for this */
return -1;
#ifdef GLX_DIRECT_RENDERING
@@ -2392,7 +2391,7 @@ __glXCopySubBufferMESA(Display * dpy, GLXDrawable drawable,
** does, send the context tag so the server can do a flush.
*/
gc = __glXGetCurrentContext();
- if ((gc != NULL) && (dpy == gc->currentDpy) &&
+ if ((gc != &dummyContext) && (dpy == gc->currentDpy) &&
((drawable == gc->currentDrawable) ||
(drawable == gc->currentReadable))) {
tag = gc->currentContextTag;
@@ -2431,7 +2430,7 @@ __glXBindTexImageEXT(Display * dpy,
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL || gc->vtable->bind_tex_image == NULL)
+ if (gc == &dummyContext || gc->vtable->bind_tex_image == NULL)
return;
gc->vtable->bind_tex_image(dpy, drawable, buffer, attrib_list);
@@ -2442,7 +2441,7 @@ __glXReleaseTexImageEXT(Display * dpy, GLXDrawable drawable, int buffer)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL || gc->vtable->release_tex_image == NULL)
+ if (gc == &dummyContext || gc->vtable->release_tex_image == NULL)
return;
gc->vtable->release_tex_image(dpy, drawable, buffer);
@@ -2718,7 +2717,7 @@ __glXGetUST(int64_t * ust)
#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
-int
+PUBLIC int
MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
struct mesa_glinterop_device_info *out)
{
@@ -2742,7 +2741,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
return ret;
}
-int
+PUBLIC int
MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out)
diff --git a/src/glx/glxglvnd.c b/src/glx/glxglvnd.c
index b7252a7..098304d 100644
--- a/src/glx/glxglvnd.c
+++ b/src/glx/glxglvnd.c
@@ -19,13 +19,13 @@ static void *__glXGLVNDGetProcAddress(const GLubyte *procName)
static unsigned FindGLXFunction(const GLubyte *name)
{
- unsigned first = 0;
- unsigned last = DI_FUNCTION_COUNT - 1;
+ int first = 0;
+ int last = DI_FUNCTION_COUNT - 1;
while (first <= last) {
- unsigned middle = (first + last) / 2;
- int comp = strcmp((const char *) name,
- __glXDispatchTableStrings[middle]);
+ int middle = (first + last) / 2;
+ int comp = strcmp(__glXDispatchTableStrings[middle],
+ (const char *) name);
if (comp < 0)
first = middle + 1;
diff --git a/src/glx/query_renderer.c b/src/glx/query_renderer.c
index 9108ec2..4debf06 100644
--- a/src/glx/query_renderer.c
+++ b/src/glx/query_renderer.c
@@ -106,7 +106,7 @@ glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL)
+ if (gc == &dummyContext)
return False;
return __glXQueryRendererInteger(gc->psc, attribute, value);
@@ -166,7 +166,7 @@ glXQueryCurrentRendererStringMESA(int attribute)
{
struct glx_context *gc = __glXGetCurrentContext();
- if (gc == NULL)
+ if (gc == &dummyContext)
return False;
return __glXQueryRendererString(gc->psc, attribute);
diff --git a/src/glx/tests/fake_glx_screen.cpp b/src/glx/tests/fake_glx_screen.cpp
index db20749..801f54a 100644
--- a/src/glx/tests/fake_glx_screen.cpp
+++ b/src/glx/tests/fake_glx_screen.cpp
@@ -75,7 +75,20 @@ indirect_create_context_attribs(struct glx_screen *base,
return indirect_create_context(base, config_base, shareList, 0);
}
-__thread void *__glX_tls_Context = NULL;
+/* This is necessary so that we don't have to link with glxcurrent.c
+ * which would require us to link with X libraries and what not.
+ */
+GLubyte dummyBuffer[__GLX_BUFFER_LIMIT_SIZE];
+struct glx_context_vtable dummyVtable;
+struct glx_context dummyContext = {
+ &dummyBuffer[0],
+ &dummyBuffer[0],
+ &dummyBuffer[0],
+ &dummyBuffer[__GLX_BUFFER_LIMIT_SIZE],
+ sizeof(dummyBuffer),
+ &dummyVtable
+};
+__thread void *__glX_tls_Context = &dummyContext;
#if !defined(GLX_USE_TLS)
extern "C" struct glx_context *
diff --git a/src/intel/genxml/Makefile.am b/src/intel/genxml/Makefile.am
index d6c1c5b..e8bd84c 100644
--- a/src/intel/genxml/Makefile.am
+++ b/src/intel/genxml/Makefile.am
@@ -35,6 +35,7 @@ $(BUILT_SOURCES): gen_pack_header.py
CLEANFILES = $(BUILT_SOURCES)
EXTRA_DIST = \
+ $(GENXML_GENERATED_FILES) \
gen6.xml \
gen7.xml \
gen75.xml \
diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml
index 44e2804..8bc28a9 100644
--- a/src/intel/genxml/gen6.xml
+++ b/src/intel/genxml/gen6.xml
@@ -79,7 +79,7 @@
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
@@ -169,6 +169,12 @@
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
@@ -781,6 +787,7 @@
<field name="CLIP Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml
index 2bbfcb7..cbeb2e1 100644
--- a/src/intel/genxml/gen7.xml
+++ b/src/intel/genxml/gen7.xml
@@ -102,7 +102,7 @@
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
@@ -192,6 +192,12 @@
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
@@ -953,6 +959,7 @@
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index 9ab432c..40f3c31 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -112,7 +112,7 @@
</group>
</struct>
- <struct name="BLEND_STATE" length="2">
+ <struct name="BLEND_STATE_ENTRY" length="2">
<field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/>
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
<field name="Alpha Blend Function" start="26" end="28" type="uint">
@@ -202,6 +202,12 @@
<field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/>
</struct>
+ <struct name="BLEND_STATE" length="16">
+ <group count="8" start="0" size="64">
+ <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
+ </group>
+ </struct>
+
<struct name="CC_VIEWPORT" length="2">
<field name="Minimum Depth" start="0" end="31" type="float"/>
<field name="Maximum Depth" start="32" end="63" type="float"/>
@@ -1062,6 +1068,7 @@
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
<value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/>
<field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/>
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 80d40fb..e3b0cdb 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -1115,15 +1115,16 @@
<field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
- <value name="OGL" value="0"/>
+ <value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
<field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
<field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
<field name="Clip Mode" start="77" end="79" type="uint">
- <value name="NORMAL" value="0"/>
- <value name="REJECT_ALL" value="3"/>
- <value name="ACCEPT_ALL" value="4"/>
+ <value name="CLIPMODE_NORMAL" value="0"/>
+ <value name="CLIPMODE_REJECT_ALL" value="3"/>
+ <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
</field>
<field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
<field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
@@ -2035,7 +2036,7 @@
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
<field name="DWord Length" start="0" end="7" type="uint" default="3"/>
<field name="SO Function Enable" start="63" end="63" type="uint"/>
- <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+ <field name="Rendering Disable" start="62" end="62" type="uint"/>
<field name="Render Stream Select" start="59" end="60" type="uint"/>
<field name="Reorder Mode" start="58" end="58" type="uint">
<value name="LEADING" value="0"/>
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 94b7d28..4333b89 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -1167,15 +1167,16 @@
<field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/>
<field name="Clip Enable" start="95" end="95" type="bool"/>
<field name="API Mode" start="94" end="94" type="uint">
- <value name="OGL" value="0"/>
+ <value name="APIMODE_OGL" value="0"/>
+ <value name="APIMODE_D3D" value="1"/>
</field>
<field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/>
<field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/>
<field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/>
<field name="Clip Mode" start="77" end="79" type="uint">
- <value name="NORMAL" value="0"/>
- <value name="REJECT_ALL" value="3"/>
- <value name="ACCEPT_ALL" value="4"/>
+ <value name="CLIPMODE_NORMAL" value="0"/>
+ <value name="CLIPMODE_REJECT_ALL" value="3"/>
+ <value name="CLIPMODE_ACCEPT_ALL" value="4"/>
</field>
<field name="Perspective Divide Disable" start="73" end="73" type="bool"/>
<field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/>
@@ -2238,7 +2239,7 @@
<field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/>
<field name="DWord Length" start="0" end="7" type="uint" default="3"/>
<field name="SO Function Enable" start="63" end="63" type="uint"/>
- <field name="API Rendering Disable" start="62" end="62" type="uint"/>
+ <field name="Rendering Disable" start="62" end="62" type="uint"/>
<field name="Render Stream Select" start="59" end="60" type="uint"/>
<field name="Reorder Mode" start="58" end="58" type="uint">
<value name="LEADING" value="0"/>
diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am
index 1fd6683..8e03ee6 100644
--- a/src/intel/isl/Makefile.am
+++ b/src/intel/isl/Makefile.am
@@ -46,23 +46,25 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include
-libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
+AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ -Wno-override-init
libisl_la_LIBADD = $(ISL_GEN_LIBS)
libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES)
libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES)
-libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
+libisl_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
libisl_gen75_la_SOURCES = $(ISL_GEN75_FILES)
-libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75
+libisl_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75
libisl_gen8_la_SOURCES = $(ISL_GEN8_FILES)
-libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80
+libisl_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80
libisl_gen9_la_SOURCES = $(ISL_GEN9_FILES)
-libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90
+libisl_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90
BUILT_SOURCES = $(ISL_GENERATED_FILES)
diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 77b570d..0bdfa9d 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -490,27 +490,27 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
case ISL_MSAA_LAYOUT_ARRAY:
assert(info->depth == 1);
- assert(info->array_len == 1);
+ assert(info->levels == 1);
assert(!isl_format_is_compressed(info->format));
*phys_level0_sa = (struct isl_extent4d) {
.w = info->width,
.h = info->height,
.d = 1,
- .a = info->samples,
+ .a = info->array_len * info->samples,
};
break;
case ISL_MSAA_LAYOUT_INTERLEAVED:
assert(info->depth == 1);
- assert(info->array_len == 1);
+ assert(info->levels == 1);
assert(!isl_format_is_compressed(info->format));
*phys_level0_sa = (struct isl_extent4d) {
.w = info->width,
.h = info->height,
.d = 1,
- .a = 1,
+ .a = info->array_len,
};
isl_msaa_interleaved_scale_px_to_sa(info->samples,
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index ef86228..64aced8 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -989,7 +989,7 @@ isl_has_matching_typed_storage_image_format(const struct brw_device_info *devinf
static inline bool
isl_tiling_is_any_y(enum isl_tiling tiling)
{
- return (1u << tiling) & ISL_TILING_ANY_MASK;
+ return (1u << tiling) & ISL_TILING_ANY_Y_MASK;
}
static inline bool
diff --git a/src/intel/isl/isl_gen6.c b/src/intel/isl/isl_gen6.c
index 24c3939..cc246f5 100644
--- a/src/intel/isl/isl_gen6.c
+++ b/src/intel/isl/isl_gen6.c
@@ -37,7 +37,7 @@ gen6_choose_msaa_layout(const struct isl_device *dev,
if (info->samples == 1) {
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
- return false;
+ return true;
}
/* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface
diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c
index b2317d8..1c985b6 100644
--- a/src/intel/isl/isl_surface_state.c
+++ b/src/intel/isl/isl_surface_state.c
@@ -430,8 +430,15 @@ isl_genX(buffer_fill_state_s)(void *state,
uint32_t num_elements = info->size / info->stride;
if (GEN_GEN >= 7) {
+ /* From the IVB PRM, SURFACE_STATE::Height,
+ *
+ * For typed buffer and structured buffer surfaces, the number
+ * of entries in the buffer ranges from 1 to 2^27. For raw buffer
+ * surfaces, the number of entries in the buffer is the number of bytes
+ * which can range from 1 to 2^30.
+ */
if (info->format == ISL_FORMAT_RAW) {
- assert(num_elements <= (1ull << 31));
+ assert(num_elements <= (1ull << 30));
assert((num_elements & 3) == 0);
} else {
assert(num_elements <= (1ull << 27));
diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am
index 0e521cf..6b1015a 100644
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -61,7 +61,9 @@ AM_CPPFLAGS = \
-I$(top_builddir)/src/intel \
-I$(top_srcdir)/src/intel
-AM_CFLAGS = -Wno-override-init -msse2
+AM_CFLAGS = \
+ $(VISIBILITY_CFLAGS) \
+ -Wno-override-init -msse2
libanv_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70
libanv_gen7_la_SOURCES = $(GEN7_FILES)
@@ -159,6 +161,7 @@ libvulkan_intel_la_LDFLAGS = \
-module \
-no-undefined \
-avoid-version \
+ $(BSYMBOLIC) \
$(GC_SECTIONS) \
$(LD_NO_UNDEFINED)
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index 20d3af1..ca78c09 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
switch (cmd_buffer->device->info.gen) {
case 7:
if (cmd_buffer->device->info.is_haswell)
- return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+ return gen75_cmd_buffer_emit_state_base_address(cmd_buffer);
else
return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
case 8:
@@ -741,20 +741,26 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
{
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
- struct anv_pipeline_bind_map *map;
+ struct anv_pipeline *pipeline;
uint32_t bias, state_offset;
switch (stage) {
case MESA_SHADER_COMPUTE:
- map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.compute_pipeline;
bias = 1;
break;
default:
- map = &cmd_buffer->state.pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.pipeline;
bias = 0;
break;
}
+ if (!anv_pipeline_has_stage(pipeline, stage)) {
+ *bt_state = (struct anv_state) { 0, };
+ return VK_SUCCESS;
+ }
+
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (bias + map->surface_count == 0) {
*bt_state = (struct anv_state) { 0, };
return VK_SUCCESS;
@@ -907,13 +913,19 @@ VkResult
anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage, struct anv_state *state)
{
- struct anv_pipeline_bind_map *map;
+ struct anv_pipeline *pipeline;
if (stage == MESA_SHADER_COMPUTE)
- map = &cmd_buffer->state.compute_pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.compute_pipeline;
else
- map = &cmd_buffer->state.pipeline->bindings[stage];
+ pipeline = cmd_buffer->state.pipeline;
+ if (!anv_pipeline_has_stage(pipeline, stage)) {
+ *state = (struct anv_state) { 0, };
+ return VK_SUCCESS;
+ }
+
+ struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->sampler_count == 0) {
*state = (struct anv_state) { 0, };
return VK_SUCCESS;
@@ -1080,10 +1092,14 @@ struct anv_state
anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage)
{
+ /* If we don't have this stage, bail. */
+ if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage))
+ return (struct anv_state) { .offset = 0 };
+
struct anv_push_constants *data =
cmd_buffer->state.push_constants[stage];
const struct brw_stage_prog_data *prog_data =
- cmd_buffer->state.pipeline->prog_data[stage];
+ anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);
/* If we don't actually have any push constants, bail. */
if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c
index 448ae0e..4ab1802 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -27,6 +27,8 @@
#include <unistd.h>
#include <fcntl.h>
+#include "util/mesa-sha1.h"
+
#include "anv_private.h"
/*
@@ -65,9 +67,8 @@ VkResult anv_CreateDescriptorSetLayout(
struct anv_sampler **samplers =
(struct anv_sampler **)&set_layout->binding[max_binding + 1];
+ memset(set_layout, 0, sizeof(*set_layout));
set_layout->binding_count = max_binding + 1;
- set_layout->shader_stages = 0;
- set_layout->size = 0;
for (uint32_t b = 0; b <= max_binding; b++) {
/* Initialize all binding_layout entries to -1 */
@@ -202,6 +203,15 @@ void anv_DestroyDescriptorSetLayout(
anv_free2(&device->alloc, pAllocator, set_layout);
}
+static void
+sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
+ const struct anv_descriptor_set_layout *layout)
+{
+ size_t size = sizeof(*layout) +
+ sizeof(layout->binding[0]) * layout->binding_count;
+ _mesa_sha1_update(ctx, layout, size);
+}
+
/*
* Pipeline layouts. These have nothing to do with the pipeline. They are
* just muttiple descriptor set layouts pasted together
@@ -246,6 +256,19 @@ VkResult anv_CreatePipelineLayout(
}
}
+ struct mesa_sha1 *ctx = _mesa_sha1_init();
+ for (unsigned s = 0; s < layout->num_sets; s++) {
+ sha1_update_descriptor_set_layout(ctx, layout->set[s].layout);
+ _mesa_sha1_update(ctx, &layout->set[s].dynamic_offset_start,
+ sizeof(layout->set[s].dynamic_offset_start));
+ }
+ _mesa_sha1_update(ctx, &layout->num_sets, sizeof(layout->num_sets));
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ _mesa_sha1_update(ctx, &layout->stage[s].has_dynamic_offsets,
+ sizeof(layout->stage[s].has_dynamic_offsets));
+ }
+ _mesa_sha1_final(ctx, layout->sha1);
+
*pPipelineLayout = anv_pipeline_layout_to_handle(layout);
return VK_SUCCESS;
@@ -409,6 +432,11 @@ anv_descriptor_set_create(struct anv_device *device,
(struct anv_buffer_view *) &set->descriptors[layout->size];
set->buffer_count = layout->buffer_count;
+ /* By defining the descriptors to be zero now, we can later verify that
+ * a descriptor has not been populated with user data.
+ */
+ memset(set->descriptors, 0, sizeof(struct anv_descriptor) * layout->size);
+
/* Go through and fill out immutable samplers if we have any */
struct anv_descriptor *desc = set->descriptors;
for (uint32_t b = 0; b < layout->binding_count; b++) {
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index dd941b6..cd8fb3a 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -372,7 +372,7 @@ void anv_GetPhysicalDeviceFeatures(
.robustBufferAccess = true,
.fullDrawIndexUint32 = true,
.imageCubeArray = false,
- .independentBlend = pdevice->info->gen >= 8,
+ .independentBlend = true,
.geometryShader = true,
.tessellationShader = false,
.sampleRateShading = false,
@@ -438,6 +438,10 @@ void anv_GetPhysicalDeviceProperties(
const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
+ /* See assertions made when programming the buffer surface state. */
+ const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
+ (1ul << 30) : (1ul << 27);
+
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
@@ -448,8 +452,8 @@ void anv_GetPhysicalDeviceProperties(
.maxImageDimensionCube = (1 << 14),
.maxImageArrayLayers = (1 << 11),
.maxTexelBufferElements = 128 * 1024 * 1024,
- .maxUniformBufferRange = UINT32_MAX,
- .maxStorageBufferRange = UINT32_MAX,
+ .maxUniformBufferRange = (1ul << 27),
+ .maxStorageBufferRange = max_raw_buffer_sz,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
@@ -649,13 +653,15 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr(
return anv_lookup_entrypoint(pName);
}
-/* The loader wants us to expose a second GetInstanceProcAddr function
- * to work around certain LD_PRELOAD issues seen in apps.
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
*/
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName);
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName)
@@ -869,7 +875,8 @@ VkResult anv_CreateDevice(
&device->dynamic_state_block_pool);
anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
- anv_pipeline_cache_init(&device->default_pipeline_cache, device);
+ anv_state_pool_init(&device->instruction_state_pool,
+ &device->instruction_block_pool);
anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
@@ -944,6 +951,7 @@ void anv_DestroyDevice(
anv_bo_pool_finish(&device->batch_bo_pool);
anv_state_pool_finish(&device->dynamic_state_pool);
anv_block_pool_finish(&device->dynamic_state_block_pool);
+ anv_state_pool_finish(&device->instruction_state_pool);
anv_block_pool_finish(&device->instruction_block_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
@@ -1786,23 +1794,3 @@ void anv_DestroyFramebuffer(
anv_free2(&device->alloc, pAllocator, fb);
}
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
-{
-}
-
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
-{
-}
diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py
index 2896174..dcf25ee 100644
--- a/src/intel/vulkan/anv_entrypoints_gen.py
+++ b/src/intel/vulkan/anv_entrypoints_gen.py
@@ -134,7 +134,6 @@ if opt_header:
print "%s gen75_%s%s;" % (type, name, args)
print "%s gen8_%s%s;" % (type, name, args)
print "%s gen9_%s%s;" % (type, name, args)
- print "%s anv_validate_%s%s;" % (type, name, args)
print_guard_end(name)
exit()
@@ -185,23 +184,24 @@ for type, name, args, num, h in entrypoints:
print " \"vk%s\\0\"" % name
offsets.append(i)
i += 2 + len(name) + 1
-print """ ;
+print " ;"
-/* Weak aliases for all potential validate functions. These will resolve to
- * NULL if they're not defined, which lets the resolve_entrypoint() function
- * either pick a validate wrapper if available or just plug in the actual
- * entry point.
- */
-"""
-
-# Now generate the table of all entry points and their validation functions
+# Now generate the table of all entry points
print "\nstatic const struct anv_entrypoint entrypoints[] = {"
for type, name, args, num, h in entrypoints:
print " { %5d, 0x%08x }," % (offsets[num], h)
print "};\n"
-for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
+print """
+
+/* Weak aliases for all potential implementations. These will resolve to
+ * NULL if they're not defined, which lets the resolve_entrypoint() function
+ * pick the correct entry point.
+ */
+"""
+
+for layer in [ "anv", "gen7", "gen75", "gen8", "gen9" ]:
for type, name, args, num, h in entrypoints:
print_guard_start(name)
print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
@@ -214,27 +214,6 @@ for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
print "};\n"
print """
-#ifdef DEBUG
-static bool enable_validate = true;
-#else
-static bool enable_validate = false;
-#endif
-
-/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
- * function. This means that we have to determine whether or not to use the
- * validation layer sometime before that. The constructor function attribute asks
- * the dynamic linker to invoke determine_validate() at dlopen() time which
- * works.
- */
-static void __attribute__ ((constructor))
-determine_validate(void)
-{
- const char *s = getenv("ANV_VALIDATE");
-
- if (s)
- enable_validate = atoi(s);
-}
-
static const struct brw_device_info *dispatch_devinfo;
void
@@ -246,9 +225,6 @@ anv_set_dispatch_devinfo(const struct brw_device_info *devinfo)
void * __attribute__ ((noinline))
anv_resolve_entrypoint(uint32_t index)
{
- if (enable_validate && validate_layer.entrypoints[index])
- return validate_layer.entrypoints[index];
-
if (dispatch_devinfo == NULL) {
return anv_layer.entrypoints[index];
}
@@ -277,17 +253,6 @@ anv_resolve_entrypoint(uint32_t index)
}
"""
-# Now output ifuncs and their resolve helpers for all entry points. The
-# resolve helper calls resolve_entrypoint() with the entry point index, which
-# lets the resolver look it up in the table.
-
-for type, name, args, num, h in entrypoints:
- print_guard_start(name)
- print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
- print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
- print_guard_end(name)
-
-
# Now generate the hash table used for entry point look up. This is a
# uint16_t table of entry point indices. We use 0xffff to indicate an entry
# in the hash table is empty.
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 77d9931..caf4a3e 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -322,81 +322,6 @@ void anv_GetImageSubresourceLayout(
}
}
-VkResult
-anv_validate_CreateImageView(VkDevice _device,
- const VkImageViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImageView *pView)
-{
- ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
- const VkImageSubresourceRange *subresource;
-
- /* Validate structure type before dereferencing it. */
- assert(pCreateInfo);
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO);
- subresource = &pCreateInfo->subresourceRange;
-
- /* Validate viewType is in range before using it. */
- assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE);
- assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE);
-
- /* Validate format is in range before using it. */
- assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE);
- assert(pCreateInfo->format <= VK_FORMAT_END_RANGE);
-
- /* Validate channel swizzles. */
- assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE);
- assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
- assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE);
-
- /* Validate subresource. */
- assert(subresource->aspectMask != 0);
- assert(subresource->levelCount > 0);
- assert(subresource->layerCount > 0);
- assert(subresource->baseMipLevel < image->levels);
- assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels);
- assert(subresource->baseArrayLayer < image->array_size);
- assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size);
- assert(pView);
-
- MAYBE_UNUSED const VkImageAspectFlags view_format_aspects =
- vk_format_aspects(pCreateInfo->format);
-
- const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT
- | VK_IMAGE_ASPECT_STENCIL_BIT;
-
- /* Validate format. */
- if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(view_format_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
- } else if (subresource->aspectMask & ds_flags) {
- assert((subresource->aspectMask & ~ds_flags) == 0);
-
- assert(pCreateInfo->format == image->vk_format);
-
- if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
- assert(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
- assert(view_format_aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
- }
-
- if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
- /* FINISHME: Is it legal to have an R8 view of S8? */
- assert(image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- assert(view_format_aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
- }
- } else {
- assert(!"bad VkImageSubresourceRange::aspectFlags");
- }
-
- return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
-}
-
static struct anv_state
alloc_surface_state(struct anv_device *device,
struct anv_cmd_buffer *cmd_buffer)
@@ -628,18 +553,19 @@ void anv_buffer_view_init(struct anv_buffer_view *view,
view->format = anv_get_isl_format(&device->info, pCreateInfo->format,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_LINEAR);
+ const uint32_t format_bs = isl_format_get_layout(view->format)->bs;
view->bo = buffer->bo;
view->offset = buffer->offset + pCreateInfo->offset;
view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
- buffer->size - view->offset : pCreateInfo->range;
+ buffer->size - pCreateInfo->offset : pCreateInfo->range;
+ view->range = align_down_npot_u32(view->range, format_bs);
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {
view->surface_state = alloc_surface_state(device, cmd_buffer);
anv_fill_buffer_surface_state(device, view->surface_state,
view->format,
- view->offset, view->range,
- isl_format_get_layout(view->format)->bs);
+ view->offset, view->range, format_bs);
} else {
view->surface_state = (struct anv_state){ 0 };
}
diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c
index dc098ef..af15c2c 100644
--- a/src/intel/vulkan/anv_meta_blit.c
+++ b/src/intel/vulkan/anv_meta_blit.c
@@ -106,7 +106,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, color_out, &tex->dest.ssa, 4);
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
return b.shader;
}
diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c
index 06e1043..649c11f 100644
--- a/src/intel/vulkan/anv_meta_blit2d.c
+++ b/src/intel/vulkan/anv_meta_blit2d.c
@@ -92,6 +92,21 @@ vk_format_for_size(int bs)
}
}
+/* This function returns the format corresponding to a single component of the
+ * RGB format for the given size returned by vk_format_for_size().
+ */
+static VkFormat
+vk_single_component_format_for_rgb_size(int bs)
+{
+ switch (bs) {
+ case 3: return VK_FORMAT_R8_UNORM;
+ case 6: return VK_FORMAT_R16_UNORM;
+ case 12: return VK_FORMAT_R32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
+}
+
static void
create_iview(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_blit2d_surf *surf,
@@ -99,13 +114,14 @@ create_iview(struct anv_cmd_buffer *cmd_buffer,
VkImageUsageFlags usage,
uint32_t width,
uint32_t height,
+ VkFormat format,
VkImage *img,
struct anv_image_view *iview)
{
const VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
- .format = vk_format_for_size(surf->bs),
+ .format = format,
.extent = {
.width = width,
.height = height,
@@ -179,6 +195,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT,
rect->src_x + rect->width, rect->src_y + rect->height,
+ vk_format_for_size(src->bs),
&tmp->image, &tmp->iview);
anv_CreateDescriptorPool(vk_device,
@@ -328,10 +345,11 @@ blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer,
uint64_t offset,
uint32_t width,
uint32_t height,
+ VkFormat format,
struct blit2d_dst_temps *tmp)
{
create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
- width, height, &tmp->image, &tmp->iview);
+ width, height, format, &tmp->image, &tmp->iview);
anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device),
&(VkFramebufferCreateInfo) {
@@ -406,7 +424,8 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer,
struct blit2d_dst_temps dst_temps;
blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width,
- rects[r].dst_y + rects[r].height, &dst_temps);
+ rects[r].dst_y + rects[r].height,
+ vk_format_for_size(dst->bs), &dst_temps);
struct blit_vb_data {
float pos[2];
@@ -544,7 +563,8 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
};
struct blit2d_dst_temps dst_temps;
- blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps);
+ blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y,
+ VK_FORMAT_R8_UINT, &dst_temps);
struct blit_vb_header {
struct anv_vue_header vue;
@@ -647,6 +667,141 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer,
}
}
+static void
+anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_meta_blit2d_surf *src,
+ enum blit2d_src_type src_type,
+ struct anv_meta_blit2d_surf *dst,
+ unsigned num_rects,
+ struct anv_meta_blit2d_rect *rects)
+{
+ struct anv_device *device = cmd_buffer->device;
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ struct blit2d_src_temps src_temps;
+ blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps);
+
+ assert(dst->bs % 3 == 0);
+ assert(dst->tiling == ISL_TILING_LINEAR);
+
+ uint32_t offset;
+ isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev,
+ dst->tiling, 1, dst->pitch,
+ rects[r].dst_x, rects[r].dst_y,
+ &offset,
+ &rects[r].dst_x, &rects[r].dst_y);
+
+ /* A red surface three times as wide as the actual RGB destination */
+ struct anv_meta_blit2d_surf dst_R = {
+ .bo = dst->bo,
+ .tiling = dst->tiling,
+ .base_offset = dst->base_offset,
+ .bs = dst->bs / 3,
+ .pitch = dst->pitch,
+ };
+
+ struct blit2d_dst_temps dst_temps;
+ blit2d_bind_dst(cmd_buffer, &dst_R, offset,
+ (rects[r].dst_x + rects[r].width) * 3,
+ rects[r].dst_y + rects[r].height,
+ vk_single_component_format_for_rgb_size(dst->bs),
+ &dst_temps);
+
+ struct blit_vb_data {
+ float pos[2];
+ float tex_coord[3];
+ } *vb_data;
+
+ unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
+
+ struct anv_state vb_state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
+ memset(vb_state.map, 0, sizeof(struct anv_vue_header));
+ vb_data = vb_state.map + sizeof(struct anv_vue_header);
+
+ vb_data[0] = (struct blit_vb_data) {
+ .pos = {
+ (rects[r].dst_x + rects[r].width) * 3,
+ rects[r].dst_y + rects[r].height,
+ },
+ .tex_coord = {
+ rects[r].src_x + rects[r].width,
+ rects[r].src_y + rects[r].height,
+ src->pitch,
+ },
+ };
+
+ vb_data[1] = (struct blit_vb_data) {
+ .pos = {
+ rects[r].dst_x * 3,
+ rects[r].dst_y + rects[r].height,
+ },
+ .tex_coord = {
+ rects[r].src_x,
+ rects[r].src_y + rects[r].height,
+ src->pitch,
+ },
+ };
+
+ vb_data[2] = (struct blit_vb_data) {
+ .pos = {
+ rects[r].dst_x * 3,
+ rects[r].dst_y,
+ },
+ .tex_coord = {
+ rects[r].src_x,
+ rects[r].src_y,
+ src->pitch,
+ },
+ };
+
+ if (!device->info.has_llc)
+ anv_state_clflush(vb_state);
+
+ struct anv_buffer vertex_buffer = {
+ .device = device,
+ .size = vb_size,
+ .bo = &device->dynamic_state_block_pool.bo,
+ .offset = vb_state.offset,
+ };
+
+ anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
+ (VkBuffer[]) {
+ anv_buffer_to_handle(&vertex_buffer),
+ anv_buffer_to_handle(&vertex_buffer)
+ },
+ (VkDeviceSize[]) {
+ 0,
+ sizeof(struct anv_vue_header),
+ });
+
+ ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
+ &(VkRenderPassBeginInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d.render_pass,
+ .framebuffer = dst_temps.fb,
+ .renderArea = {
+ .offset = { rects[r].dst_x, rects[r].dst_y, },
+ .extent = { rects[r].width, rects[r].height },
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ }, VK_SUBPASS_CONTENTS_INLINE);
+
+ bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB);
+
+ ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
+
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ blit2d_unbind_src(cmd_buffer, src_type, &src_temps);
+ blit2d_unbind_dst(cmd_buffer, &dst_temps);
+ }
+}
+
void
anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_blit2d_surf *src,
@@ -666,7 +821,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
num_rects, rects);
return;
} else if (dst->bs % 3 == 0) {
- anv_finishme("Blitting to RGB destinations not yet supported");
+ anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst,
+ num_rects, rects);
return;
} else {
assert(util_is_power_of_two(dst->bs));
@@ -892,6 +1048,61 @@ build_nir_copy_fragment_shader(struct anv_device *device,
return b.shader;
}
+/* RGB copies have the same interface as normal copies */
+#define rgb_vi_create_info normal_vi_create_info
+
+static nir_shader *
+build_nir_rgb_fragment_shader(struct anv_device *device,
+ texel_fetch_build_func txf_func)
+{
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
+ nir_builder b;
+
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs");
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
+ vec3, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
+ vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+
+ /* We need gl_FragCoord so we know our position */
+ nir_variable *frag_coord_in = nir_variable_create(b.shader,
+ nir_var_shader_in,
+ vec4, "gl_FragCoord");
+ frag_coord_in->data.location = VARYING_SLOT_POS;
+ frag_coord_in->data.origin_upper_left = true;
+
+ nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in));
+ unsigned swiz[4] = { 0, 1 };
+ nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);
+ nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2);
+
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch);
+
+ /* We figure out which component we are by the x component of FragCoord */
+ nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in));
+ nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0),
+ nir_imm_int(&b, 3));
+
+ /* Select the given channel from the texelFetch result */
+ nir_ssa_def *color_channel =
+ nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)),
+ nir_channel(&b, color, 0),
+ nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)),
+ nir_channel(&b, color, 1),
+ nir_channel(&b, color, 2)));
+
+ nir_ssa_def *u = nir_ssa_undef(&b, 1, 32);
+ nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1);
+
+ return b.shader;
+}
+
static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 2,
@@ -1095,7 +1306,13 @@ blit2d_init_pipeline(struct anv_device *device,
vi_create_info = &w_tiled_vi_create_info;
break;
case BLIT2D_DST_TYPE_RGB:
- /* Not yet supported */
+ /* RGB destinations and W-detiling don't mix */
+ if (src_type != BLIT2D_SRC_TYPE_NORMAL)
+ return VK_SUCCESS;
+
+ fs.nir = build_nir_rgb_fragment_shader(device, src_func);
+ vi_create_info = &rgb_vi_create_info;
+ break;
default:
return VK_SUCCESS;
}
diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c
index 7ec0608..5d8dd3d 100644
--- a/src/intel/vulkan/anv_meta_clear.c
+++ b/src/intel/vulkan/anv_meta_clear.c
@@ -25,6 +25,8 @@
#include "anv_private.h"
#include "nir/nir_builder.h"
+#include "util/u_format_rgb9e5.h"
+
/** Vertex attributes for color clears. */
struct color_clear_vattrs {
struct anv_vue_header vue_header;
@@ -754,12 +756,22 @@ static void
anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
struct anv_image *image,
VkImageLayout image_layout,
- const VkClearValue *clear_value,
+ VkClearValue clear_value,
uint32_t range_count,
const VkImageSubresourceRange *ranges)
{
VkDevice device_h = anv_device_to_handle(cmd_buffer->device);
+ VkFormat vk_format = image->vk_format;
+ if (vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ /* We can't actually render to this format so we have to work around it
+ * by manually packing the clear color and using R32_UINT.
+ */
+ clear_value.color.uint32[0] =
+ float3_to_rgb9e5(clear_value.color.float32);
+ vk_format = VK_FORMAT_R32_UINT;
+ }
+
for (uint32_t r = 0; r < range_count; r++) {
const VkImageSubresourceRange *range = &ranges[r];
for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) {
@@ -773,7 +785,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = anv_image_to_handle(image),
.viewType = anv_meta_get_view_type(image),
- .format = image->vk_format,
+ .format = vk_format,
.subresourceRange = {
.aspectMask = range->aspectMask,
.baseMipLevel = range->baseMipLevel + l,
@@ -800,7 +812,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
&fb);
VkAttachmentDescription att_desc = {
- .format = iview.vk_format,
+ .format = vk_format,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
@@ -864,7 +876,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer,
VkClearAttachment clear_att = {
.aspectMask = range->aspectMask,
.colorAttachment = 0,
- .clearValue = *clear_value,
+ .clearValue = clear_value,
};
VkClearRect clear_rect = {
@@ -903,7 +915,7 @@ void anv_CmdClearColorImage(
meta_clear_begin(&saved_state, cmd_buffer);
anv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pColor,
+ (VkClearValue) { .color = *pColor },
rangeCount, pRanges);
meta_clear_end(&saved_state, cmd_buffer);
@@ -924,7 +936,7 @@ void anv_CmdClearDepthStencilImage(
meta_clear_begin(&saved_state, cmd_buffer);
anv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pDepthStencil,
+ (VkClearValue) { .depthStencil = *pDepthStencil },
rangeCount, pRanges);
meta_clear_end(&saved_state, cmd_buffer);
@@ -1005,7 +1017,7 @@ do_buffer_fill(struct anv_cmd_buffer *cmd_buffer,
anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image),
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- &clear_value, 1, &range);
+ clear_value, 1, &range);
}
void anv_CmdFillBuffer(
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 5a09464..33c7fe4 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -204,6 +204,12 @@ void anv_DestroyPipeline(
pAllocator ? pAllocator : &device->alloc);
if (pipeline->blend_state.map)
anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (pipeline->shaders[s])
+ anv_shader_bin_unref(device, pipeline->shaders[s]);
+ }
+
anv_free2(&device->alloc, pAllocator, pipeline);
}
@@ -391,15 +397,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
prog_data->binding_table.image_start = bias;
}
+static struct anv_shader_bin *
+anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
+ struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
+{
+ if (cache) {
+ return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size,
+ bind_map);
+ } else {
+ return anv_shader_bin_create(pipeline->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
+ }
+}
+
+
static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
gl_shader_stage stage,
- const struct brw_stage_prog_data *prog_data,
- struct anv_pipeline_bind_map *map)
+ struct anv_shader_bin *shader)
{
- pipeline->prog_data[stage] = prog_data;
+ pipeline->shaders[stage] = shader;
pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
- pipeline->bindings[stage] = *map;
}
static VkResult
@@ -412,20 +437,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_vs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_vs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_vs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -464,28 +489,29 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
ralloc_free(mem_ctx);
}
const struct brw_vs_prog_data *vs_prog_data =
- (const struct brw_vs_prog_data *) stage_prog_data;
+ (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);
if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
- pipeline->vs_simd8 = kernel;
+ pipeline->vs_simd8 = bin->kernel.offset;
pipeline->vs_vec4 = NO_KERNEL;
} else {
pipeline->vs_simd8 = NO_KERNEL;
- pipeline->vs_vec4 = kernel;
+ pipeline->vs_vec4 = bin->kernel.offset;
}
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);
return VK_SUCCESS;
}
@@ -500,20 +526,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_gs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_gs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_gs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -551,20 +577,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
}
/* TODO: SIMD8 GS */
- stage_prog_data = &prog_data.base.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- pipeline->gs_kernel = kernel;
+ pipeline->gs_kernel = bin->kernel.offset;
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);
return VK_SUCCESS;
}
@@ -580,20 +606,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_wm_prog_key key;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_wm_prog_key(&pipeline->device->info, info, extra, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- pipeline->ps_ksp0 =
- anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (pipeline->ps_ksp0 == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_wm_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -633,7 +659,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
assert(num_rts + array_len <= 8);
for (unsigned i = 0; i < array_len; i++) {
- rt_bindings[num_rts] = (struct anv_pipeline_binding) {
+ rt_bindings[num_rts + i] = (struct anv_pipeline_binding) {
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
.binding = 0,
.index = rt + i,
@@ -682,19 +708,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base;
- pipeline->ps_ksp0 =
- anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
- stage_prog_data, &map);
+ pipeline->ps_ksp0 = bin->kernel.offset;
+
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);
return VK_SUCCESS;
}
@@ -709,20 +736,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
- const struct brw_stage_prog_data *stage_prog_data;
struct anv_pipeline_bind_map map;
struct brw_cs_prog_key key;
- uint32_t kernel = NO_KERNEL;
+ struct anv_shader_bin *bin = NULL;
unsigned char sha1[20];
populate_cs_prog_key(&pipeline->device->info, &key);
- if (module->size > 0) {
- anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
+ if (cache) {
+ anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
+ pipeline->layout, spec_info);
+ bin = anv_pipeline_cache_search(cache, sha1, 20);
}
- if (module->size == 0 || kernel == NO_KERNEL) {
+ if (bin == NULL) {
struct brw_cs_prog_data prog_data = { 0, };
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
@@ -754,20 +781,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- stage_prog_data = &prog_data.base;
- kernel = anv_pipeline_cache_upload_kernel(cache,
- module->size > 0 ? sha1 : NULL,
- shader_code, code_size,
- &stage_prog_data, sizeof(prog_data),
- &map);
+ bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
+ shader_code, code_size,
+ &prog_data, sizeof(prog_data), &map);
+ if (!bin) {
+ ralloc_free(mem_ctx);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
ralloc_free(mem_ctx);
}
- pipeline->cs_simd = kernel;
+ pipeline->cs_simd = bin->kernel.offset;
- anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
- stage_prog_data, &map);
+ anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);
return VK_SUCCESS;
}
@@ -1161,8 +1188,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
- memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
- memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+ memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
pipeline->vs_simd8 = NO_KERNEL;
pipeline->vs_vec4 = NO_KERNEL;
@@ -1180,27 +1206,33 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
}
if (modules[MESA_SHADER_VERTEX]) {
- anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
- modules[MESA_SHADER_VERTEX],
- pStages[MESA_SHADER_VERTEX]->pName,
- pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+ result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
+ modules[MESA_SHADER_VERTEX],
+ pStages[MESA_SHADER_VERTEX]->pName,
+ pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL])
anv_finishme("no tessellation support");
if (modules[MESA_SHADER_GEOMETRY]) {
- anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
- modules[MESA_SHADER_GEOMETRY],
- pStages[MESA_SHADER_GEOMETRY]->pName,
- pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+ result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
+ modules[MESA_SHADER_GEOMETRY],
+ pStages[MESA_SHADER_GEOMETRY]->pName,
+ pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (modules[MESA_SHADER_FRAGMENT]) {
- anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
- modules[MESA_SHADER_FRAGMENT],
- pStages[MESA_SHADER_FRAGMENT]->pName,
- pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+ result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra,
+ modules[MESA_SHADER_FRAGMENT],
+ pStages[MESA_SHADER_FRAGMENT]->pName,
+ pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
+ if (result != VK_SUCCESS)
+ goto compile_fail;
}
if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
@@ -1263,6 +1295,16 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
pipeline->topology = _3DPRIM_RECTLIST;
return VK_SUCCESS;
+
+compile_fail:
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (pipeline->shaders[s])
+ anv_shader_bin_unref(device, pipeline->shaders[s]);
+ }
+
+ anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
+
+ return result;
}
VkResult
@@ -1277,9 +1319,6 @@ anv_graphics_pipeline_create(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
- if (cache == NULL)
- cache = &device->default_pipeline_cache;
-
switch (device->info.gen) {
case 7:
if (device->info.is_haswell)
@@ -1333,9 +1372,6 @@ static VkResult anv_compute_pipeline_create(
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
- if (cache == NULL)
- cache = &device->default_pipeline_cache;
-
switch (device->info.gen) {
case 7:
if (device->info.is_haswell)
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index fbca311..2753c46 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -22,9 +22,120 @@
*/
#include "util/mesa-sha1.h"
+#include "util/hash_table.h"
#include "util/debug.h"
#include "anv_private.h"
+/**
+ * Hash-table key for a cached shader binary: a byte count followed directly
+ * by that many bytes of key data.
+ *
+ * The key bytes are a C99 flexible array member, so sizeof() covers only the
+ * size field; storage for data must be provided by whoever allocates the key.
+ */
+struct shader_bin_key {
+   uint32_t size;   /* Number of valid bytes in data */
+   uint8_t data[];  /* Key bytes, size of them */
+};
+
+/**
+ * Size in bytes of an anv_shader_bin allocation, not counting the kernel.
+ *
+ * The allocation is made of four sections, each rounded up to a multiple of
+ * 8 bytes: the anv_shader_bin header, the prog data, the key (a uint32_t size
+ * plus key_size bytes) and the surface + sampler binding tables.
+ */
+static size_t
+anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
+                    uint32_t surface_count, uint32_t sampler_count)
+{
+   const uint32_t header_bytes = align_u32(sizeof(struct anv_shader_bin), 8);
+   const uint32_t prog_data_bytes = align_u32(prog_data_size, 8);
+   const uint32_t key_bytes = align_u32(sizeof(uint32_t) + key_size, 8);
+
+   /* Both binding tables are stored back to back and padded as one unit. */
+   const uint32_t binding_count = surface_count + sampler_count;
+   const uint32_t binding_bytes =
+      align_u32(binding_count * sizeof(struct anv_pipeline_binding), 8);
+
+   return header_bytes + prog_data_bytes + key_bytes + binding_bytes;
+}
+
+/**
+ * Return the key stored inside a shader binary.
+ *
+ * The key sits after the 8-byte aligned anv_shader_bin header and the 8-byte
+ * aligned prog data.
+ */
+static inline const struct shader_bin_key *
+anv_shader_bin_get_key(const struct anv_shader_bin *shader)
+{
+   const size_t key_offset =
+      (size_t)align_u32(sizeof(struct anv_shader_bin), 8) +
+      (size_t)align_u32(shader->prog_data_size, 8);
+
+   return (const struct shader_bin_key *)((const uint8_t *)shader + key_offset);
+}
+
+/**
+ * Allocate a shader binary and fill it from the given kernel, prog data, key
+ * and bind map.
+ *
+ * The kernel is copied into a state allocated from the device's
+ * instruction_state_pool. Prog data, key and the two binding tables are
+ * copied into the same allocation as the anv_shader_bin header, and the
+ * stored bind map is re-pointed at those copies.
+ *
+ * Returns the new binary with ref_cnt set to 1, or NULL if the host
+ * allocation fails.
+ */
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+                      const void *key_data, uint32_t key_size,
+                      const void *kernel_data, uint32_t kernel_size,
+                      const void *prog_data, uint32_t prog_data_size,
+                      const struct anv_pipeline_bind_map *bind_map)
+{
+   const size_t total_size =
+      anv_shader_bin_size(prog_data_size, key_size,
+                          bind_map->surface_count, bind_map->sampler_count);
+
+   struct anv_shader_bin *bin =
+      anv_alloc(&device->alloc, total_size, 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (bin == NULL)
+      return NULL;
+
+   bin->ref_cnt = 1;
+
+   bin->kernel =
+      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
+   memcpy(bin->kernel.map, kernel_data, kernel_size);
+   bin->kernel_size = kernel_size;
+   bin->bind_map = *bind_map;
+   bin->prog_data_size = prog_data_size;
+
+   /* Everything below lands in the variable-sized tail after the header. */
+   uint8_t *cursor =
+      (uint8_t *)bin + align_u32(sizeof(struct anv_shader_bin), 8);
+
+   memcpy(cursor, prog_data, prog_data_size);
+   cursor += align_u32(prog_data_size, 8);
+
+   struct shader_bin_key *key = (struct shader_bin_key *)cursor;
+   key->size = key_size;
+   memcpy(key->data, key_data, key_size);
+   cursor += align_u32(sizeof(*key) + key_size, 8);
+
+   /* The surface table is followed immediately by the sampler table. */
+   const size_t surface_bytes =
+      bind_map->surface_count * sizeof(struct anv_pipeline_binding);
+   const size_t sampler_bytes =
+      bind_map->sampler_count * sizeof(struct anv_pipeline_binding);
+
+   bin->bind_map.surface_to_descriptor = (struct anv_pipeline_binding *)cursor;
+   memcpy(cursor, bind_map->surface_to_descriptor, surface_bytes);
+   cursor += surface_bytes;
+
+   bin->bind_map.sampler_to_descriptor = (struct anv_pipeline_binding *)cursor;
+   memcpy(cursor, bind_map->sampler_to_descriptor, sampler_bytes);
+
+   return bin;
+}
+
+/**
+ * Free a shader binary whose reference count has dropped to zero.
+ *
+ * Returns the kernel state to the device's instruction_state_pool and then
+ * frees the host allocation.
+ */
+void
+anv_shader_bin_destroy(struct anv_device *device,
+                       struct anv_shader_bin *shader)
+{
+   struct anv_state_pool *kernel_pool = &device->instruction_state_pool;
+
+   assert(shader->ref_cnt == 0);
+
+   anv_state_pool_free(kernel_pool, shader->kernel);
+   anv_free(&device->alloc, shader);
+}
+
+/**
+ * Number of bytes a shader binary occupies when serialized: the in-memory
+ * allocation size plus the kernel size rounded up to a multiple of 8.
+ */
+static size_t
+anv_shader_bin_data_size(const struct anv_shader_bin *shader)
+{
+   const struct shader_bin_key *key = anv_shader_bin_get_key(shader);
+   const size_t struct_size =
+      anv_shader_bin_size(shader->prog_data_size, key->size,
+                          shader->bind_map.surface_count,
+                          shader->bind_map.sampler_count);
+
+   return struct_size + align_u32(shader->kernel_size, 8);
+}
+
+/**
+ * Serialize a shader binary into data.
+ *
+ * Writes the whole in-memory allocation (header, prog data, key, binding
+ * tables) followed by kernel_size bytes of kernel code. Only kernel_size
+ * bytes of kernel are copied; the padding up to the 8-byte boundary that
+ * anv_shader_bin_data_size() accounts for is not written.
+ */
+static void
+anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
+{
+   const struct shader_bin_key *key = anv_shader_bin_get_key(shader);
+   const size_t header_bytes =
+      anv_shader_bin_size(shader->prog_data_size, key->size,
+                          shader->bind_map.surface_count,
+                          shader->bind_map.sampler_count);
+   uint8_t *out = data;
+
+   memcpy(out, shader, header_bytes);
+   memcpy(out + header_bytes, shader->kernel.map, shader->kernel_size);
+}
+
/* Remaining work:
*
* - Compact binding table layout so it's tight and not dependent on
@@ -37,69 +148,62 @@
* dual_src_blend.
*/
+/**
+ * Hash callback for the shader cache table: hashes the size bytes of key
+ * data held in a shader_bin_key.
+ */
+static uint32_t
+shader_bin_key_hash_func(const void *void_key)
+{
+   const struct shader_bin_key *bin_key =
+      (const struct shader_bin_key *)void_key;
+   const uint32_t hash = _mesa_hash_data(bin_key->data, bin_key->size);
+
+   return hash;
+}
+
+/**
+ * Equality callback for the shader cache table: two keys match when they
+ * have the same size and identical data bytes.
+ */
+static bool
+shader_bin_key_compare_func(const void *void_a, const void *void_b)
+{
+   const struct shader_bin_key *lhs = void_a;
+   const struct shader_bin_key *rhs = void_b;
+
+   /* Short-circuit keeps memcmp from running on keys of different size. */
+   return lhs->size == rhs->size &&
+          memcmp(lhs->data, rhs->data, lhs->size) == 0;
+}
+
+/**
+ * Set up a pipeline cache.
+ *
+ * Records the owning device, initializes the mutex and, when cache_enabled
+ * is true, creates the hash table that maps shader_bin_key entries to shader
+ * binaries. When cache_enabled is false the table pointer is left NULL.
+ */
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
- struct anv_device *device)
+ struct anv_device *device,
+ bool cache_enabled)
{
cache->device = device;
- anv_state_stream_init(&cache->program_stream,
- &device->instruction_block_pool);
pthread_mutex_init(&cache->mutex, NULL);
- cache->kernel_count = 0;
- cache->total_size = 0;
- cache->table_size = 1024;
- const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
- cache->hash_table = malloc(byte_size);
-
- /* We don't consider allocation failure fatal, we just start with a 0-sized
- * cache. */
- if (cache->hash_table == NULL ||
- !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
- cache->table_size = 0;
- else
- memset(cache->hash_table, 0xff, byte_size);
+ if (cache_enabled) {
+ cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
+ shader_bin_key_compare_func);
+ } else {
+ /* A NULL table is how the rest of this file recognizes a disabled cache. */
+ cache->cache = NULL;
+ }
}
+/**
+ * Tear down a pipeline cache.
+ *
+ * Destroys the mutex and, if the cache has a hash table, drops the reference
+ * the cache holds on every shader binary in it before destroying the table.
+ */
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
- anv_state_stream_finish(&cache->program_stream);
pthread_mutex_destroy(&cache->mutex);
- free(cache->hash_table);
-}
-
-struct cache_entry {
- unsigned char sha1[20];
- uint32_t prog_data_size;
- uint32_t kernel_size;
- uint32_t surface_count;
- uint32_t sampler_count;
- uint32_t image_count;
-
- char prog_data[0];
-
- /* kernel follows prog_data at next 64 byte aligned address */
-};
-
-static uint32_t
-entry_size(struct cache_entry *entry)
-{
- /* This returns the number of bytes needed to serialize an entry, which
- * doesn't include the alignment padding bytes.
- */
- const uint32_t map_size =
- entry->surface_count * sizeof(struct anv_pipeline_binding) +
- entry->sampler_count * sizeof(struct anv_pipeline_binding);
+ if (cache->cache) {
+ /* This is a bit unfortunate. In order to keep things from randomly
+ * going away, the shader cache has to hold a reference to all shader
+ * binaries it contains. We unref them when we destroy the cache.
+ */
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry)
+ anv_shader_bin_unref(cache->device, entry->data);
- return sizeof(*entry) + entry->prog_data_size + map_size;
+ /* Keys live inside the shader binaries, so no per-entry callback is passed. */
+ _mesa_hash_table_destroy(cache->cache, NULL);
+ }
}
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
+ const struct anv_pipeline_layout *pipeline_layout,
const VkSpecializationInfo *spec_info)
{
struct mesa_sha1 *ctx;
@@ -108,6 +212,10 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
_mesa_sha1_update(ctx, key, key_size);
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
+ if (pipeline_layout) {
+ _mesa_sha1_update(ctx, pipeline_layout->sha1,
+ sizeof(pipeline_layout->sha1));
+ }
/* hash in shader stage, pipeline layout? */
if (spec_info) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
@@ -117,210 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
_mesa_sha1_final(ctx, hash);
}
+/**
+ * Look up a shader binary by key in the cache's hash table.
+ *
+ * Builds a temporary shader_bin_key on the stack from key_data/key_size and
+ * searches cache->cache with it. Returns the stored binary, or NULL when no
+ * entry matches. No reference is taken here and the mutex is not touched.
+ * NOTE(review): the _locked suffix suggests the caller is expected to hold
+ * cache->mutex; confirm against all callers.
+ */
-static uint32_t
-anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map)
+static struct anv_shader_bin *
+anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) sha1);
-
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- const uint32_t offset = cache->hash_table[index];
-
- if (offset == ~0)
- return NO_KERNEL;
-
- struct cache_entry *entry =
- cache->program_stream.block_pool->map + offset;
- if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- if (prog_data) {
- assert(map);
- void *p = entry->prog_data;
- *prog_data = p;
- p += entry->prog_data_size;
- map->surface_count = entry->surface_count;
- map->sampler_count = entry->sampler_count;
- map->image_count = entry->image_count;
- map->surface_to_descriptor = p;
- p += map->surface_count * sizeof(struct anv_pipeline_binding);
- map->sampler_to_descriptor = p;
- }
-
- return offset + align_u32(entry_size(entry), 64);
- }
- }
-
- /* This can happen if the pipeline cache is disabled via
- * ANV_ENABLE_PIPELINE_CACHE=false
- */
- return NO_KERNEL;
+ /* One uint32_t for the size field plus enough uint32_t words to hold
+ * key_size bytes of key data.
+ */
+ uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
+ struct shader_bin_key *key = (void *)vla;
+ key->size = key_size;
+ memcpy(key->data, key_data, key_size);
+
+ struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
+ if (entry)
+ return entry->data;
+ else
+ return NULL;
}
+/**
+ * Look up a shader binary by key, taking the cache mutex for the search.
+ *
+ * Returns NULL when the cache has no hash table or when no entry matches.
+ * On a hit the binary's reference count is incremented before it is
+ * returned. The increment happens after the mutex has been released.
+ */
-uint32_t
+struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map)
+ const void *key_data, uint32_t key_size)
{
- uint32_t kernel;
+ if (!cache->cache)
+ return NULL;
pthread_mutex_lock(&cache->mutex);
- kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
+ struct anv_shader_bin *shader =
+ anv_pipeline_cache_search_locked(cache, key_data, key_size);
pthread_mutex_unlock(&cache->mutex);
- return kernel;
-}
-
-static void
-anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
- struct cache_entry *entry, uint32_t entry_offset)
-{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) entry->sha1);
-
- /* We'll always be able to insert when we get here. */
- assert(cache->kernel_count < cache->table_size / 2);
+ /* We increment refcount before handing it to the caller */
+ if (shader)
+ anv_shader_bin_ref(shader);
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- if (cache->hash_table[index] == ~0) {
- cache->hash_table[index] = entry_offset;
- break;
- }
- }
-
- cache->total_size += entry_size(entry) + entry->kernel_size;
- cache->kernel_count++;
+ return shader;
}
+/**
+ * Return the cached shader binary for a key, creating and inserting one if
+ * the key is not present yet.
+ *
+ * If the key is already in the table the existing binary is returned and the
+ * supplied kernel/prog data are ignored. Otherwise a new binary is created
+ * with anv_shader_bin_create() and inserted under the key stored inside it.
+ * Returns NULL if creation fails. The reference count is not incremented
+ * here; a newly created binary keeps the count of 1 it was created with.
+ * The mutex is not taken in this function.
+ */
-static VkResult
-anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
+static struct anv_shader_bin *
+anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
{
- const uint32_t table_size = cache->table_size * 2;
- const uint32_t old_table_size = cache->table_size;
- const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
- uint32_t *table;
- uint32_t *old_table = cache->hash_table;
-
- table = malloc(byte_size);
- if (table == NULL)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- cache->hash_table = table;
- cache->table_size = table_size;
- cache->kernel_count = 0;
- cache->total_size = 0;
-
- memset(cache->hash_table, 0xff, byte_size);
- for (uint32_t i = 0; i < old_table_size; i++) {
- const uint32_t offset = old_table[i];
- if (offset == ~0)
- continue;
+ struct anv_shader_bin *shader =
+ anv_pipeline_cache_search_locked(cache, key_data, key_size);
+ if (shader)
+ return shader;
- struct cache_entry *entry =
- cache->program_stream.block_pool->map + offset;
- anv_pipeline_cache_set_entry(cache, entry, offset);
- }
+ struct anv_shader_bin *bin =
+ anv_shader_bin_create(cache->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
+ if (!bin)
+ return NULL;
- free(old_table);
+ /* The table key points into the binary itself, not at key_data. */
+ _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
- return VK_SUCCESS;
+ return bin;
}
-static void
-anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
- struct cache_entry *entry, uint32_t entry_offset)
-{
- if (cache->kernel_count == cache->table_size / 2)
- anv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- anv_pipeline_cache_set_entry(cache, entry, entry_offset);
-}
-
+/**
+ * Get a shader binary for the given kernel, going through the cache when it
+ * is enabled.
+ *
+ * With a hash table present, the binary is looked up or created under the
+ * cache mutex and the caller receives an additional reference to it. Without
+ * a hash table a fresh binary is created that the caller owns entirely.
+ *
+ * Returns NULL if the binary could not be created; callers must check for
+ * this before using the result.
+ */
-uint32_t
+struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const void *kernel, size_t kernel_size,
- const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size,
- struct anv_pipeline_bind_map *map)
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map)
{
- pthread_mutex_lock(&cache->mutex);
-
- /* Before uploading, check again that another thread didn't upload this
- * shader while we were compiling it.
- */
- if (sha1) {
- uint32_t cached_kernel =
- anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
- if (cached_kernel != NO_KERNEL) {
- pthread_mutex_unlock(&cache->mutex);
- return cached_kernel;
- }
- }
+ if (cache->cache) {
+ pthread_mutex_lock(&cache->mutex);
- struct cache_entry *entry;
+ struct anv_shader_bin *bin =
+ anv_pipeline_cache_add_shader(cache, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
- const uint32_t map_size =
- map->surface_count * sizeof(struct anv_pipeline_binding) +
- map->sampler_count * sizeof(struct anv_pipeline_binding);
+ pthread_mutex_unlock(&cache->mutex);
- const uint32_t preamble_size =
- align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
+ /* We increment refcount before handing it to the caller. Adding the
+ * shader can fail and return NULL, in which case there is nothing to
+ * reference and NULL is passed on to the caller.
+ */
+ if (bin) {
+ anv_shader_bin_ref(bin);
+ }
- const uint32_t size = preamble_size + kernel_size;
-
- assert(size < cache->program_stream.block_pool->block_size);
- const struct anv_state state =
- anv_state_stream_alloc(&cache->program_stream, size, 64);
-
- entry = state.map;
- entry->prog_data_size = prog_data_size;
- entry->surface_count = map->surface_count;
- entry->sampler_count = map->sampler_count;
- entry->image_count = map->image_count;
- entry->kernel_size = kernel_size;
-
- void *p = entry->prog_data;
- memcpy(p, *prog_data, prog_data_size);
- p += prog_data_size;
-
- memcpy(p, map->surface_to_descriptor,
- map->surface_count * sizeof(struct anv_pipeline_binding));
- map->surface_to_descriptor = p;
- p += map->surface_count * sizeof(struct anv_pipeline_binding);
-
- memcpy(p, map->sampler_to_descriptor,
- map->sampler_count * sizeof(struct anv_pipeline_binding));
- map->sampler_to_descriptor = p;
-
- if (sha1) {
- assert(anv_pipeline_cache_search_unlocked(cache, sha1,
- NULL, NULL) == NO_KERNEL);
-
- memcpy(entry->sha1, sha1, sizeof(entry->sha1));
- anv_pipeline_cache_add_entry(cache, entry, state.offset);
+ return bin;
+ } else {
+ /* In this case, we're not caching it so the caller owns it entirely */
+ return anv_shader_bin_create(cache->device, key_data, key_size,
+ kernel_data, kernel_size,
+ prog_data, prog_data_size, bind_map);
}
-
- pthread_mutex_unlock(&cache->mutex);
-
- memcpy(state.map + preamble_size, kernel, kernel_size);
-
- if (!cache->device->info.has_llc)
- anv_state_clflush(state);
-
- *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
- return state.offset + preamble_size;
}
struct cache_header {
@@ -339,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
struct cache_header header;
uint8_t uuid[VK_UUID_SIZE];
+ if (cache->cache == NULL)
+ return;
+
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
@@ -354,38 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
- void *end = (void *) data + size;
- void *p = (void *) data + header.header_size;
-
- while (p < end) {
- struct cache_entry *entry = p;
-
- void *data = entry->prog_data;
- const struct brw_stage_prog_data *prog_data = data;
- data += entry->prog_data_size;
-
- struct anv_pipeline_binding *surface_to_descriptor = data;
- data += entry->surface_count * sizeof(struct anv_pipeline_binding);
- struct anv_pipeline_binding *sampler_to_descriptor = data;
- data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
- void *kernel = data;
-
- struct anv_pipeline_bind_map map = {
- .surface_count = entry->surface_count,
- .sampler_count = entry->sampler_count,
- .image_count = entry->image_count,
- .surface_to_descriptor = surface_to_descriptor,
- .sampler_to_descriptor = sampler_to_descriptor
- };
-
- anv_pipeline_cache_upload_kernel(cache, entry->sha1,
- kernel, entry->kernel_size,
- &prog_data,
- entry->prog_data_size, &map);
- p = kernel + entry->kernel_size;
+ const void *end = data + size;
+ const void *p = data + header.header_size;
+
+ /* Count is the total number of valid entries */
+ uint32_t count;
+ if (p + sizeof(count) >= end)
+ return;
+ memcpy(&count, p, sizeof(count));
+ p += align_u32(sizeof(count), 8);
+
+ for (uint32_t i = 0; i < count; i++) {
+ struct anv_shader_bin bin;
+ if (p + sizeof(bin) > end)
+ break;
+ memcpy(&bin, p, sizeof(bin));
+ p += align_u32(sizeof(struct anv_shader_bin), 8);
+
+ const void *prog_data = p;
+ p += align_u32(bin.prog_data_size, 8);
+
+ struct shader_bin_key key;
+ if (p + sizeof(key) > end)
+ break;
+ memcpy(&key, p, sizeof(key));
+ const void *key_data = p + sizeof(key);
+ p += align_u32(sizeof(key) + key.size, 8);
+
+ /* We're going to memcpy this so getting rid of const is fine */
+ struct anv_pipeline_binding *bindings = (void *)p;
+ p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
+ sizeof(struct anv_pipeline_binding), 8);
+ bin.bind_map.surface_to_descriptor = bindings;
+ bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;
+
+ const void *kernel_data = p;
+ p += align_u32(bin.kernel_size, 8);
+
+ if (p > end)
+ break;
+
+ anv_pipeline_cache_add_shader(cache, key_data, key.size,
+ kernel_data, bin.kernel_size,
+ prog_data, bin.prog_data_size,
+ &bin.bind_map);
}
}
+/**
+ * Report whether pipeline caching is enabled.
+ *
+ * Reads the ANV_ENABLE_PIPELINE_CACHE environment variable (default true)
+ * on the first call and remembers the answer in a function-local static, so
+ * later calls do not consult the environment again.
+ */
+static bool
+pipeline_cache_enabled(void)
+{
+   /* -1 means "not queried yet"; afterwards holds 0 or 1. */
+   static int enabled = -1;
+   if (enabled < 0)
+      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
+   return enabled;
+}
+
VkResult anv_CreatePipelineCache(
VkDevice _device,
const VkPipelineCacheCreateInfo* pCreateInfo,
@@ -404,7 +423,7 @@ VkResult anv_CreatePipelineCache(
if (cache == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- anv_pipeline_cache_init(cache, device);
+ anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
if (pCreateInfo->initialDataSize > 0)
anv_pipeline_cache_load(cache,
@@ -439,9 +458,16 @@ VkResult anv_GetPipelineCacheData(
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
struct cache_header *header;
- const size_t size = sizeof(*header) + cache->total_size;
-
if (pData == NULL) {
+ size_t size = align_u32(sizeof(*header), 8) +
+ align_u32(sizeof(uint32_t), 8);
+
+ if (cache->cache) {
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry)
+ size += anv_shader_bin_data_size(entry->data);
+ }
+
*pDataSize = size;
return VK_SUCCESS;
}
@@ -458,25 +484,25 @@ VkResult anv_GetPipelineCacheData(
header->vendor_id = 0x8086;
header->device_id = device->chipset_id;
anv_device_get_cache_uuid(header->uuid);
- p += header->header_size;
+ p += align_u32(header->header_size, 8);
- struct cache_entry *entry;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- if (cache->hash_table[i] == ~0)
- continue;
+ uint32_t *count = p;
+ p += align_u32(sizeof(*count), 8);
+ *count = 0;
- entry = cache->program_stream.block_pool->map + cache->hash_table[i];
- const uint32_t size = entry_size(entry);
- if (end < p + size + entry->kernel_size)
- break;
-
- memcpy(p, entry, size);
- p += size;
+ if (cache->cache) {
+ struct hash_entry *entry;
+ hash_table_foreach(cache->cache, entry) {
+ struct anv_shader_bin *shader = entry->data;
+ size_t data_size = anv_shader_bin_data_size(entry->data);
+ if (p + data_size > end)
+ break;
- void *kernel = (void *) entry + align_u32(size, 64);
+ anv_shader_bin_write_data(shader, p);
+ p += data_size;
- memcpy(p, kernel, entry->kernel_size);
- p += entry->kernel_size;
+ (*count)++;
+ }
}
*pDataSize = p - pData;
@@ -484,25 +510,6 @@ VkResult anv_GetPipelineCacheData(
return VK_SUCCESS;
}
-static void
-anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
- struct anv_pipeline_cache *src)
-{
- for (uint32_t i = 0; i < src->table_size; i++) {
- const uint32_t offset = src->hash_table[i];
- if (offset == ~0)
- continue;
-
- struct cache_entry *entry =
- src->program_stream.block_pool->map + offset;
-
- if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
- continue;
-
- anv_pipeline_cache_add_entry(dst, entry, offset);
- }
-}
-
VkResult anv_MergePipelineCaches(
VkDevice _device,
VkPipelineCache destCache,
@@ -511,10 +518,23 @@ VkResult anv_MergePipelineCaches(
{
ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
+ if (!dst->cache)
+ return VK_SUCCESS;
+
for (uint32_t i = 0; i < srcCacheCount; i++) {
ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
+ if (!src->cache)
+ continue;
+
+ struct hash_entry *entry;
+ hash_table_foreach(src->cache, entry) {
+ struct anv_shader_bin *bin = entry->data;
+ if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
+ continue;
- anv_pipeline_cache_merge(dst, src);
+ anv_shader_bin_ref(bin);
+ _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
+ }
}
return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 50b860c..8b57e1b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -83,6 +83,12 @@ extern "C" {
#define MAX(a, b) ((a) > (b) ? (a) : (b))
+/**
+ * Round v down to the nearest multiple of a.
+ *
+ * Unlike align_u32(), a does not have to be a power of two, but it must be
+ * non-zero because the result is computed with a modulo.
+ */
static inline uint32_t
+align_down_npot_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0);
+   return v - (v % a);
+}
+
+static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
assert(a != 0 && a == (a & -a));
@@ -394,9 +400,9 @@ struct anv_fixed_size_state_pool {
};
#define ANV_MIN_STATE_SIZE_LOG2 6
-#define ANV_MAX_STATE_SIZE_LOG2 10
+#define ANV_MAX_STATE_SIZE_LOG2 17
-#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2)
+#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
struct anv_state_pool {
struct anv_block_pool *block_pool;
@@ -652,31 +658,27 @@ struct anv_queue {
+/* Pipeline cache: a mutex-protected hash table of shader binaries keyed by
+ * shader_bin_key. The table pointer is NULL when caching is disabled.
+ */
struct anv_pipeline_cache {
struct anv_device * device;
- struct anv_state_stream program_stream;
pthread_mutex_t mutex;
- uint32_t total_size;
- uint32_t table_size;
- uint32_t kernel_count;
- uint32_t * hash_table;
+ /* Maps shader_bin_key -> struct anv_shader_bin; NULL if caching is off */
+ struct hash_table * cache;
};
struct anv_pipeline_bind_map;
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
- struct anv_device *device);
+ struct anv_device *device,
+ bool cache_enabled);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
-uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data,
- struct anv_pipeline_bind_map *map);
-uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
- const unsigned char *sha1,
- const void *kernel,
- size_t kernel_size,
- const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size,
- struct anv_pipeline_bind_map *map);
+
+struct anv_shader_bin *
+anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
+ const void *key, uint32_t key_size);
+struct anv_shader_bin *
+anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
+ const void *key_data, uint32_t key_size,
+ const void *kernel_data, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map);
struct anv_device {
VK_LOADER_DATA _loader_data;
@@ -698,7 +700,7 @@ struct anv_device {
struct anv_state_pool dynamic_state_pool;
struct anv_block_pool instruction_block_pool;
- struct anv_pipeline_cache default_pipeline_cache;
+ struct anv_state_pool instruction_state_pool;
struct anv_block_pool surface_state_block_pool;
struct anv_state_pool surface_state_pool;
@@ -1057,6 +1059,8 @@ struct anv_pipeline_layout {
struct {
bool has_dynamic_offsets;
} stage[MESA_SHADER_STAGES];
+
+ unsigned char sha1[20];
};
struct anv_buffer {
@@ -1422,6 +1426,7 @@ struct anv_shader_module {
void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
+ const struct anv_pipeline_layout *pipeline_layout,
const VkSpecializationInfo *spec_info);
static inline gl_shader_stage
@@ -1449,13 +1454,57 @@ struct anv_pipeline_bind_map {
uint32_t surface_count;
uint32_t sampler_count;
uint32_t image_count;
- uint32_t attachment_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
- uint32_t * surface_to_attachment;
};
+/* A reference-counted compiled shader: fixed header followed, in the same
+ * allocation, by the prog data, the lookup key and the binding tables.
+ */
+struct anv_shader_bin {
+ /* Reference count; set to 1 on creation, changed with
+ * anv_shader_bin_ref() / anv_shader_bin_unref().
+ */
+ uint32_t ref_cnt;
+
+ /* Kernel code, allocated from the device's instruction_state_pool */
+ struct anv_state kernel;
+ /* Size of the kernel code in bytes */
+ uint32_t kernel_size;
+
+ /* Copy of the bind map; its surface/sampler pointers refer to the
+ * binding tables stored at the end of this allocation.
+ */
+ struct anv_pipeline_bind_map bind_map;
+
+ /* Size in bytes of the prog data stored after this header */
+ uint32_t prog_data_size;
+
+ /* Prog data follows, then the key, both aligned to 8-bytes */
+};
+
+struct anv_shader_bin *
+anv_shader_bin_create(struct anv_device *device,
+ const void *key, uint32_t key_size,
+ const void *kernel, uint32_t kernel_size,
+ const void *prog_data, uint32_t prog_data_size,
+ const struct anv_pipeline_bind_map *bind_map);
+
+void
+anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
+
+/**
+ * Take an additional reference on a shader binary.
+ *
+ * The binary must already be referenced (ref_cnt >= 1); the count is bumped
+ * with an atomic add.
+ */
+static inline void
+anv_shader_bin_ref(struct anv_shader_bin *shader)
+{
+   uint32_t *const count = &shader->ref_cnt;
+
+   assert(shader->ref_cnt >= 1);
+   (void)__sync_fetch_and_add(count, 1);
+}
+
+/**
+ * Drop one reference on a shader binary and destroy it when that was the
+ * last one.
+ */
+static inline void
+anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
+{
+   assert(shader->ref_cnt >= 1);
+
+   /* The atomic add returns the count as it was before the decrement. */
+   const uint32_t previous = __sync_fetch_and_add(&shader->ref_cnt, -1);
+   if (previous != 1)
+      return;
+
+   anv_shader_bin_destroy(device, shader);
+}
+
+/**
+ * Return the prog data stored in a shader binary, which starts right after
+ * the 8-byte aligned anv_shader_bin header.
+ */
+static inline const struct brw_stage_prog_data *
+anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader)
+{
+   const uint8_t *base = (const uint8_t *)shader;
+
+   return (const struct brw_stage_prog_data *)
+      (base + align_u32(sizeof(struct anv_shader_bin), 8));
+}
+
struct anv_pipeline {
struct anv_device * device;
struct anv_batch batch;
@@ -1465,12 +1514,12 @@ struct anv_pipeline {
struct anv_dynamic_state dynamic_state;
struct anv_pipeline_layout * layout;
- struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES];
bool use_repclear;
bool needs_data_cache;
- const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES];
+ struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
+
struct {
uint32_t start[MESA_SHADER_GEOMETRY + 1];
uint32_t size[MESA_SHADER_GEOMETRY + 1];
@@ -1513,29 +1562,29 @@ struct anv_pipeline {
} gen9;
};
-static inline const struct brw_vs_prog_data *
-get_vs_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX];
-}
-
-static inline const struct brw_gs_prog_data *
-get_gs_prog_data(struct anv_pipeline *pipeline)
+static inline bool
+anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
+ gl_shader_stage stage)
{
- return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY];
+ return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}
-static inline const struct brw_wm_prog_data *
-get_wm_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT];
+#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \
+static inline const struct brw_##prefix##_prog_data * \
+get_##prefix##_prog_data(struct anv_pipeline *pipeline) \
+{ \
+ if (anv_pipeline_has_stage(pipeline, stage)) { \
+ return (const struct brw_##prefix##_prog_data *) \
+ anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
+ } else { \
+ return NULL; \
+ } \
}
-static inline const struct brw_cs_prog_data *
-get_cs_prog_data(struct anv_pipeline *pipeline)
-{
- return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE];
-}
+ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
+ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
+ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
+ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
struct anv_graphics_pipeline_create_info {
/**
diff --git a/src/intel/vulkan/anv_wsi_wayland.c b/src/intel/vulkan/anv_wsi_wayland.c
index a9e1617..18dae0a 100644
--- a/src/intel/vulkan/anv_wsi_wayland.c
+++ b/src/intel/vulkan/anv_wsi_wayland.c
@@ -519,6 +519,7 @@ wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain,
if (!chain->images[i].busy) {
/* We found a non-busy image */
*image_index = i;
+ chain->images[i].busy = true;
return VK_SUCCESS;
}
}
diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c
index 2895d6b..81c524b 100644
--- a/src/intel/vulkan/anv_wsi_x11.c
+++ b/src/intel/vulkan/anv_wsi_x11.c
@@ -516,6 +516,7 @@ x11_acquire_next_image(struct anv_swapchain *anv_chain,
/* We found a non-busy image */
xshmfence_await(chain->images[i].shm_fence);
*image_index = i;
+ chain->images[i].busy = true;
return VK_SUCCESS;
}
}
@@ -553,6 +554,7 @@ x11_queue_present(struct anv_swapchain *anv_chain,
xshmfence_reset(image->shm_fence);
+ ++chain->send_sbc;
xcb_void_cookie_t cookie =
xcb_present_pixmap(chain->conn,
chain->window,
@@ -786,6 +788,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->window = surface->window;
chain->extent = pCreateInfo->imageExtent;
chain->image_count = num_images;
+ chain->send_sbc = 0;
chain->event_id = xcb_generate_id(chain->conn);
xcb_present_select_input(chain->conn, chain->event_id, chain->window,
diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c
index 89cb51f..d1b18e0 100644
--- a/src/intel/vulkan/gen7_pipeline.c
+++ b/src/intel/vulkan/gen7_pipeline.c
@@ -75,76 +75,6 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline,
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
}
-static void
-gen7_emit_cb_state(struct anv_pipeline *pipeline,
- const VkPipelineColorBlendStateCreateInfo *info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
- struct anv_device *device = pipeline->device;
-
- if (info == NULL || info->attachmentCount == 0) {
- pipeline->blend_state =
- anv_state_pool_emit(&device->dynamic_state_pool,
- GENX(BLEND_STATE), 64,
- .ColorBufferBlendEnable = false,
- .WriteDisableAlpha = true,
- .WriteDisableRed = true,
- .WriteDisableGreen = true,
- .WriteDisableBlue = true);
- } else {
- const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
- struct GENX(BLEND_STATE) blend = {
- .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
- .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
-
- .LogicOpEnable = info->logicOpEnable,
- .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
- .ColorBufferBlendEnable = a->blendEnable,
- .ColorClampRange = COLORCLAMP_RTFORMAT,
- .PreBlendColorClampEnable = true,
- .PostBlendColorClampEnable = true,
- .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
- .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
- .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
- .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
- .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
- .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
- .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
- .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
- .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
- .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
- };
-
- /* Our hardware applies the blend factor prior to the blend function
- * regardless of what function is used. Technically, this means the
- * hardware can do MORE than GL or Vulkan specify. However, it also
- * means that, for MIN and MAX, we have to stomp the blend factor to
- * ONE to make it a no-op.
- */
- if (a->colorBlendOp == VK_BLEND_OP_MIN ||
- a->colorBlendOp == VK_BLEND_OP_MAX) {
- blend.SourceBlendFactor = BLENDFACTOR_ONE;
- blend.DestinationBlendFactor = BLENDFACTOR_ONE;
- }
- if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
- a->alphaBlendOp == VK_BLEND_OP_MAX) {
- blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
- blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
- }
-
- pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
- GENX(BLEND_STATE_length) * 4,
- 64);
- GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
- if (pipeline->device->info.has_llc)
- anv_state_clflush(pipeline->blend_state);
- }
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
- bsp.BlendStatePointer = pipeline->blend_state.offset;
- }
-}
-
VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
@@ -182,31 +112,13 @@ genX(graphics_pipeline_create)(
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
- gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
- pCreateInfo->pMultisampleState);
+ emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
+ pCreateInfo->pMultisampleState);
emit_urb_setup(pipeline);
- const VkPipelineRasterizationStateCreateInfo *rs_info =
- pCreateInfo->pRasterizationState;
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
- clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
- clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
- clip.ClipEnable = !(extra && extra->use_rectlist),
- clip.APIMode = APIMODE_OGL,
- clip.ViewportXYClipTestEnable = true,
- clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable,
- clip.ClipMode = CLIPMODE_NORMAL,
-
- clip.TriangleStripListProvokingVertexSelect = 0,
- clip.LineStripListProvokingVertexSelect = 0,
- clip.TriangleFanProvokingVertexSelect = 1,
-
- clip.MinimumPointWidth = 0.125,
- clip.MaximumPointWidth = 255.875,
- clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
- }
+ emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
if (pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->rasterizationSamples > 1)
@@ -385,6 +297,7 @@ genX(graphics_pipeline_create)(
wm.LineEndCapAntialiasingRegionWidth = 0; /* 0.5 pixels */
wm.LineAntialiasingRegionWidth = 1; /* 1.0 pixels */
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+ wm.PixelShaderKillPixel = wm_prog_data->uses_kill;
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c
index 6d70df6..cc10d3a 100644
--- a/src/intel/vulkan/gen8_pipeline.c
+++ b/src/intel/vulkan/gen8_pipeline.c
@@ -100,123 +100,6 @@ emit_rs_state(struct anv_pipeline *pipeline,
}
static void
-emit_cb_state(struct anv_pipeline *pipeline,
- const VkPipelineColorBlendStateCreateInfo *info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
- struct anv_device *device = pipeline->device;
-
- uint32_t num_dwords = GENX(BLEND_STATE_length);
- pipeline->blend_state =
- anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
-
- struct GENX(BLEND_STATE) blend_state = {
- .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
- .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
- };
-
- /* Default everything to disabled */
- for (uint32_t i = 0; i < 8; i++) {
- blend_state.Entry[i].WriteDisableAlpha = true;
- blend_state.Entry[i].WriteDisableRed = true;
- blend_state.Entry[i].WriteDisableGreen = true;
- blend_state.Entry[i].WriteDisableBlue = true;
- }
-
- struct anv_pipeline_bind_map *map =
- &pipeline->bindings[MESA_SHADER_FRAGMENT];
-
- bool has_writeable_rt = false;
- for (unsigned i = 0; i < map->surface_count; i++) {
- struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
-
- /* All color attachments are at the beginning of the binding table */
- if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
- break;
-
- /* We can have at most 8 attachments */
- assert(i < 8);
-
- if (binding->index >= info->attachmentCount)
- continue;
-
- assert(binding->binding == 0);
- const VkPipelineColorBlendAttachmentState *a =
- &info->pAttachments[binding->index];
-
- if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
- a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
- a->colorBlendOp != a->alphaBlendOp) {
- blend_state.IndependentAlphaBlendEnable = true;
- }
-
- blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
- .LogicOpEnable = info->logicOpEnable,
- .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
- .ColorBufferBlendEnable = a->blendEnable,
- .PreBlendSourceOnlyClampEnable = false,
- .ColorClampRange = COLORCLAMP_RTFORMAT,
- .PreBlendColorClampEnable = true,
- .PostBlendColorClampEnable = true,
- .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
- .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
- .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
- .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
- .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
- .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
- .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
- .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
- .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
- .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
- };
-
- if (a->colorWriteMask != 0)
- has_writeable_rt = true;
-
- /* Our hardware applies the blend factor prior to the blend function
- * regardless of what function is used. Technically, this means the
- * hardware can do MORE than GL or Vulkan specify. However, it also
- * means that, for MIN and MAX, we have to stomp the blend factor to
- * ONE to make it a no-op.
- */
- if (a->colorBlendOp == VK_BLEND_OP_MIN ||
- a->colorBlendOp == VK_BLEND_OP_MAX) {
- blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
- blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
- }
- if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
- a->alphaBlendOp == VK_BLEND_OP_MAX) {
- blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
- blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
- }
- }
-
- struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
- blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
- blend.HasWriteableRT = has_writeable_rt;
- blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable;
- blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor;
- blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor;
- blend.SourceBlendFactor = bs0->SourceBlendFactor;
- blend.DestinationBlendFactor = bs0->DestinationBlendFactor;
- blend.AlphaTestEnable = false;
- blend.IndependentAlphaBlendEnable =
- blend_state.IndependentAlphaBlendEnable;
- }
-
- GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
- if (!device->info.has_llc)
- anv_state_clflush(pipeline->blend_state);
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
- bsp.BlendStatePointer = pipeline->blend_state.offset;
- bsp.BlendStatePointerValid = true;
- }
-}
-
-static void
emit_ms_state(struct anv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *info)
{
@@ -303,29 +186,10 @@ genX(graphics_pipeline_create)(
emit_urb_setup(pipeline);
- const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
- clip.ClipEnable = !(extra && extra->use_rectlist);
- clip.EarlyCullEnable = true;
- clip.APIMode = 1; /* D3D */
- clip.ViewportXYClipTestEnable = true;
-
- clip.ClipMode =
- pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
- REJECT_ALL : NORMAL;
-
- clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
- (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
-
- clip.TriangleStripListProvokingVertexSelect = 0;
- clip.LineStripListProvokingVertexSelect = 0;
- clip.TriangleFanProvokingVertexSelect = 1;
-
- clip.MinimumPointWidth = 0.125;
- clip.MaximumPointWidth = 255.875;
- clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1;
- }
+ emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
+ pCreateInfo->pRasterizationState, extra);
+ const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
wm.StatisticsEnable = true;
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 741d5bf..f92d856 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1448,4 +1448,17 @@ void genX(CmdCopyQueryPoolResults)(
}
}
+#else
+void genX(CmdCopyQueryPoolResults)(
+ VkCommandBuffer commandBuffer,
+ VkQueryPool queryPool,
+ uint32_t firstQuery,
+ uint32_t queryCount,
+ VkBuffer destBuffer,
+ VkDeviceSize destOffset,
+ VkDeviceSize destStride,
+ VkQueryResultFlags flags)
+{ /* Stub for the #else branch: ignores every argument and copies no query results. */
+ anv_finishme("Queries not yet supported on Ivy Bridge"); /* Only flags the missing feature; nothing is written to destBuffer. */
+}
#endif
diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c
index 0d36e3c..a74071c 100644
--- a/src/intel/vulkan/genX_l3.c
+++ b/src/intel/vulkan/genX_l3.c
@@ -315,10 +315,14 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline)
bool needs_dc = false, needs_slm = false;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i];
+ if (!anv_pipeline_has_stage(pipeline, i))
+ continue;
+
+ const struct brw_stage_prog_data *prog_data =
+ anv_shader_bin_get_prog_data(pipeline->shaders[i]);
needs_dc |= pipeline->needs_data_cache;
- needs_slm |= prog_data && prog_data->total_shared;
+ needs_slm |= prog_data->total_shared;
}
return get_default_l3_weights(&pipeline->device->info,
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 5cbcfd2..7d8129d 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -63,8 +63,7 @@ genX(compute_pipeline_create)(
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
- memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
- memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
+ memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
pipeline->vs_simd8 = NO_KERNEL;
pipeline->vs_vec4 = NO_KERNEL;
@@ -76,9 +75,13 @@ genX(compute_pipeline_create)(
assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
- anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
- pCreateInfo->stage.pName,
- pCreateInfo->stage.pSpecializationInfo);
+ result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
+ pCreateInfo->stage.pName,
+ pCreateInfo->stage.pSpecializationInfo);
+ if (result != VK_SUCCESS) {
+ anv_free2(&device->alloc, pAllocator, pipeline);
+ return result;
+ }
pipeline->use_repclear = false;
diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h
index 669b456..94692e4 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -291,6 +291,11 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
if (input_index < 0)
continue;
+ if (attr == VARYING_SLOT_PNTC) {
+ sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
+ continue;
+ }
+
const int slot = fs_input_map->varying_to_slot[attr];
if (input_index >= 16)
@@ -512,3 +517,177 @@ emit_ds_state(struct anv_pipeline *pipeline,
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
#endif
}
+
+/**
+ * Build the pipeline's BLEND_STATE and emit the blend-related packets.
+ *
+ * Allocates BLEND_STATE from the device's dynamic state pool, fills one entry
+ * for each color attachment binding found at the start of the fragment
+ * shader's bind map, packs the result into the allocation (flushing it with
+ * clflush when the device has no LLC) and emits
+ * 3DSTATE_BLEND_STATE_POINTERS.  On gen8+ it also emits 3DSTATE_PS_BLEND
+ * based on entry 0.
+ *
+ * \param pipeline  pipeline whose batch and blend_state are written
+ * \param info      color blend create info; may be NULL, in which case every
+ *                  entry keeps its default "all writes disabled" setting
+ * \param ms_info   multisample create info; may be NULL, in which case
+ *                  alpha-to-coverage and alpha-to-one are disabled
+ */
+static void
+emit_cb_state(struct anv_pipeline *pipeline,
+              const VkPipelineColorBlendStateCreateInfo *info,
+              const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   struct anv_device *device = pipeline->device;
+
+   const uint32_t num_dwords = GENX(BLEND_STATE_length);
+   pipeline->blend_state =
+      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
+
+   struct GENX(BLEND_STATE) blend_state = {
+#if GEN_GEN >= 8
+      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#else
+      /* Make sure it gets zeroed */
+      .Entry = { { 0, }, },
+#endif
+   };
+
+   /* Default everything to disabled */
+   for (uint32_t i = 0; i < 8; i++) {
+      blend_state.Entry[i].WriteDisableAlpha = true;
+      blend_state.Entry[i].WriteDisableRed = true;
+      blend_state.Entry[i].WriteDisableGreen = true;
+      blend_state.Entry[i].WriteDisableBlue = true;
+   }
+
+   /* Without a fragment stage there is no bind map to walk; map stays NULL
+    * and surface_count stays 0 so the loop below is skipped entirely.
+    */
+   uint32_t surface_count = 0;
+   struct anv_pipeline_bind_map *map = NULL;
+   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
+      surface_count = map->surface_count;
+   }
+
+   bool has_writeable_rt = false;
+   for (unsigned i = 0; i < surface_count; i++) {
+      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
+
+      /* All color attachments are at the beginning of the binding table */
+      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
+         break;
+
+      /* We can have at most 8 attachments */
+      assert(i < 8);
+
+      /* The application may legitimately pass no color blend state (the
+       * per-gen code this replaces handled info == NULL as well).  Such
+       * bindings, like ones past the end of pAttachments, keep the disabled
+       * defaults set above.
+       */
+      if (info == NULL || binding->index >= info->attachmentCount)
+         continue;
+
+      assert(binding->binding == 0);
+      const VkPipelineColorBlendAttachmentState *a =
+         &info->pAttachments[binding->index];
+
+      blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
+#if GEN_GEN < 8
+         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
+         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
+#endif
+         .LogicOpEnable = info->logicOpEnable,
+         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+         .ColorBufferBlendEnable = a->blendEnable,
+         .ColorClampRange = COLORCLAMP_RTFORMAT,
+         .PreBlendColorClampEnable = true,
+         .PostBlendColorClampEnable = true,
+         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
+         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
+         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
+         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
+         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
+         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
+         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
+         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
+         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
+         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
+      };
+
+      /* Color and alpha only need independent blending when they differ.
+       * The enable bit lives in the top-level state on gen8+ and in each
+       * entry on earlier gens.
+       */
+      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
+          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
+          a->colorBlendOp != a->alphaBlendOp) {
+#if GEN_GEN >= 8
+         blend_state.IndependentAlphaBlendEnable = true;
+#else
+         blend_state.Entry[i].IndependentAlphaBlendEnable = true;
+#endif
+      }
+
+      if (a->colorWriteMask != 0)
+         has_writeable_rt = true;
+
+      /* Our hardware applies the blend factor prior to the blend function
+       * regardless of what function is used.  Technically, this means the
+       * hardware can do MORE than GL or Vulkan specify.  However, it also
+       * means that, for MIN and MAX, we have to stomp the blend factor to
+       * ONE to make it a no-op.
+       */
+      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
+          a->colorBlendOp == VK_BLEND_OP_MAX) {
+         blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
+         blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
+      }
+      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
+          a->alphaBlendOp == VK_BLEND_OP_MAX) {
+         blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
+         blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
+      }
+   }
+
+#if GEN_GEN >= 8
+   /* 3DSTATE_PS_BLEND mirrors the blend setup of the first entry. */
+   struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
+      blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
+      blend.HasWriteableRT = has_writeable_rt;
+      blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable;
+      blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor;
+      blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor;
+      blend.SourceBlendFactor = bs0->SourceBlendFactor;
+      blend.DestinationBlendFactor = bs0->DestinationBlendFactor;
+      blend.AlphaTestEnable = false;
+      blend.IndependentAlphaBlendEnable =
+         blend_state.IndependentAlphaBlendEnable;
+   }
+#else
+   /* Only consumed by 3DSTATE_PS_BLEND, which is not emitted before gen8. */
+   (void)has_writeable_rt;
+#endif
+
+   GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
+   if (!device->info.has_llc)
+      anv_state_clflush(pipeline->blend_state);
+
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
+      bsp.BlendStatePointer = pipeline->blend_state.offset;
+#if GEN_GEN >= 8
+      bsp.BlendStatePointerValid = true;
+#endif
+   }
+}
+
+/**
+ * Emit 3DSTATE_CLIP into the pipeline's batch.
+ *
+ * \param pipeline  pipeline whose batch receives the packet
+ * \param vp_info   viewport create info; when it is NULL or reports zero
+ *                  viewports the maximum viewport index is programmed as 0
+ * \param rs_info   rasterization create info (rasterizer discard and, on
+ *                  gen7, front face and cull mode)
+ * \param extra     optional driver-internal create info; clipping is
+ *                  disabled when it requests use_rectlist
+ */
+static void
+emit_3dstate_clip(struct anv_pipeline *pipeline,
+                  const VkPipelineViewportStateCreateInfo *vp_info,
+                  const VkPipelineRasterizationStateCreateInfo *rs_info,
+                  const struct anv_graphics_pipeline_create_info *extra)
+{
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   /* Only read in the non-gen7 branch below. */
+   (void) wm_prog_data;
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
+      clip.ClipEnable = !(extra && extra->use_rectlist);
+      clip.EarlyCullEnable = true;
+      clip.APIMode = APIMODE_D3D;
+      clip.ViewportXYClipTestEnable = true;
+
+      clip.ClipMode = rs_info->rasterizerDiscardEnable ?
+         CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL;
+
+      clip.TriangleStripListProvokingVertexSelect = 0;
+      clip.LineStripListProvokingVertexSelect = 0;
+      clip.TriangleFanProvokingVertexSelect = 1;
+
+      clip.MinimumPointWidth = 0.125;
+      clip.MaximumPointWidth = 255.875;
+
+      /* Guard against a missing viewport state and against unsigned
+       * underflow of "viewportCount - 1" when the count is zero.
+       */
+      clip.MaximumVPIndex = (vp_info && vp_info->viewportCount > 0) ?
+         vp_info->viewportCount - 1 : 0;
+
+#if GEN_GEN == 7
+      clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
+      clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
+      clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
+#else
+      clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
+         (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0;
+#endif
+   }
+}
diff --git a/src/loader/loader.c b/src/loader/loader.c
index 522fba3..56ffc5d 100644
--- a/src/loader/loader.c
+++ b/src/loader/loader.c
@@ -80,8 +80,11 @@
#include "xmlpool.h"
#endif
#endif
-#ifdef HAVE_SYSFS
-#include <sys/types.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
#endif
#include "loader.h"
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 896f225..4c2e849 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -68,15 +68,10 @@ dri3_fence_await(xcb_connection_t *c, struct loader_dri3_buffer *buffer)
static void
dri3_update_num_back(struct loader_dri3_drawable *draw)
{
- draw->num_back = 1;
- if (draw->flipping) {
- if (!draw->is_pixmap &&
- !(draw->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC))
- draw->num_back++;
- draw->num_back++;
- }
- if (draw->vtable->get_swap_interval(draw) == 0)
- draw->num_back++;
+ if (draw->flipping)
+ draw->num_back = 3;
+ else
+ draw->num_back = 2;
}
void
@@ -785,6 +780,7 @@ loader_dri3_open(xcb_connection_t *conn,
}
fd = xcb_dri3_open_reply_fds(conn, reply)[0];
+ free(reply);
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
return fd;
@@ -1115,6 +1111,7 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
xcb_sync_fence_t sync_fence;
struct xshmfence *shm_fence;
int fence_fd;
+ __DRIscreen *cur_screen;
if (buffer)
return buffer;
@@ -1145,8 +1142,17 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format,
if (!bp_reply)
goto no_image;
+ /* Get the currently-bound screen or revert to using the drawable's screen if
+ * no contexts are currently bound. The latter case is at least necessary for
+ * obs-studio, when using Window Capture (Xcomposite) as a Source.
+ */
+ cur_screen = draw->vtable->get_dri_screen(draw);
+ if (!cur_screen) {
+ cur_screen = draw->dri_screen;
+ }
+
buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format,
- draw->dri_screen, draw->ext->image,
+ cur_screen, draw->ext->image,
buffer);
if (!buffer->image)
goto no_image;
diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
index 5b8fd1d..658e190 100644
--- a/src/loader/loader_dri3_helper.h
+++ b/src/loader/loader_dri3_helper.h
@@ -103,6 +103,7 @@ struct loader_dri3_vtable {
void (*set_drawable_size)(struct loader_dri3_drawable *, int, int);
bool (*in_current_context)(struct loader_dri3_drawable *);
__DRIcontext *(*get_dri_context)(struct loader_dri3_drawable *);
+ __DRIscreen *(*get_dri_screen)(struct loader_dri3_drawable *);
void (*flush_drawable)(struct loader_dri3_drawable *, unsigned);
void (*show_fps)(struct loader_dri3_drawable *, uint64_t);
};
diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
index 68a28a2..b44341d 100644
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -64,6 +64,9 @@ BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
lib_LTLIBRARIES += shared-glapi/libglapi.la
shared_glapi_libglapi_la_SOURCES = $(MAPI_GLAPI_FILES) shared-glapi/glapi_mapi_tmp.h
+shared_glapi_libglapi_la_CFLAGS = \
+ $(AM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
shared_glapi_libglapi_la_CPPFLAGS = \
$(AM_CPPFLAGS) \
-DMAPI_MODE_GLAPI \
diff --git a/src/mapi/entry_x86-64_tls.h b/src/mapi/entry_x86-64_tls.h
index 38faccc..8f3fa91 100644
--- a/src/mapi/entry_x86-64_tls.h
+++ b/src/mapi/entry_x86-64_tls.h
@@ -25,6 +25,11 @@
* Chia-I Wu <olv@lunarg.com>
*/
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
__asm__(".text\n"
".balign 32\n"
@@ -54,8 +59,8 @@ entry_patch_public(void)
{
}
-static char
-x86_64_entry_start[];
+extern char
+x86_64_entry_start[] HIDDEN;
mapi_func
entry_get_public(int slot)
diff --git a/src/mapi/entry_x86_tls.h b/src/mapi/entry_x86_tls.h
index 46d2ece..545b5a3 100644
--- a/src/mapi/entry_x86_tls.h
+++ b/src/mapi/entry_x86_tls.h
@@ -27,6 +27,12 @@
#include <string.h>
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
+
__asm__(".text");
__asm__("x86_current_tls:\n\t"
@@ -71,8 +77,8 @@ __asm__(".text");
extern unsigned long
x86_current_tls();
-static char x86_entry_start[];
-static char x86_entry_end[];
+extern char x86_entry_start[] HIDDEN;
+extern char x86_entry_end[] HIDDEN;
void
entry_patch_public(void)
diff --git a/src/mapi/entry_x86_tsd.h b/src/mapi/entry_x86_tsd.h
index ea7bacb..0c28c8f 100644
--- a/src/mapi/entry_x86_tsd.h
+++ b/src/mapi/entry_x86_tsd.h
@@ -25,6 +25,11 @@
* Chia-I Wu <olv@lunarg.com>
*/
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
#define X86_ENTRY_SIZE 32
@@ -58,8 +63,8 @@ __asm__(".balign 32\n"
#include <string.h>
#include "u_execmem.h"
-static const char x86_entry_start[];
-static const char x86_entry_end[];
+extern const char x86_entry_start[] HIDDEN;
+extern const char x86_entry_end[] HIDDEN;
void
entry_patch_public(void)
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk
index e04482b..aaa2de9 100644
--- a/src/mesa/Android.gen.mk
+++ b/src/mesa/Android.gen.mk
@@ -70,7 +70,7 @@ define es-gen
$(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@
endef
-$(intermediates)/main/git_sha1.h: $(wildcard $(MESA_TOP)/.git/HEAD)
+$(intermediates)/main/git_sha1.h: $(wildcard $(MESA_TOP)/.git/ORIG_HEAD)
@mkdir -p $(dir $@)
@echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
$(hide) touch $@
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 037384a..9710c7f 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -33,11 +33,6 @@ if HAVE_OSMESA
SUBDIRS += drivers/osmesa
endif
-if HAVE_GLX
-gldir = $(includedir)/GL
-gl_HEADERS = $(top_srcdir)/include/GL/*.h
-endif
-
include Makefile.sources
EXTRA_DIST = \
@@ -161,11 +156,6 @@ libmesa_sse41_la_SOURCES = \
libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
-if HAVE_GLX
-pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = gl.pc
-endif
-
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 84db5a8..bd5b3d3 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -645,11 +645,11 @@ INCLUDE_DIRS = \
-I$(top_builddir)/src \
-I$(top_srcdir)/src \
-I$(top_builddir)/src/compiler/nir \
- -I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa \
- -I$(top_srcdir)/src/mesa/main \
+ -I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa/main \
- -I$(top_srcdir)/src/mapi \
+ -I$(top_srcdir)/src/mesa/main \
-I$(top_builddir)/src/mapi \
+ -I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 434800e..8f41174 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -15,13 +15,13 @@ env.MSVC2013Compat()
env.Append(CPPPATH = [
'../compiler/nir', # for generated nir_opcodes.h, etc
'#/src',
+ Dir('../mapi'), # src/mapi build path
'#/src/mapi',
'#/src/glsl',
+ Dir('.'), # src/mesa build path
'#/src/mesa',
'#/src/gallium/include',
'#/src/gallium/auxiliary',
- Dir('../mapi'), # src/mapi build path
- Dir('.'), # src/mesa build path
])
if env['platform'] == 'windows':
@@ -116,7 +116,7 @@ if env['platform'] not in ('cygwin', 'darwin', 'windows', 'haiku'):
)
# Add the dir containing the generated header (somewhere inside the
# build dir) to the include path
- env.Append(CPPPATH = [matypes[0].dir])
+ env.Prepend(CPPPATH = [matypes[0].dir])
def write_git_sha1_h_file(filename):
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c
index e5a3f00..5607d5b 100644
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -858,6 +858,7 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
struct __DRIimageList images;
unsigned int format;
uint32_t buffer_mask = 0;
+ int ret;
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
@@ -877,12 +878,14 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable)
if (back_rb)
buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
- (*screen->image.loader->getBuffers) (drawable,
- driGLFormatToImageFormat(format),
- &drawable->dri2.stamp,
- drawable->loaderPrivate,
- buffer_mask,
- &images);
+ ret = screen->image.loader->getBuffers(drawable,
+ driGLFormatToImageFormat(format),
+ &drawable->dri2.stamp,
+ drawable->loaderPrivate,
+ buffer_mask,
+ &images);
+ if (!ret)
+ return;
if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
drawable->w = images.front->width;
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index f448551..194b412 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -48,6 +48,7 @@ i965_compiler_FILES = \
brw_nir_attribute_workarounds.c \
brw_nir_intrinsics.c \
brw_nir_opt_peephole_ffma.c \
+ brw_nir_tcs_workarounds.c \
brw_packed_float.c \
brw_predicated_break.cpp \
brw_reg.h \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 9590968..6be82c5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -167,7 +167,8 @@ nir_uniform_type_size(const struct glsl_type *type)
}
const unsigned *
-brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
+brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx,
+ struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
@@ -175,13 +176,6 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
{
const struct brw_compiler *compiler = brw->intelScreen->compiler;
- void *mem_ctx = ralloc_context(NULL);
-
- /* Calling brw_preprocess_nir and friends is destructive and, if cloning is
- * enabled, may end up completely replacing the nir_shader. Therefore, we
- * own it and might as well put it in our context for easy cleanup.
- */
- ralloc_steal(mem_ctx, nir);
nir->options =
compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions;
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index 7ec5875..133a8ac 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -366,7 +366,8 @@ struct brw_blorp_blit_prog_key
void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key);
const unsigned *
-brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir,
+brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx,
+ struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 782d285..db94f33 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1296,7 +1296,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
* of samples).
*/
static nir_shader *
-brw_blorp_build_nir_shader(struct brw_context *brw,
+brw_blorp_build_nir_shader(struct brw_context *brw, void *mem_ctx,
const brw_blorp_blit_prog_key *key)
{
nir_ssa_def *src_pos, *dst_pos, *color;
@@ -1342,7 +1342,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
(key->dst_samples == 0));
nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
struct brw_blorp_blit_vars v;
brw_blorp_blit_vars_init(&b, &v, key);
@@ -1505,6 +1505,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
&params->wm_prog_kernel, &params->wm_prog_data))
return;
+ void *mem_ctx = ralloc_context(NULL);
+
const unsigned *program;
unsigned program_size;
struct brw_blorp_prog_data prog_data;
@@ -1512,7 +1514,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
/* Try and compile with NIR first. If that fails, fall back to the old
* method of building shaders manually.
*/
- nir_shader *nir = brw_blorp_build_nir_shader(brw, prog_key);
+ nir_shader *nir = brw_blorp_build_nir_shader(brw, mem_ctx, prog_key);
struct brw_wm_prog_key wm_key;
brw_blorp_init_wm_prog_key(&wm_key);
wm_key.tex.compressed_multisample_layout_mask =
@@ -1520,7 +1522,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
wm_key.tex.msaa_16 = prog_key->tex_samples == 16;
wm_key.multisample_fbo = prog_key->rt_samples > 1;
- program = brw_blorp_compile_nir_shader(brw, nir, &wm_key, false,
+ program = brw_blorp_compile_nir_shader(brw, mem_ctx, nir, &wm_key, false,
&prog_data, &program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
@@ -1528,6 +1530,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw,
program, program_size,
&prog_data, sizeof(prog_data),
&params->wm_prog_kernel, &params->wm_prog_data);
+
+ ralloc_free(mem_ctx);
}
static void
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index 2515a04..6400218 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -64,7 +64,7 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
nir_builder b;
- nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear");
nir_variable *u_color = nir_variable_create(b.shader, nir_var_uniform,
@@ -84,7 +84,8 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw,
struct brw_blorp_prog_data prog_data;
unsigned program_size;
const unsigned *program =
- brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data,
+ brw_blorp_compile_nir_shader(brw, mem_ctx,
+ b.shader, &wm_key, use_replicated_data,
&prog_data, &program_size);
brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 10e9f47..7d15c28 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -220,6 +220,8 @@ struct brw_tcs_prog_key
/** A bitfield of per-vertex outputs written. */
uint64_t outputs_written;
+ bool quads_workaround;
+
struct brw_sampler_prog_key_data tex;
};
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1cb99da..2af42e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5681,7 +5681,7 @@ fs_visitor::setup_gs_payload()
* have to multiply by VerticesIn to obtain the total storage requirement.
*/
if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
- max_push_components) {
+ max_push_components || gs_prog_data->invocations > 1) {
gs_prog_data->base.include_vue_handles = true;
/* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 11c078a..91763d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2322,23 +2322,23 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
break;
fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t));
+ fs_reg m0_2 = component(m0, 2);
- const fs_builder fwa_bld = bld.exec_all();
+ const fs_builder chanbld = bld.exec_all().group(1, 0);
/* Zero the message header */
- fwa_bld.MOV(m0, brw_imm_ud(0u));
+ bld.exec_all().MOV(m0, brw_imm_ud(0u));
/* Copy "Barrier ID" from r0.2, bits 16:13 */
- fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(INTEL_MASK(16, 13)));
/* Shift it up to bits 27:24. */
- fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
/* Set the Barrier Count and the enable bit */
- fwa_bld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
break;
@@ -4060,12 +4060,23 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
dest = get_nir_dest(instr->dest);
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
- fs_reg offset = get_nir_src(instr->src[0]);
+ fs_reg offset;
fs_reg data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[2]);
+ /* Get the offset */
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset = vgrf(glsl_type::uint_type);
+ bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
/* Emit the actual atomic operation operation */
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 74c354f..6185310 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
bool brw_nir_apply_trig_workarounds(nir_shader *nir);
+void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
+
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct brw_sampler_prog_key_data *key,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
new file mode 100644
index 0000000..0626981
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "brw_nir.h"
+
+/**
+ * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
+ *
+ * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
+ * definition of the patch header layouts):
+ *
+ * "HW Bug: The Tessellation stage will incorrectly add domain points
+ * along patch edges under the following conditions, which may result
+ * in conformance failures and/or cracking artifacts:
+ *
+ * * QUAD domain
+ * * INTEGER partitioning
+ * * All three TessFactors in a given U or V direction (e.g., V
+ * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
+ * * All three TessFactors in the other direction are > 1.0 and all
+ * round up to the same integer value (e.g, U direction:
+ * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
+ *
+ * The suggested workaround (to be implemented as part of the postamble
+ * to the HS shader in the HS kernel) is:
+ *
+ * if (
+ * (TF[UEQ0] > 1.0) ||
+ * (TF[VEQ0] > 1.0) ||
+ * (TF[UEQ1] > 1.0) ||
+ * (TF[VEQ1] > 1.0) ||
+ * (TF[INSIDE_U] > 1.0) ||
+ * (TF[INSIDE_V] > 1.0) )
+ * {
+ * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
+ * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
+ * }"
+ *
+ * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
+ * that the above workaround fails to fix certain GL/ES CTS tests which
+ * have inside tessellation factors of -1.0. This can be explained by
+ * a quote from the ARB_tessellation_shader specification:
+ *
+ * "If "equal_spacing" is used, the floating-point tessellation level is
+ * first clamped to the range [1,<max>], where <max> is implementation-
+ * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
+ *
+ * In other words, the actual inner tessellation factor used is
+ * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
+ * value against 1.0. To accomplish this, we change the comparison from
+ * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
+ */
+
+/**
+ * Emit a load_output intrinsic at the builder's cursor and return its result.
+ *
+ * The load produces \p num_components 32-bit components, uses a constant
+ * zero as its offset source, and has its base index set to \p offset.
+ */
+static inline nir_ssa_def *
+load_output(nir_builder *b, int num_components, int offset)
+{
+   nir_intrinsic_instr *intrin =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
+
+   /* Describe the destination first, then the source and the base index. */
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest, num_components, 32, NULL);
+   intrin->num_components = num_components;
+   intrin->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_intrinsic_set_base(intrin, offset);
+
+   nir_builder_instr_insert(b, &intrin->instr);
+
+   nir_ssa_def *result = &intrin->dest.ssa;
+   return result;
+}
+
+/**
+ * Emit a store_output intrinsic at the builder's cursor.
+ *
+ * Writes the first \p comps components of \p value (the write mask is
+ * (1 << comps) - 1) to the output whose base index is \p offset, using a
+ * constant zero as the offset source.
+ *
+ * The base index is set explicitly so that \p offset is actually honoured,
+ * mirroring load_output().  Without it the parameter would be silently
+ * ignored and any caller passing a non-zero offset would store to the
+ * wrong output.
+ */
+static inline void
+store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps)
+{
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = comps;
+   nir_intrinsic_set_base(store, offset);
+   nir_intrinsic_set_write_mask(store, (1u << comps) - 1);
+   store->src[0] = nir_src_for_ssa(value);
+   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+emit_quads_workaround(nir_builder *b, nir_block *block)
+{
+ /* Append the quads workaround to the end of `block`: read back the
+ * inner and outer tessellation levels and, if any of them is greater
+ * than 1.0, rewrite each inner level that is <= 1.0 to 2.0.
+ *
+ * We're going to insert a new if-statement in a predecessor of the end
+ * block. This would normally create a new block (after the if) which
+ * would then become the predecessor of the end block, causing our set
+ * walking to get screwed up. To avoid this, just emit a constant at
+ * the end of our current block, and insert the if before that.
+ *
+ * Everything below is therefore emitted in front of that placeholder
+ * constant. `inner` is loaded as 2 components at base 0 and `outer`
+ * as 4 components at base 1. The if-condition is true when 1.0 is
+ * less than any component of `outer` or any component of `inner`.
+ */
+ b->cursor = nir_after_block_before_jump(block);
+ b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr);
+
+ nir_ssa_def *inner = load_output(b, 2, 0);
+ nir_ssa_def *outer = load_output(b, 4, 1);
+
+ nir_ssa_def *any_greater_than_1 =
+ nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
+ nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
+
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
+ nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+ /* Fill out the new then-block: store the inner levels back (called
+ * with offset 0 and 2 components), selecting 2.0 for each component
+ * where 1.0 >= inner and the loaded value otherwise. Nothing is
+ * added to the else-list.
+ */
+ b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+ store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
+ nir_imm_float(b, 2.0f), inner), 0, 2);
+}
+
+/**
+ * Apply the quads tessellation-level workaround to a tessellation control
+ * shader.
+ *
+ * For every function that has an implementation, the workaround code is
+ * emitted at the end of each block that is a predecessor of the
+ * implementation's end block.  No metadata is reported as preserved
+ * afterwards.
+ */
+void
+brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
+{
+   assert(nir->stage == MESA_SHADER_TESS_CTRL);
+
+   nir_foreach_function(function, nir) {
+      nir_function_impl *impl = function->impl;
+
+      /* Declarations without a body have nothing to patch. */
+      if (impl != NULL) {
+         nir_builder builder;
+         nir_builder_init(&builder, impl);
+
+         struct set_entry *pred_entry;
+         set_foreach(impl->end_block->predecessors, pred_entry) {
+            emit_quads_workaround(&builder, (nir_block *) pred_entry->key);
+         }
+
+         /* Control flow was modified, so preserve no metadata (flags = 0). */
+         nir_metadata_preserve(impl, 0);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index a91c6e2..a42a322 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -686,12 +686,12 @@ stop_oa_counters(struct brw_context *brw)
* The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
* including the required PIPE_CONTROL flushes.
*
- * Sandybridge is the worst case scenario: brw_emit_mi_flush
- * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush
- * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add
- * the 3 DWords for MI_REPORT_PERF_COUNT itself.
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four
+ * PIPE_CONTROLs which are 5 DWords each. We have to flush before and after
+ * MI_REPORT_PERF_COUNT, so multiply by two. Finally, add the 3 DWords for
+ * MI_REPORT_PERF_COUNT itself.
*/
-#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)
+#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3)
/**
* Emit an MI_REPORT_PERF_COUNT command packet.
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 4672efd..d51cf1b 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -96,10 +96,38 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
void
brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
{
+ if (brw->gen >= 6 &&
+ (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
+ (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
+ /* A pipe control command with flush and invalidate bits set
+ * simultaneously is an inherently racy operation on Gen6+ if the
+ * contents of the flushed caches were intended to become visible from
+ * any of the invalidated caches. Split it in two PIPE_CONTROLs, the
+ * first one should stall the pipeline to make sure that the flushed R/W
+ * caches are coherent with memory once the specified R/O caches are
+ * invalidated. On pre-Gen6 hardware the (implicit) R/O cache
+ * invalidation seems to happen at the bottom of the pipeline together
+ * with any write cache flush, so this shouldn't be a concern.
+ */
+ brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
+ PIPE_CONTROL_CS_STALL);
+ flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
+ }
+
if (brw->gen >= 8) {
if (brw->gen == 8)
gen8_add_cs_stall_workaround_bits(&flags);
+ if (brw->gen == 9 &&
+ (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
+ /* Hardware workaround: SKL
+ *
+ * Emit Pipe Control with all bits set to zero before emitting
+ * a Pipe Control with VF Cache Invalidate set.
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
OUT_BATCH(flags);
@@ -311,15 +339,6 @@ brw_emit_mi_flush(struct brw_context *brw)
} else {
int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (brw->gen >= 6) {
- if (brw->gen == 9) {
- /* Hardware workaround: SKL
- *
- * Emit Pipe Control with all bits set to zero before emitting
- * a Pipe Control with VF Cache Invalidate set.
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 8a5dd7e..6b7fde2 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw,
key->patch_outputs_written);
found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
key->tes_primitive_mode);
+ found |= key_debug(brw, "quads and equal_spacing workaround",
+ old_key->quads_workaround, key->quads_workaround);
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
if (!found) {
@@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw,
* based on the domain the DS is expecting to tessellate.
*/
key.tes_primitive_mode = tep->program.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ tep->program.PrimitiveMode == GL_QUADS &&
+ tep->program.Spacing == GL_EQUAL;
if (tcp) {
key.program_string_id = tcp->id;
@@ -383,6 +388,8 @@ brw_tcs_precompile(struct gl_context *ctx,
struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+ const struct gl_shader *tes =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
memset(&key, 0, sizeof(key));
@@ -393,9 +400,14 @@ brw_tcs_precompile(struct gl_context *ctx,
if (brw->gen < 8)
key.input_vertices = shader_prog->TessCtrl.VerticesOut;
- key.tes_primitive_mode =
- shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ?
- shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES;
+ if (tes) {
+ key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode;
+ key.quads_workaround = brw->gen < 9 &&
+ shader_prog->TessEval.PrimitiveMode == GL_QUADS &&
+ shader_prog->TessEval.Spacing == GL_EQUAL;
+ } else {
+ key.tes_primitive_mode = GL_TRIANGLES;
+ }
key.outputs_written = prog->OutputsWritten;
key.patch_outputs_written = prog->PatchOutputsWritten;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 162b481..a7398a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -354,95 +354,97 @@ vec4_visitor::opt_vector_float()
{
bool progress = false;
- int last_reg = -1, last_reg_offset = -1;
- enum brw_reg_file last_reg_file = BAD_FILE;
+ foreach_block(block, cfg) {
+ int last_reg = -1, last_reg_offset = -1;
+ enum brw_reg_file last_reg_file = BAD_FILE;
+
+ uint8_t imm[4] = { 0 };
+ int inst_count = 0;
+ vec4_instruction *imm_inst[4];
+ unsigned writemask = 0;
+ enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+
+ foreach_inst_in_block_safe(vec4_instruction, inst, block) {
+ int vf = -1;
+ enum brw_reg_type need_type;
+
+ /* Look for unconditional MOVs from an immediate with a partial
+ * writemask. Skip type-conversion MOVs other than integer 0,
+ * where the type doesn't matter. See if the immediate can be
+ * represented as a VF.
+ */
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->src[0].file == IMM &&
+ inst->predicate == BRW_PREDICATE_NONE &&
+ inst->dst.writemask != WRITEMASK_XYZW &&
+ (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
+
+ vf = brw_float_to_vf(inst->src[0].d);
+ need_type = BRW_REGISTER_TYPE_D;
+
+ if (vf == -1) {
+ vf = brw_float_to_vf(inst->src[0].f);
+ need_type = BRW_REGISTER_TYPE_F;
+ }
+ } else {
+ last_reg = -1;
+ }
- uint8_t imm[4] = { 0 };
- int inst_count = 0;
- vec4_instruction *imm_inst[4];
- unsigned writemask = 0;
- enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F;
+ /* If this wasn't a MOV, or the destination register doesn't match,
+ * or we have to switch destination types, then this breaks our
+ * sequence. Combine anything we've accumulated so far.
+ */
+ if (last_reg != inst->dst.nr ||
+ last_reg_offset != inst->dst.reg_offset ||
+ last_reg_file != inst->dst.file ||
+ (vf > 0 && dest_type != need_type)) {
+
+ if (inst_count > 1) {
+ unsigned vf;
+ memcpy(&vf, imm, sizeof(vf));
+ vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
+ mov->dst.type = dest_type;
+ mov->dst.writemask = writemask;
+ inst->insert_before(block, mov);
+
+ for (int i = 0; i < inst_count; i++) {
+ imm_inst[i]->remove(block);
+ }
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- int vf = -1;
- enum brw_reg_type need_type;
+ progress = true;
+ }
- /* Look for unconditional MOVs from an immediate with a partial
- * writemask. Skip type-conversion MOVs other than integer 0,
- * where the type doesn't matter. See if the immediate can be
- * represented as a VF.
- */
- if (inst->opcode == BRW_OPCODE_MOV &&
- inst->src[0].file == IMM &&
- inst->predicate == BRW_PREDICATE_NONE &&
- inst->dst.writemask != WRITEMASK_XYZW &&
- (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) {
-
- vf = brw_float_to_vf(inst->src[0].d);
- need_type = BRW_REGISTER_TYPE_D;
-
- if (vf == -1) {
- vf = brw_float_to_vf(inst->src[0].f);
- need_type = BRW_REGISTER_TYPE_F;
- }
- } else {
- last_reg = -1;
- }
+ inst_count = 0;
+ last_reg = -1;
+ writemask = 0;
+ dest_type = BRW_REGISTER_TYPE_F;
- /* If this wasn't a MOV, or the destination register doesn't match,
- * or we have to switch destination types, then this breaks our
- * sequence. Combine anything we've accumulated so far.
- */
- if (last_reg != inst->dst.nr ||
- last_reg_offset != inst->dst.reg_offset ||
- last_reg_file != inst->dst.file ||
- (vf > 0 && dest_type != need_type)) {
-
- if (inst_count > 1) {
- unsigned vf;
- memcpy(&vf, imm, sizeof(vf));
- vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
- mov->dst.type = dest_type;
- mov->dst.writemask = writemask;
- inst->insert_before(block, mov);
-
- for (int i = 0; i < inst_count; i++) {
- imm_inst[i]->remove(block);
+ for (int i = 0; i < 4; i++) {
+ imm[i] = 0;
}
-
- progress = true;
}
- inst_count = 0;
- last_reg = -1;
- writemask = 0;
- dest_type = BRW_REGISTER_TYPE_F;
-
- for (int i = 0; i < 4; i++) {
- imm[i] = 0;
+ /* Record this instruction's value (if it was representable). */
+ if (vf != -1) {
+ if ((inst->dst.writemask & WRITEMASK_X) != 0)
+ imm[0] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Y) != 0)
+ imm[1] = vf;
+ if ((inst->dst.writemask & WRITEMASK_Z) != 0)
+ imm[2] = vf;
+ if ((inst->dst.writemask & WRITEMASK_W) != 0)
+ imm[3] = vf;
+
+ writemask |= inst->dst.writemask;
+ imm_inst[inst_count++] = inst;
+
+ last_reg = inst->dst.nr;
+ last_reg_offset = inst->dst.reg_offset;
+ last_reg_file = inst->dst.file;
+ if (vf > 0)
+ dest_type = need_type;
}
}
-
- /* Record this instruction's value (if it was representable). */
- if (vf != -1) {
- if ((inst->dst.writemask & WRITEMASK_X) != 0)
- imm[0] = vf;
- if ((inst->dst.writemask & WRITEMASK_Y) != 0)
- imm[1] = vf;
- if ((inst->dst.writemask & WRITEMASK_Z) != 0)
- imm[2] = vf;
- if ((inst->dst.writemask & WRITEMASK_W) != 0)
- imm[3] = vf;
-
- writemask |= inst->dst.writemask;
- imm_inst[inst_count++] = inst;
-
- last_reg = inst->dst.nr;
- last_reg_offset = inst->dst.reg_offset;
- last_reg_file = inst->dst.file;
- if (vf > 0)
- dest_type = need_type;
- }
}
if (progress)
@@ -1109,7 +1111,7 @@ vec4_visitor::opt_register_coalesce()
/* Can't coalesce this GRF if someone else was going to
* read it later.
*/
- if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip)
+ if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
continue;
/* We need to check interference with the final destination between this
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 0c1f0c3..10898a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == VGRF) {
- if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
+ if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
entry->remove();
ralloc_free(entry);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 927438f..26a910c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -59,7 +59,10 @@ vec4_gs_visitor::make_reg_for_system_value(int location)
switch (location) {
case SYSTEM_VALUE_INVOCATION_ID:
this->current_annotation = "initialize gl_InvocationID";
- emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
+ if (gs_prog_data->invocations > 1)
+ emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
+ else
+ emit(MOV(*reg, brw_imm_ud(0)));
break;
default:
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index f61c612..5440dba 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -451,6 +451,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
+ if (key->quads_workaround)
+ brw_nir_apply_tcs_quads_workaround(nir);
+
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
if (is_scalar)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 609285e..61ada53 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1443,10 +1443,12 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
/* _NEW_PROGRAM */
struct gl_shader_program *prog =
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
- if (prog && brw->cs.prog_data->uses_num_work_groups) {
+ if (prog && cs_prog_data->uses_num_work_groups) {
const unsigned surf_idx =
- brw->cs.prog_data->binding_table.work_groups_start;
+ cs_prog_data->binding_table.work_groups_start;
uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
drm_intel_bo *bo;
uint32_t bo_offset;
@@ -1475,6 +1477,7 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
const struct brw_tracked_state brw_cs_work_groups_surface = {
.dirty = {
.brw = BRW_NEW_BLORP |
+ BRW_NEW_CS_PROG_DATA |
BRW_NEW_CS_WORK_GROUPS
},
.emit = brw_upload_cs_work_groups_surface,
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 26de633..64ccdb6 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -50,6 +50,7 @@ upload_clip_state(struct brw_context *brw)
dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
+ /* BRW_NEW_VS_PROG_DATA */
dw1 |= brw->vs.prog_data->base.cull_distance_mask;
if (brw->gen >= 7)
@@ -224,6 +225,7 @@ const struct brw_tracked_state gen7_clip_state = {
BRW_NEW_CONTEXT |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GEOMETRY_PROGRAM |
+ BRW_NEW_VS_PROG_DATA |
BRW_NEW_META_IN_PROGRESS |
BRW_NEW_PRIMITIVE |
BRW_NEW_RASTERIZER_DISCARD |
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 5427fa5..b245226 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -283,7 +283,7 @@ gen7_upload_cs_push_constants(struct brw_context *brw)
(struct brw_compute_program *) brw->compute_program;
if (cp) {
- /* CACHE_NEW_CS_PROG */
+ /* BRW_NEW_CS_PROG_DATA */
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data,
@@ -297,6 +297,7 @@ const struct brw_tracked_state gen7_cs_push_constants = {
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
},
.emit = gen7_upload_cs_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c
index 6f01abb..3b79b55 100644
--- a/src/mesa/drivers/dri/i965/gen8_ds_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c
@@ -69,6 +69,7 @@ gen8_upload_ds_state(struct brw_context *brw)
GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) |
(tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
+ /* _NEW_TRANSFORM */
OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled,
GEN8_DS_USER_CLIP_DISTANCE) |
SET_FIELD(vue_prog_data->cull_distance_mask,
@@ -106,7 +107,7 @@ gen8_upload_ds_state(struct brw_context *brw)
const struct brw_tracked_state gen8_ds_state = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_TESS_PROGRAMS |
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8a904fe..f916d99 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -124,6 +124,7 @@ const struct brw_tracked_state gen8_ps_extra = {
.mesa = _NEW_BUFFERS | _NEW_COLOR,
.brw = BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA,
},
.emit = upload_ps_extra,
@@ -283,7 +284,6 @@ const struct brw_tracked_state gen8_ps_state = {
.mesa = _NEW_MULTISAMPLE,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_FS_PROG_DATA,
},
.emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index aa1dc38..67e8e8f 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -21,13 +21,13 @@ extern "C" {
* - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
* - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
* - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
- * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
+ * - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB,
+ * which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes
* - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes.
* On Ironlake, it's 6 DWords, but we have some slack due to the lack of
* Sandybridge PIPE_CONTROL madness.
- * - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
- * - 5 dwords for initial mi_flush
+ * - CC_STATE workaround on HSW (17 * 4 = 68 bytes)
+ * - 10 dwords for initial mi_flush
* - 2 dwords for CC state setup
* - 5 dwords for the required pipe control at the end
* - Restoring L3 configuration: (24 dwords = 96 bytes)
@@ -35,7 +35,7 @@ extern "C" {
* - 7 dwords for L3 configuration set-up.
* - 5 dwords for L3 atomic set-up (on HSW).
*/
-#define BATCH_RESERVED 248
+#define BATCH_RESERVED 308
struct intel_batchbuffer;
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 939f9a0..8a0d2ad 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -374,6 +374,19 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
if (!irb->mt)
return;
+ /* Adjust the miptree's upper-left coordinate.
+ *
+ * FIXME: Adjusting the miptree's layout outside of
+ * intel_miptree_create_layout() is fragile. Plumb the adjustment through
+ * intel_miptree_create_layout() and brw_tex_layout().
+ */
+ irb->mt->level[0].level_x = image->tile_x;
+ irb->mt->level[0].level_y = image->tile_y;
+ irb->mt->level[0].slice[0].x_offset = image->tile_x;
+ irb->mt->level[0].slice[0].y_offset = image->tile_y;
+ irb->mt->total_width += image->tile_x;
+ irb->mt->total_height += image->tile_y;
+
rb->InternalFormat = image->internal_format;
rb->Width = image->width;
rb->Height = image->height;
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6265dc..e74a2dc 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -366,25 +366,8 @@ intel_miptree_create_layout(struct brw_context *brw,
_mesa_get_format_name(format),
first_level, last_level, depth0, mt);
- if (target == GL_TEXTURE_1D_ARRAY) {
- /* For a 1D Array texture the OpenGL API will treat the height0
- * parameter as the number of array slices. For Intel hardware, we treat
- * the 1D array as a 2D Array with a height of 1.
- *
- * So, when we first come through this path to create a 1D Array
- * texture, height0 stores the number of slices, and depth0 is 1. In
- * this case, we want to swap height0 and depth0.
- *
- * Since some miptrees will be created based on the base miptree, we may
- * come through this path and see height0 as 1 and depth0 being the
- * number of slices. In this case we don't need to do the swap.
- */
- assert(height0 == 1 || depth0 == 1);
- if (height0 > 1) {
- depth0 = height0;
- height0 = 1;
- }
- }
+ if (target == GL_TEXTURE_1D_ARRAY)
+ assert(height0 == 1);
mt->target = target;
mt->format = format;
@@ -1050,6 +1033,7 @@ intel_get_image_dims(struct gl_texture_image *image,
* as a 2D Array with a height of 1. So, here we want to swap image
* height and depth.
*/
+ assert(image->Depth == 1);
*width = image->Width;
*height = 1;
*depth = image->Height;
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index a486d6e..cacd7e2 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -110,22 +110,6 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
if (ctx->_ImageTransferState)
return false;
- /* This renderbuffer can come from a texture. In this case, we impose
- * some of the same restrictions we have for textures and adjust for
- * miplevels.
- */
- if (rb->TexImage) {
- if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D &&
- rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE)
- return false;
-
- int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel;
-
- /* Adjust x and y offset based on miplevel */
- xoffset += irb->mt->level[level].level_x;
- yoffset += irb->mt->level[level].level_y;
- }
-
/* It is possible that the renderbuffer (or underlying texture) is
* multisampled. Since ReadPixels from a multisampled buffer requires a
* multisample resolve, we can't handle this here
@@ -169,6 +153,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
return false;
}
+ xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset;
+ yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset;
+
dst_pitch = _mesa_image_row_stride(pack, width, format, type);
/* For a window-system renderbuffer, the buffer is actually flipped
@@ -201,7 +188,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
xoffset * cpp, (xoffset + width) * cpp,
yoffset, yoffset + height,
pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
- bo->virtual,
+ bo->virtual + irb->mt->offset,
dst_pitch, irb->mt->pitch,
brw->has_swizzling,
irb->mt->tiling,
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 95365fe..7a82be4 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -134,6 +134,15 @@
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+ (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+ PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+ (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+ PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
/** @} */
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
index 3e359a5..39c9636 100644
--- a/src/mesa/drivers/dri/i965/intel_syncobj.c
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -49,6 +49,7 @@ struct brw_fence {
/** The fence waits for completion of this batch. */
drm_intel_bo *batch_bo;
+ mtx_t mutex;
bool signalled;
};
@@ -58,10 +59,20 @@ struct intel_gl_sync_object {
};
static void
+brw_fence_init(struct brw_context *brw, struct brw_fence *fence)
+{
+ fence->brw = brw;
+ fence->batch_bo = NULL;
+ mtx_init(&fence->mutex, mtx_plain);
+}
+
+static void
brw_fence_finish(struct brw_fence *fence)
{
if (fence->batch_bo)
drm_intel_bo_unreference(fence->batch_bo);
+
+ mtx_destroy(&fence->mutex);
}
static void
@@ -77,7 +88,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
}
static bool
-brw_fence_has_completed(struct brw_fence *fence)
+brw_fence_has_completed_locked(struct brw_fence *fence)
{
if (fence->signalled)
return true;
@@ -92,13 +103,21 @@ brw_fence_has_completed(struct brw_fence *fence)
return false;
}
-/**
- * Return true if the function successfully signals or has already signalled.
- * (This matches the behavior expected from __DRI2fence::client_wait_sync).
- */
static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
- uint64_t timeout)
+brw_fence_has_completed(struct brw_fence *fence)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_has_completed_locked(fence);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
+static bool
+brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
{
if (fence->signalled)
return true;
@@ -123,6 +142,23 @@ brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
return true;
}
+/**
+ * Return true if the function successfully signals or has already signalled.
+ * (This matches the behavior expected from __DRI2fence::client_wait_sync).
+ */
+static bool
+brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
+ uint64_t timeout)
+{
+ bool ret;
+
+ mtx_lock(&fence->mutex);
+ ret = brw_fence_client_wait_locked(brw, fence, timeout);
+ mtx_unlock(&fence->mutex);
+
+ return ret;
+}
+
static void
brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
{
@@ -161,6 +197,7 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
struct brw_context *brw = brw_context(ctx);
struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s;
+ brw_fence_init(brw, &sync->fence);
brw_fence_insert(brw, &sync->fence);
}
@@ -215,7 +252,7 @@ intel_dri_create_fence(__DRIcontext *ctx)
if (!fence)
return NULL;
- fence->brw = brw;
+ brw_fence_init(brw, fence);
brw_fence_insert(brw, fence);
return fence;
@@ -244,6 +281,12 @@ intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags
{
struct brw_fence *fence = driver_fence;
+ /* We might be called here with a NULL fence as a result of WaitSyncKHR
+ * on an EGL_KHR_reusable_sync fence. Nothing to do here in that case.
+ */
+ if (!fence)
+ return;
+
brw_fence_server_wait(fence->brw, fence);
}
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index cac33ac..a1364b9 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -140,6 +140,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
!intel_miptree_match_image(intel_texobj->mt, first_image) ||
intel_texobj->mt->last_level != levels - 1) {
intel_miptree_release(&intel_texobj->mt);
+
+ intel_get_image_dims(first_image, &width, &height, &depth);
intel_texobj->mt = intel_miptree_create(brw, texobj->Target,
first_image->TexFormat,
0, levels - 1,
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
index 492ecdc..2c5c2db 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
@@ -67,5 +67,5 @@ nv20_emit_frag(struct gl_context *ctx, int emit)
PUSH_DATA (push, in >> 32);
BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
- PUSH_DATA (push, n);
+ PUSH_DATA (push, MAX2(1, n));
}
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
index 2d4bb70..6e006f8 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -484,14 +484,14 @@ swrast_map_renderbuffer(struct gl_context *ctx,
xrb->map_mode = mode;
xrb->map_x = x;
- xrb->map_y = y;
+ xrb->map_y = rb->Height - y - h;
xrb->map_w = w;
xrb->map_h = h;
stride = w * cpp;
xrb->Base.Buffer = malloc(h * stride);
- sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h,
+ sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h,
(char *) xrb->Base.Buffer,
dPriv->loaderPrivate);
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index a28c583..d8815af 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -378,17 +378,48 @@ draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
/* complicated error checking... */
for (output = 0; output < n; output++) {
- /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL 3.0
+ destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
+
+ /* From the OpenGL 3.0 specification, page 258:
+ * "Each buffer listed in bufs must be one of the values from tables
+ * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated."
+ */
+ if (destMask[output] == BAD_MASK) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
+ caller, _mesa_enum_to_string(buffers[output]));
+ return;
+ }
+
+ /* From the OpenGL 4.0 specification, page 256:
+ * "For both the default framebuffer and framebuffer objects, the
+ * constants FRONT, BACK, LEFT, RIGHT, and FRONT_AND_BACK are not
+ * valid in the bufs array passed to DrawBuffers, and will result in
+ * the error INVALID_ENUM. This restriction is because these
+ * constants may themselves refer to multiple buffers, as shown in
+ * table 4.4."
+ * Previous versions of the OpenGL specification say INVALID_OPERATION,
+ * but the Khronos conformance tests expect INVALID_ENUM.
+ */
+ if (_mesa_bitcount(destMask[output]) > 1) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
+ caller, _mesa_enum_to_string(buffers[output]));
+ return;
+ }
+
+ /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL ES 3.0
* specification says:
*
- * "Each buffer listed in bufs must be BACK, NONE, or one of the values
- * from table 4.3 (NONE, COLOR_ATTACHMENTi)"
+ * "If the GL is bound to a draw framebuffer object, the ith buffer
+ * listed in bufs must be COLOR_ATTACHMENTi or NONE. Specifying a
+ * buffer out of order, BACK, or COLOR_ATTACHMENTm where m is greater
+ * than or equal to the value of MAX_COLOR_ATTACHMENTS, will
+ * generate the error INVALID_OPERATION."
*/
- if (_mesa_is_gles3(ctx) && buffers[output] != GL_NONE &&
- buffers[output] != GL_BACK &&
+ if (_mesa_is_gles3(ctx) && _mesa_is_user_fbo(fb) &&
+ buffers[output] != GL_NONE &&
(buffers[output] < GL_COLOR_ATTACHMENT0 ||
buffers[output] >= GL_COLOR_ATTACHMENT0 + ctx->Const.MaxColorAttachments)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffers(buffer)");
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)");
return;
}
@@ -412,34 +443,6 @@ draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
return;
}
- destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
-
- /* From the OpenGL 3.0 specification, page 258:
- * "Each buffer listed in bufs must be one of the values from tables
- * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated.
- */
- if (destMask[output] == BAD_MASK) {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
- caller, _mesa_enum_to_string(buffers[output]));
- return;
- }
-
- /* From the OpenGL 4.0 specification, page 256:
- * "For both the default framebuffer and framebuffer objects, the
- * constants FRONT, BACK, LEFT, RIGHT, and FRONT_AND_BACK are not
- * valid in the bufs array passed to DrawBuffers, and will result in
- * the error INVALID_ENUM. This restriction is because these
- * constants may themselves refer to multiple buffers, as shown in
- * table 4.4."
- * Previous versions of the OpenGL specification say INVALID_OPERATION,
- * but the Khronos conformance tests expect INVALID_ENUM.
- */
- if (_mesa_bitcount(destMask[output]) > 1) {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
- caller, _mesa_enum_to_string(buffers[output]));
- return;
- }
-
/* From the OpenGL 3.0 specification, page 259:
* "If the GL is bound to the default framebuffer and DrawBuffers is
* supplied with a constant (other than NONE) that does not indicate
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index bf47c1c..68da639 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -303,9 +303,21 @@ _mesa_get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
switch (attachment) {
case GL_FRONT_LEFT:
- return &fb->Attachment[BUFFER_FRONT_LEFT];
+ /* Front buffers can be allocated on the first use, but
+ * glGetFramebufferAttachmentParameteriv must work even if that
+ * allocation hasn't happened yet. In that case, use the back buffer,
+ * which should be the same.
+ */
+ if (fb->Attachment[BUFFER_FRONT_LEFT].Type == GL_NONE)
+ return &fb->Attachment[BUFFER_BACK_LEFT];
+ else
+ return &fb->Attachment[BUFFER_FRONT_LEFT];
case GL_FRONT_RIGHT:
- return &fb->Attachment[BUFFER_FRONT_RIGHT];
+ /* Same as above. */
+ if (fb->Attachment[BUFFER_FRONT_RIGHT].Type == GL_NONE)
+ return &fb->Attachment[BUFFER_BACK_RIGHT];
+ else
+ return &fb->Attachment[BUFFER_FRONT_RIGHT];
case GL_BACK_LEFT:
return &fb->Attachment[BUFFER_BACK_LEFT];
case GL_BACK_RIGHT:
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index d72bc71..18dffc3 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -293,10 +293,9 @@ struct ureg {
GLuint file:4;
GLint idx:9; /* relative addressing may be negative */
/* sizeof(idx) should == sizeof(prog_src_reg::Index) */
- GLuint abs:1;
GLuint negate:1;
GLuint swz:12;
- GLuint pad:5;
+ GLuint pad:6;
};
@@ -325,7 +324,6 @@ static const struct ureg undef = {
0,
0,
0,
- 0,
0
};
@@ -344,7 +342,6 @@ static struct ureg make_ureg(GLuint file, GLint idx)
struct ureg reg;
reg.file = file;
reg.idx = idx;
- reg.abs = 0;
reg.negate = 0;
reg.swz = SWIZZLE_NOOP;
reg.pad = 0;
@@ -352,15 +349,6 @@ static struct ureg make_ureg(GLuint file, GLint idx)
}
-
-static struct ureg absolute( struct ureg reg )
-{
- reg.abs = 1;
- reg.negate = 0;
- return reg;
-}
-
-
static struct ureg negate( struct ureg reg )
{
reg.negate ^= 1;
@@ -961,7 +949,8 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
- emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
+ emit_op1(p, OPCODE_ABS, spot, 0, spot);
+ emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
release_temp(p, spot);
diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 215c14f..e9727ea 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -387,13 +387,13 @@ _is_target_supported(struct gl_context *ctx, GLenum target)
* "if a particular type of <target> is not supported by the
* implementation the "unsupported" answer should be given.
* This is not an error."
+ *
+ * For OpenGL ES, queries can only be used with GL_RENDERBUFFER or multisample (MS) targets.
*/
switch(target){
+ case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
- break;
-
- case GL_TEXTURE_1D:
if (!_mesa_is_desktop_gl(ctx))
return false;
break;
@@ -404,12 +404,12 @@ _is_target_supported(struct gl_context *ctx, GLenum target)
break;
case GL_TEXTURE_2D_ARRAY:
- if (!(_mesa_has_EXT_texture_array(ctx) || _mesa_is_gles3(ctx)))
+ if (!_mesa_has_EXT_texture_array(ctx))
return false;
break;
case GL_TEXTURE_CUBE_MAP:
- if (!_mesa_has_ARB_texture_cube_map(ctx))
+ if (ctx->API != API_OPENGL_CORE && !_mesa_has_ARB_texture_cube_map(ctx))
return false;
break;
@@ -419,7 +419,7 @@ _is_target_supported(struct gl_context *ctx, GLenum target)
break;
case GL_TEXTURE_RECTANGLE:
- if (!_mesa_has_NV_texture_rectangle(ctx))
+ if (!_mesa_has_ARB_texture_rectangle(ctx))
return false;
break;
@@ -962,7 +962,8 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
switch (pname) {
case GL_INTERNALFORMAT_DEPTH_SIZE:
- if (!_mesa_has_ARB_depth_texture(ctx) &&
+ if (ctx->API != API_OPENGL_CORE &&
+ !_mesa_has_ARB_depth_texture(ctx) &&
target != GL_RENDERBUFFER &&
target != GL_TEXTURE_BUFFER)
goto end;
diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index d917220..2afe7be 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -85,10 +85,15 @@ _mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx,
* not specified with an unsized internal format from table 8.3 or a
* sized internal format that is both color-renderable and
* texture-filterable according to table 8.10."
+ *
+ * GL_EXT_texture_format_BGRA8888 adds a GL_BGRA_EXT unsized internal
+ * format, and includes it in a very similar looking table. So we
+ * include it here as well.
*/
return internalformat == GL_RGBA || internalformat == GL_RGB ||
internalformat == GL_LUMINANCE_ALPHA ||
internalformat == GL_LUMINANCE || internalformat == GL_ALPHA ||
+ internalformat == GL_BGRA_EXT ||
(_mesa_is_es3_color_renderable(internalformat) &&
_mesa_is_es3_texture_filterable(internalformat));
}
@@ -144,6 +149,11 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
return;
}
+ if (srcImage->Width == 0 || srcImage->Height == 0) {
+ _mesa_unlock_texture(ctx, texObj);
+ return;
+ }
+
if (target == GL_TEXTURE_CUBE_MAP) {
GLuint face;
for (face = 0; face < 6; face++) {
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 9f70749..7623b93 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -411,6 +411,14 @@ static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = {
static const int extra_ARB_gpu_shader5_or_OES_sample_variables[] = {
EXT(ARB_gpu_shader5),
EXT(OES_sample_variables),
+ EXTRA_END
+};
+
+static const int extra_KHR_robustness_or_GL[] = {
+ EXT(KHR_robustness),
+ EXTRA_API_GL,
+ EXTRA_API_GL_CORE,
+ EXTRA_END
};
EXTRA_EXT(ARB_texture_cube_map);
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index bfcbfd6..ea3649a 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -338,6 +338,9 @@ descriptor=[
# blend_func_extended
[ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ],
+
+# GL_ARB_robustness / GL_KHR_robustness
+ [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), extra_KHR_robustness_or_GL" ],
]},
# GLES3 is not a typo.
@@ -842,9 +845,6 @@ descriptor=[
# GL 3.2
[ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ],
-# GL_ARB_robustness
- [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
-
# GL_ARB_timer_query
[ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ],
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index 24ce7b0..6df09bb 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -907,6 +907,29 @@ _mesa_is_astc_format(GLenum internalFormat)
}
/**
+ * Test if the given format is an ETC2 format.
+ */
+GLboolean
+_mesa_is_etc2_format(GLenum internalFormat)
+{
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB8_ETC2:
+ case GL_COMPRESSED_SRGB8_ETC2:
+ case GL_COMPRESSED_RGBA8_ETC2_EAC:
+ case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+ case GL_COMPRESSED_R11_EAC:
+ case GL_COMPRESSED_RG11_EAC:
+ case GL_COMPRESSED_SIGNED_R11_EAC:
+ case GL_COMPRESSED_SIGNED_RG11_EAC:
+ case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
* Test if the given format is an integer (non-normalized) format.
*/
GLboolean
@@ -2495,7 +2518,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
case GL_RGBA8I_EXT:
case GL_RGBA16I_EXT:
case GL_RGBA32I_EXT:
- case GL_RGB10_A2UI:
return GL_RGBA;
case GL_RGB8UI_EXT:
case GL_RGB16UI_EXT:
@@ -2507,6 +2529,13 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
}
}
+ if (ctx->Extensions.ARB_texture_rgb10_a2ui) {
+ switch (internalFormat) {
+ case GL_RGB10_A2UI:
+ return GL_RGBA;
+ }
+ }
+
if (ctx->Extensions.EXT_texture_integer) {
switch (internalFormat) {
case GL_ALPHA8UI_EXT:
diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h
index c73f464..474ede2 100644
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -61,6 +61,9 @@ extern GLboolean
_mesa_is_astc_format(GLenum internalFormat);
extern GLboolean
+_mesa_is_etc2_format(GLenum internalFormat);
+
+extern GLboolean
_mesa_is_type_unsigned(GLenum type);
extern GLboolean
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 5956ce4..35ce0f2 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -1385,13 +1385,24 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
static bool
validate_io(struct gl_shader_program *producer,
- struct gl_shader_program *consumer)
+ struct gl_shader_program *consumer,
+ gl_shader_stage producer_stage,
+ gl_shader_stage consumer_stage)
{
if (producer == consumer)
return true;
+ const bool nonarray_stage_to_array_stage =
+ producer_stage != MESA_SHADER_TESS_CTRL &&
+ (consumer_stage == MESA_SHADER_GEOMETRY ||
+ consumer_stage == MESA_SHADER_TESS_CTRL ||
+ consumer_stage == MESA_SHADER_TESS_EVAL);
+
bool valid = true;
+ void *name_buffer = NULL;
+ size_t name_buffer_size = 0;
+
gl_shader_variable const **outputs =
(gl_shader_variable const **) calloc(producer->NumProgramResourceList,
sizeof(gl_shader_variable *));
@@ -1463,11 +1474,52 @@ validate_io(struct gl_shader_program *producer,
}
}
} else {
+ char *consumer_name = consumer_var->name;
+
+ if (nonarray_stage_to_array_stage &&
+ consumer_var->interface_type != NULL &&
+ consumer_var->interface_type->is_array() &&
+ !is_gl_identifier(consumer_var->name)) {
+ const size_t name_len = strlen(consumer_var->name);
+
+ if (name_len >= name_buffer_size) {
+ free(name_buffer);
+
+ name_buffer_size = name_len + 1;
+ name_buffer = malloc(name_buffer_size);
+ if (name_buffer == NULL) {
+ valid = false;
+ goto out;
+ }
+ }
+
+ consumer_name = (char *) name_buffer;
+
+ char *s = strchr(consumer_var->name, '[');
+ if (s == NULL) {
+ valid = false;
+ goto out;
+ }
+
+ char *t = strchr(s, ']');
+ if (t == NULL) {
+ valid = false;
+ goto out;
+ }
+
+ assert(t[1] == '.' || t[1] == '[');
+
+ const ptrdiff_t base_name_len = s - consumer_var->name;
+
+ memcpy(consumer_name, consumer_var->name, base_name_len);
+ strcpy(consumer_name + base_name_len, t + 1);
+ }
+
for (unsigned j = 0; j < num_outputs; j++) {
const gl_shader_variable *const var = outputs[j];
if (!var->explicit_location &&
- strcmp(consumer_var->name, var->name) == 0) {
+ strcmp(consumer_name, var->name) == 0) {
producer_var = var;
match_index = j;
break;
@@ -1529,25 +1581,53 @@ validate_io(struct gl_shader_program *producer,
* Note that location mismatches are detected by the loops above that
* find the producer variable that goes with the consumer variable.
*/
- if (producer_var->type != consumer_var->type ||
- producer_var->interpolation != consumer_var->interpolation ||
- producer_var->precision != consumer_var->precision) {
+ if (nonarray_stage_to_array_stage) {
+ if (!consumer_var->type->is_array() ||
+ consumer_var->type->fields.array != producer_var->type) {
+ valid = false;
+ goto out;
+ }
+
+ if (consumer_var->interface_type != NULL) {
+ if (!consumer_var->interface_type->is_array() ||
+ consumer_var->interface_type->fields.array != producer_var->interface_type) {
+ valid = false;
+ goto out;
+ }
+ } else if (producer_var->interface_type != NULL) {
+ valid = false;
+ goto out;
+ }
+ } else {
+ if (producer_var->type != consumer_var->type) {
+ valid = false;
+ goto out;
+ }
+
+ if (producer_var->interface_type != consumer_var->interface_type) {
+ valid = false;
+ goto out;
+ }
+ }
+
+ if (producer_var->interpolation != consumer_var->interpolation) {
valid = false;
goto out;
}
- if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
+ if (producer_var->precision != consumer_var->precision) {
valid = false;
goto out;
}
- if (producer_var->interface_type != consumer_var->interface_type) {
+ if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) {
valid = false;
goto out;
}
}
out:
+ free(name_buffer);
free(outputs);
return valid && num_outputs == 0;
}
@@ -1579,7 +1659,9 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
if (shProg[idx]->_LinkedShaders[idx]->Stage == MESA_SHADER_COMPUTE)
break;
- if (!validate_io(shProg[prev], shProg[idx]))
+ if (!validate_io(shProg[prev], shProg[idx],
+ shProg[prev]->_LinkedShaders[prev]->Stage,
+ shProg[idx]->_LinkedShaders[idx]->Stage))
return false;
prev = idx;
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index fc3cc6b..3dde03f 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -502,13 +502,15 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
*/
if (format == rgba_format) {
rgba = dest;
- } else if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+ } else {
need_convert = true;
- rgba = malloc(height * rgba_stride);
- if (!rgba) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
- return;
+ if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+ rgba = malloc(height * rgba_stride);
+ if (!rgba) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
+ ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ return;
+ }
}
}
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 58b7f27..7b13a28 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1300,6 +1300,7 @@ bool
_mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format)
{
return _mesa_is_astc_format(format) ||
+ _mesa_is_etc2_format(format) ||
compressedteximage_only_format(ctx, format);
}
@@ -2587,10 +2588,16 @@ check_rtt_cb(GLuint key, void *data, void *userData)
att->Texture == texObj &&
att->TextureLevel == level &&
att->CubeMapFace == face) {
- _mesa_update_texture_renderbuffer(ctx, ctx->DrawBuffer, att);
+ _mesa_update_texture_renderbuffer(ctx, fb, att);
assert(att->Renderbuffer->TexImage);
/* Mark fb status as indeterminate to force re-validation */
fb->_Status = 0;
+
+ /* Make sure that the revalidation actually happens if this is
+ * being done to currently-bound buffers.
+ */
+ if (fb == ctx->DrawBuffer || fb == ctx->ReadBuffer)
+ ctx->NewState |= _NEW_BUFFERS;
}
}
}
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index f4a0760..72ed869 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -179,9 +179,7 @@ clear_texture_fields(struct gl_context *ctx,
return;
}
- _mesa_init_teximage_fields(ctx, texImage,
- 0, 0, 0, 0, /* w, h, d, border */
- GL_NONE, MESA_FORMAT_NONE);
+ _mesa_clear_texture_image(ctx, texImage);
}
}
}
diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
index 0847184..758d8b4 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -386,6 +386,7 @@ static void init_velement(struct pipe_vertex_element *velement,
}
static void init_velement_lowered(struct st_context *st,
+ const struct st_vertex_program *vp,
struct pipe_vertex_element *velements,
int src_offset, int format,
int instance_divisor, int vbo_index,
@@ -396,23 +397,33 @@ static void init_velement_lowered(struct st_context *st,
if (doubles) {
int lower_format;
- if (nr_components == 1)
+ if (nr_components < 2)
lower_format = PIPE_FORMAT_R32G32_UINT;
- else if (nr_components >= 2)
+ else
lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
init_velement(&velements[idx], src_offset,
lower_format, instance_divisor, vbo_index);
idx++;
- if (nr_components > 2) {
- if (nr_components == 3)
- lower_format = PIPE_FORMAT_R32G32_UINT;
- else if (nr_components >= 4)
- lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+ if (idx < vp->num_inputs &&
+ vp->index_to_input[idx] == ST_DOUBLE_ATTRIB_PLACEHOLDER) {
+ if (nr_components >= 3) {
+ if (nr_components == 3)
+ lower_format = PIPE_FORMAT_R32G32_UINT;
+ else
+ lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
+
+ init_velement(&velements[idx], src_offset + 4 * sizeof(float),
+ lower_format, instance_divisor, vbo_index);
+ } else {
+ /* The values here are undefined. Fill in some conservative
+ * dummy values.
+ */
+ init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT,
+ instance_divisor, vbo_index);
+ }
- init_velement(&velements[idx], src_offset + 4 * sizeof(float),
- lower_format, instance_divisor, vbo_index);
idx++;
}
} else {
@@ -435,10 +446,9 @@ setup_interleaved_attribs(struct st_context *st,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
struct pipe_vertex_buffer *vbuffer,
- struct pipe_vertex_element velements[],
- unsigned *num_velements)
+ struct pipe_vertex_element velements[])
{
- GLuint attr, attr_idx;
+ GLuint attr;
const GLubyte *low_addr = NULL;
GLboolean usingVBO; /* all arrays in a VBO? */
struct gl_buffer_object *bufobj;
@@ -481,15 +491,13 @@ setup_interleaved_attribs(struct st_context *st,
/* are the arrays in user space? */
usingVBO = _mesa_is_bufferobj(bufobj);
- attr_idx = 0;
- for (attr = 0; attr < vpv->num_inputs; attr++) {
+ for (attr = 0; attr < vpv->num_inputs;) {
const struct gl_client_array *array;
unsigned src_offset;
unsigned src_format;
array = get_client_array(vp, arrays, attr);
- if (!array)
- continue;
+ assert(array);
src_offset = (unsigned) (array->Ptr - low_addr);
assert(array->_ElementSize ==
@@ -501,13 +509,11 @@ setup_interleaved_attribs(struct st_context *st,
array->Normalized,
array->Integer);
- init_velement_lowered(st, velements, src_offset, src_format,
+ init_velement_lowered(st, vp, velements, src_offset, src_format,
array->InstanceDivisor, 0,
- array->Size, array->Doubles, &attr_idx);
+ array->Size, array->Doubles, &attr);
}
- *num_velements = attr_idx;
-
/*
* Return the vbuffer info and setup user-space attrib info, if needed.
*/
@@ -554,25 +560,25 @@ setup_non_interleaved_attribs(struct st_context *st,
const struct gl_client_array **arrays,
struct pipe_vertex_buffer vbuffer[],
struct pipe_vertex_element velements[],
- unsigned *num_velements)
+ unsigned *num_vbuffers)
{
struct gl_context *ctx = st->ctx;
- GLuint attr, attr_idx = 0;
+ GLuint attr;
- for (attr = 0; attr < vpv->num_inputs; attr++) {
+ *num_vbuffers = 0;
+
+ for (attr = 0; attr < vpv->num_inputs;) {
const GLuint mesaAttr = vp->index_to_input[attr];
const struct gl_client_array *array;
struct gl_buffer_object *bufobj;
GLsizei stride;
unsigned src_format;
+ unsigned bufidx;
array = get_client_array(vp, arrays, attr);
- if (!array) {
- vbuffer[attr].buffer = NULL;
- vbuffer[attr].user_buffer = NULL;
- vbuffer[attr].buffer_offset = 0;
- continue;
- }
+ assert(array);
+
+ bufidx = (*num_vbuffers)++;
stride = array->StrideB;
bufobj = array->BufferObj;
@@ -590,9 +596,9 @@ setup_non_interleaved_attribs(struct st_context *st,
return FALSE; /* out-of-memory error probably */
}
- vbuffer[attr].buffer = stobj->buffer;
- vbuffer[attr].user_buffer = NULL;
- vbuffer[attr].buffer_offset = pointer_to_offset(array->Ptr);
+ vbuffer[bufidx].buffer = stobj->buffer;
+ vbuffer[bufidx].user_buffer = NULL;
+ vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
}
else {
/* wrap user data */
@@ -609,13 +615,13 @@ setup_non_interleaved_attribs(struct st_context *st,
assert(ptr);
- vbuffer[attr].buffer = NULL;
- vbuffer[attr].user_buffer = ptr;
- vbuffer[attr].buffer_offset = 0;
+ vbuffer[bufidx].buffer = NULL;
+ vbuffer[bufidx].user_buffer = ptr;
+ vbuffer[bufidx].buffer_offset = 0;
}
/* common-case setup */
- vbuffer[attr].stride = stride; /* in bytes */
+ vbuffer[bufidx].stride = stride; /* in bytes */
src_format = st_pipe_vertex_format(array->Type,
array->Size,
@@ -623,13 +629,11 @@ setup_non_interleaved_attribs(struct st_context *st,
array->Normalized,
array->Integer);
- init_velement_lowered(st, velements, 0, src_format,
- array->InstanceDivisor, attr,
- array->Size, array->Doubles, &attr_idx);
-
+ init_velement_lowered(st, vp, velements, 0, src_format,
+ array->InstanceDivisor, bufidx,
+ array->Size, array->Doubles, &attr);
}
- *num_velements = attr_idx;
return TRUE;
}
@@ -641,7 +645,7 @@ static void update_array(struct st_context *st)
const struct st_vp_variant *vpv;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- unsigned num_vbuffers, num_velements;
+ unsigned num_vbuffers;
st->vertex_array_out_of_memory = FALSE;
@@ -659,23 +663,21 @@ static void update_array(struct st_context *st)
* Setup the vbuffer[] and velements[] arrays.
*/
if (is_interleaved_arrays(vp, vpv, arrays)) {
- if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
+ if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements)) {
st->vertex_array_out_of_memory = TRUE;
return;
}
num_vbuffers = 1;
- if (num_velements == 0)
+ if (vpv->num_inputs == 0)
num_vbuffers = 0;
}
else {
if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
- velements, &num_velements)) {
+ velements, &num_vbuffers)) {
st->vertex_array_out_of_memory = TRUE;
return;
}
-
- num_vbuffers = vpv->num_inputs;
}
cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
@@ -685,7 +687,7 @@ static void update_array(struct st_context *st)
st->last_num_vbuffers - num_vbuffers, NULL);
}
st->last_num_vbuffers = num_vbuffers;
- cso_set_vertex_elements(st->cso_context, num_velements, velements);
+ cso_set_vertex_elements(st->cso_context, vpv->num_inputs, velements);
}
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 4b7ad77..3d409a6 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -304,12 +304,10 @@ st_create_texture_sampler_view_from_stobj(struct st_context *st,
templ.target = gl_target_to_pipe(stObj->base.Target);
}
- if (swizzle != SWIZZLE_NOOP) {
- templ.swizzle_r = GET_SWZ(swizzle, 0);
- templ.swizzle_g = GET_SWZ(swizzle, 1);
- templ.swizzle_b = GET_SWZ(swizzle, 2);
- templ.swizzle_a = GET_SWZ(swizzle, 3);
- }
+ templ.swizzle_r = GET_SWZ(swizzle, 0);
+ templ.swizzle_g = GET_SWZ(swizzle, 1);
+ templ.swizzle_b = GET_SWZ(swizzle, 2);
+ templ.swizzle_a = GET_SWZ(swizzle, 3);
return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ);
}
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 362cef4..1acec7c 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -313,11 +313,13 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
static inline GLboolean
is_scissor_enabled(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
+ const struct gl_scissor_rect *scissor = &ctx->Scissor.ScissorArray[0];
+
+ /* The scissor only matters when bit 0 of EnableFlags is set and the box
+ * leaves part of the renderbuffer uncovered. The far edges are compared
+ * as signed ints: X/Y may be negative, and comparing a negative sum
+ * against the unsigned rb->Width/Height would wrap it to a huge value
+ * and wrongly report the scissor as covering everything.
+ */
return (ctx->Scissor.EnableFlags & 1) &&
- (ctx->Scissor.ScissorArray[0].X > 0 ||
- ctx->Scissor.ScissorArray[0].Y > 0 ||
- (unsigned) ctx->Scissor.ScissorArray[0].Width < rb->Width ||
- (unsigned) ctx->Scissor.ScissorArray[0].Height < rb->Height);
+ (scissor->X > 0 ||
+ scissor->Y > 0 ||
+ scissor->X + scissor->Width < (int) rb->Width ||
+ scissor->Y + scissor->Height < (int) rb->Height);
}
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 3db5749..c013d3b 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -203,8 +203,19 @@ st_draw_vbo(struct gl_context *ctx,
/* The VBO module handles restart for the non-indexed GLDrawArrays
* so we only set these fields for indexed drawing:
*/
- info.primitive_restart = ctx->Array._PrimitiveRestart;
- info.restart_index = _mesa_primitive_restart_index(ctx, ib->type);
+ if (ctx->Array._PrimitiveRestart) {
+ info.restart_index = _mesa_primitive_restart_index(ctx, ib->type);
+
+ /* Enable primitive restart only when the restart index can have an
+ * effect. This is required for correctness in radeonsi VI support,
+ * though other hardware may also benefit from taking a faster,
+ * non-restart path when possible.
+ */
+ if ((ibuffer.index_size >= 4) ||
+ (ibuffer.index_size >= 2 && info.restart_index <= 0xffff) ||
+ (info.restart_index <= 0xff))
+ info.primitive_restart = true;
+ }
}
else {
/* Transform feedback drawing is always non-indexed. */
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 9a280fc..5f76241 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -37,6 +37,7 @@
#include "main/enums.h"
#include "main/formats.h"
#include "main/glformats.h"
+#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/teximage.h"
#include "main/texstore.h"
@@ -2282,6 +2283,12 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
}
if (pFormat == PIPE_FORMAT_NONE) {
+ /* lie about using etc1/etc2 natively if we do decoding tricks */
+ mFormat = _mesa_glenum_to_compressed_format(internalFormat);
+ if ((mFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1) ||
+ (_mesa_is_format_etc2(mFormat) && !st->has_etc2))
+ return mFormat;
+
/* no luck at all */
return MESA_FORMAT_NONE;
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aa443a5..ee117c9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -255,6 +255,7 @@ public:
ir_instruction *ir;
GLboolean cond_update;
bool saturate;
+ bool is_64bit_expanded;
st_src_reg sampler; /**< sampler register */
int sampler_base;
int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
@@ -515,7 +516,8 @@ public:
unsigned *array_size,
unsigned *base,
unsigned *index,
- st_src_reg *reladdr);
+ st_src_reg *reladdr,
+ bool opaque);
void calc_deref_offsets(ir_dereference *head,
ir_dereference *tail,
unsigned *array_elements,
@@ -523,6 +525,7 @@ public:
unsigned *index,
st_src_reg *indirect,
unsigned *location);
+ st_src_reg canonicalize_gather_offset(st_src_reg offset);
bool try_emit_mad(ir_expression *ir,
int mul_operand);
@@ -670,6 +673,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
inst->src[1] = src1;
inst->src[2] = src2;
inst->src[3] = src3;
+ inst->is_64bit_expanded = false;
inst->ir = ir;
inst->dead_mask = 0;
/* default to float, for paths where this is not initialized
@@ -792,6 +796,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
dinst->prev = NULL;
}
this->instructions.push_tail(dinst);
+ dinst->is_64bit_expanded = true;
/* modify the destination if we are splitting */
for (j = 0; j < 2; j++) {
@@ -1136,7 +1141,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
uval[0].u = *(uint32_t *)&val;
uval[1].u = *(((uint32_t *)&val) + 1);
src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
-
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
return src;
}
@@ -1958,12 +1963,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
case ir_unop_bitcast_f2i:
- result_src = op[0];
- result_src.type = GLSL_TYPE_INT;
- break;
case ir_unop_bitcast_f2u:
- result_src = op[0];
- result_src.type = GLSL_TYPE_UINT;
+ /* Make sure we don't propagate the negate modifier to integer opcodes. */
+ if (op[0].negate)
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ else
+ result_src = op[0];
+ result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
+ GLSL_TYPE_UINT;
break;
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
@@ -2792,6 +2799,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
assert(type->is_scalar() || type->is_vector());
+ l->type = type->base_type;
r->type = type->base_type;
if (cond) {
st_src_reg l_src = st_src_reg(*l);
@@ -2903,6 +2911,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
} else if (ir->rhs->as_expression() &&
this->instructions.get_tail() &&
ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+ !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded &&
type_size(ir->lhs->type) == 1 &&
l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
/* To avoid emitting an extra MOV when assigning an expression to a
@@ -3144,7 +3153,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
st_src_reg offset;
unsigned array_size = 0, base = 0, index = 0;
- get_deref_offsets(deref, &array_size, &base, &index, &offset);
+ get_deref_offsets(deref, &array_size, &base, &index, &offset, false);
if (offset.file != PROGRAM_UNDEFINED) {
emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
@@ -3451,7 +3460,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
get_deref_offsets(img, &sampler_array_size, &sampler_base,
- (unsigned int *)&image.index, &reladdr);
+ (unsigned int *)&image.index, &reladdr, true);
if (reladdr.file != PROGRAM_UNDEFINED) {
emit_arl(ir, sampler_reladdr, reladdr);
image.reladdr = ralloc(mem_ctx, st_src_reg);
@@ -3811,7 +3820,8 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
unsigned *array_size,
unsigned *base,
unsigned *index,
- st_src_reg *reladdr)
+ st_src_reg *reladdr,
+ bool opaque)
{
GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
unsigned location = 0;
@@ -3836,12 +3846,27 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
*array_size = 1;
}
- if (location != 0xffffffff) {
+ if (opaque) {
+ assert(location != 0xffffffff);
*base += this->shader_program->UniformStorage[location].opaque[shader].index;
*index += this->shader_program->UniformStorage[location].opaque[shader].index;
}
}
+/**
+ * Replace a relatively-addressed gather offset with a temporary copy.
+ *
+ * An offset with neither reladdr nor reladdr2 set is returned unchanged.
+ * Otherwise its X and Y components are copied with a MOV into a temporary
+ * obtained from get_temp() with ivec2 type, and that temporary is returned.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
+{
+ /* Nothing to do for a directly addressed offset. */
+ if (!offset.reladdr && !offset.reladdr2)
+ return offset;
+
+ st_src_reg copy = get_temp(glsl_type::ivec2_type);
+ st_dst_reg copy_dst = st_dst_reg(copy);
+
+ copy_dst.writemask = WRITEMASK_XY;
+ emit_asm(NULL, TGSI_OPCODE_MOV, copy_dst, offset);
+ return copy;
+}
+
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
@@ -3967,9 +3992,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
offset[i].index += i * type_size(elt_type);
offset[i].type = elt_type->base_type;
offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
+ offset[i] = canonicalize_gather_offset(offset[i]);
}
} else {
- offset[0] = this->result;
+ offset[0] = canonicalize_gather_offset(this->result);
}
}
break;
@@ -4075,7 +4101,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
}
get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
- &sampler_index, &reladdr);
+ &sampler_index, &reladdr, true);
if (reladdr.file != PROGRAM_UNDEFINED)
emit_arl(ir, sampler_reladdr, reladdr);
@@ -5526,60 +5552,24 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
static struct tgsi_texture_offset
translate_tex_offset(struct st_translate *t,
- const st_src_reg *in_offset, int idx)
+ const st_src_reg *in_offset)
{
struct tgsi_texture_offset offset;
- struct ureg_src imm_src;
- struct ureg_dst dst;
- int array;
+ struct ureg_src src = translate_src(t, in_offset); /* offset register is run through translate_src() */
- switch (in_offset->file) {
- case PROGRAM_IMMEDIATE:
- assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
- imm_src = t->immediates[in_offset->index];
+ offset.File = src.File;
+ offset.Index = src.Index;
+ offset.SwizzleX = src.SwizzleX;
+ offset.SwizzleY = src.SwizzleY;
+ offset.SwizzleZ = src.SwizzleZ; /* only the X, Y and Z swizzles are copied; no W swizzle is stored */
+ offset.Padding = 0;
- offset.File = imm_src.File;
- offset.Index = imm_src.Index;
- offset.SwizzleX = imm_src.SwizzleX;
- offset.SwizzleY = imm_src.SwizzleY;
- offset.SwizzleZ = imm_src.SwizzleZ;
- offset.Padding = 0;
- break;
- case PROGRAM_INPUT:
- imm_src = t->inputs[t->inputMapping[in_offset->index]];
- offset.File = imm_src.File;
- offset.Index = imm_src.Index;
- offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
- offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
- offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
- offset.Padding = 0;
- break;
- case PROGRAM_TEMPORARY:
- imm_src = ureg_src(t->temps[in_offset->index]);
- offset.File = imm_src.File;
- offset.Index = imm_src.Index;
- offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
- offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
- offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
- offset.Padding = 0;
- break;
- case PROGRAM_ARRAY:
- array = in_offset->index >> 16;
-
- assert(array >= 0);
- assert(array < (int)t->num_temp_arrays);
+ assert(!src.Indirect); /* 'offset' keeps only File, Index and swizzles, so the source features asserted here must be unset */
+ assert(!src.DimIndirect);
+ assert(!src.Dimension);
+ assert(!src.Absolute); /* those shouldn't be used with integers anyway */
+ assert(!src.Negate);
- dst = t->arrays[array];
- offset.File = dst.File;
- offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
- offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
- offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
- offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
- offset.Padding = 0;
- break;
- default:
- break;
- }
return offset;
}
@@ -5643,7 +5633,7 @@ compile_tgsi_instruction(struct st_translate *t,
ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
num_src++;
for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
- texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
+ texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
@@ -6045,7 +6035,11 @@ st_translate_program(
inputSemanticName[i], inputSemanticIndex[i],
interpMode[i], 0, interpLocation[i],
array_id, array_size);
- i += array_size - 1;
+
+ GLuint base_attr = inputSlotToAttr[i];
+ while (i + 1 < numInputs &&
+ inputSlotToAttr[i + 1] < base_attr + array_size)
+ ++i;
}
else {
t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
diff --git a/src/mesa/state_tracker/st_vdpau.c b/src/mesa/state_tracker/st_vdpau.c
index dffa52f..4f599dd 100644
--- a/src/mesa/state_tracker/st_vdpau.c
+++ b/src/mesa/state_tracker/st_vdpau.c
@@ -65,6 +65,7 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface,
struct pipe_video_buffer *buffer;
struct pipe_sampler_view **samplers;
+ struct pipe_resource *res = NULL;
getProcAddr = (void *)ctx->vdpGetProcAddress;
if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f))
@@ -82,7 +83,8 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface,
if (!sv)
return NULL;
- return sv->texture;
+ pipe_resource_reference(&res, sv->texture);
+ return res;
}
static struct pipe_resource *
@@ -90,13 +92,15 @@ st_vdpau_output_surface_gallium(struct gl_context *ctx, const void *vdpSurface)
{
int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr);
uint32_t device = (uintptr_t)ctx->vdpDevice;
+ struct pipe_resource *res = NULL;
VdpOutputSurfaceGallium *f;
getProcAddr = (void *)ctx->vdpGetProcAddress;
if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f))
return NULL;
- return f((uintptr_t)vdpSurface);
+ pipe_resource_reference(&res, f((uintptr_t)vdpSurface));
+ return res;
}
static struct pipe_resource *
@@ -208,6 +212,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
/* do we have different screen objects ? */
if (res->screen != st->pipe->screen) {
_mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV");
+ pipe_resource_reference(&res, NULL);
return;
}
@@ -241,6 +246,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
stObj->surface_format = res->format;
_mesa_dirty_texobj(ctx, texObj);
+ pipe_resource_reference(&res, NULL);
}
static void
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 87ed7f7..96ed84f 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -814,6 +814,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
prim[0].basevertex = basevertex;
prim[0].num_instances = numInstances;
prim[0].base_instance = baseInstance;
+ prim[0].draw_id = 0;
/* Need to give special consideration to rendering a range of
* indices starting somewhere above zero. Typically the
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 97a1dfd..fafdf1d 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1167,8 +1167,8 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
* then emitting an indexed prim at runtime.
*/
static void GLAPIENTRY
-_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid * indices)
+_save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid * indices, GLint basevertex)
{
GET_CURRENT_CONTEXT(ctx);
struct vbo_save_context *save = &vbo_context(ctx)->save;
@@ -1205,15 +1205,15 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
switch (type) {
case GL_UNSIGNED_BYTE:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLubyte *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLubyte *) indices)[i]));
break;
case GL_UNSIGNED_SHORT:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLushort *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLushort *) indices)[i]));
break;
case GL_UNSIGNED_INT:
for (i = 0; i < count; i++)
- CALL_ArrayElement(GET_DISPATCH(), (((GLuint *) indices)[i]));
+ CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLuint *) indices)[i]));
break;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glDrawElements(type)");
@@ -1225,6 +1225,13 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
_ae_unmap_vbos(ctx);
}
+static void GLAPIENTRY
+_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid * indices)
+{
+ _save_OBE_DrawElementsBaseVertex(mode, count, type, indices, 0); /* plain DrawElements: base vertex 0, indices used as given */
+}
+
static void GLAPIENTRY
_save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
@@ -1462,6 +1469,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
{
SET_DrawArrays(exec, _save_OBE_DrawArrays);
SET_DrawElements(exec, _save_OBE_DrawElements);
+ SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex);
SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements);
SET_MultiDrawElementsEXT(exec, _save_OBE_MultiDrawElements);
SET_MultiDrawElementsBaseVertex(exec, _save_OBE_MultiDrawElementsBaseVertex);
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index cb27ef9..1c3474c 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -243,7 +243,7 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
static GLuint
elt(struct copy_context *copy, GLuint elt_idx)
{
- GLuint elt = copy->srcelt[elt_idx];
+ GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
GLuint slot = elt & (ELT_TABLE_SIZE-1);
/* printf("elt %d\n", elt); */