diff options
author | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-12-17 03:40:28 +0100 |
---|---|---|
committer | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-12-17 03:40:28 +0100 |
commit | ef9a82038acd73936830671dbe43205c28a2151d (patch) | |
tree | 90be2cdd9f48750c18b669ca2ab9553575d9f822 | |
parent | f84f60446aebaeee8a1df741328cbd4a30dd24ea (diff) | |
parent | 743c2327b167b95046e02af4c7b2f7a282a0943d (diff) | |
download | external_mesa3d-replicant-6.0-old.zip external_mesa3d-replicant-6.0-old.tar.gz external_mesa3d-replicant-6.0-old.tar.bz2 |
Merge remote-tracking branch 'androidx86/marshmallow-x86' into replicant-6.0 (refs: replicant-6.0-beta-0001, replicant-6.0-alpha-0006, replicant-6.0-old)
248 files changed, 4655 insertions, 2004 deletions
diff --git a/.travis.yml b/.travis.yml index da1d81e..5f489a4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: c -sudo: false +sudo: true +dist: trusty cache: directories: @@ -15,7 +16,11 @@ addons: - libexpat1-dev - libxcb-dri2-0-dev - libx11-xcb-dev - - llvm-3.4-dev + - llvm-3.5-dev + # llvm-config is not in the dev package? + - llvm-3.5 + # LLVM packaging is broken and misses this dep. + - libedit-dev - scons env: @@ -41,6 +46,16 @@ install: - export PATH="/usr/lib/ccache:$PATH" - pip install --user mako + # Since libdrm gets updated in configure.ac regularly, try to pick up the + # latest version from there. + - for line in `grep "^LIBDRM_.*_REQUIRED=" configure.ac`; do + old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`; + new_ver=`echo $line | sed 's/.*REQUIRED=//'`; + if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then + export LIBDRM_VERSION="libdrm-$new_ver"; + fi; + done + # Install dependencies where we require specific versions (or where # disallowed by Travis CI's package whitelisting). @@ -78,22 +93,19 @@ install: - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2 - tar -jxvf $LIBDRM_VERSION.tar.bz2 - - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix && make install) + - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && make install) - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2 - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2 - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install) -# Disabled LLVM (and therefore r300 and r600) because the build fails -# with "undefined reference to `clock_gettime'" and "undefined -# reference to `setupterm'" in llvmpipe. 
script: - if test "x$BUILD" = xmake; then ./autogen.sh --enable-debug - --disable-gallium-llvm --with-egl-platforms=x11,drm --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau - --with-gallium-drivers=svga,swrast,vc4,virgl + --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600 + --disable-llvm-shared-libs ; make && make check; elif test x$BUILD = xscons; then diff --git a/Makefile.am b/Makefile.am index b0fbed6..2027a28 100644 --- a/Makefile.am +++ b/Makefile.am @@ -62,6 +62,7 @@ noinst_HEADERS = \ include/c99_math.h \ include/c11 \ include/D3D9 \ + include/GL/wglext.h \ include/HaikuGL \ include/no_extern_c.h \ include/pci_ids @@ -1 +1 @@ -12.0.1 +12.0.4 diff --git a/appveyor.yml b/appveyor.yml index 2e9b9d6..6e69cbf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -37,6 +37,8 @@ cache: - win_flex_bison-2.4.5.zip - llvm-3.3.1-msvc2013-mtd.7z +os: Visual Studio 2013 + environment: WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z @@ -47,11 +49,13 @@ install: - python -m pip --version # Install Mako - python -m pip install --egg Mako +# Install pywin32 extensions, needed by SCons +- python -m pip install pypiwin32 # Install SCons - python -m pip install --egg scons==2.4.1 - scons --version # Install flex/bison -- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "http://downloads.sourceforge.net/project/winflexbison/%WINFLEXBISON_ARCHIVE%" +- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "https://downloads.sourceforge.net/project/winflexbison/old_versions/%WINFLEXBISON_ARCHIVE%" - 7z x -y -owinflexbison\ "%WINFLEXBISON_ARCHIVE%" > nul - set Path=%CD%\winflexbison;%Path% - win_flex --version diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore index 5b0b89c..005e1e3 100644 --- a/bin/.cherry-ignore +++ b/bin/.cherry-ignore @@ -1,2 +1,25 @@ # The offending commit that this patch (part) reverts isn't in 12.0 be32a2132785fbc119f17e62070e007ee7d17af7 i965/compiler: Bring back the INTEL_PRECISE_TRIG 
environment variable + +# The patch depends on the batch_cache work at least. +89f00f749fda4c1beca38f362c7f86bdc6e32785 a4xx: make sure to actually clamp depth as requested + +# The patch depends on the 'generic' interoplation and location +# implementation introduced with 2d6dd30a9b30 +114874b22beafb2d07006b197c62d717fc7f80cc i965/fs: Use sample interpolation for interpolateAtCentroid in persample mode + +# VAAPI encode landed after the branch point. +a5993022275c20061ac025d9adc26c5f9d02afee st/va Avoid VBR bitrate calculation overflow v2 + +# EGL_KHR_debug landed after the branch point. +17084b6f9340f798111e53e08f5d35c7630cee48 egl: Fix missing unlock in eglGetSyncAttribKHR + +# Depends on update_renderbuffer_read_surfaces at least +f2b9b0c730e345bcffa9eadabb25af3ab02642f2 i965: Add missing BRW_NEW_FS_PROG_DATA to render target reads. + +# The commit in question hasn't landed in branch +1ef787339774bc7f1cc9c1615722f944005e070c Revert "egl/android: Set EGL_MAX_PBUFFER_WIDTH and EGL_MAX_PBUFFER_HEIGHT" + +# Patches depend on the fence_finish() gallium API change and corresponding driver work +f240ad98bc05281ea7013d91973cb5f932ae9434 st/mesa: unduplicate st_check_sync code +b687f766fddb7b39479cd9ee0427984029ea3559 st/mesa: allow multiple concurrent waiters in ClientWaitSync diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh index 0902fd0..4515837 100755 --- a/bin/get-pick-list.sh +++ b/bin/get-pick-list.sh @@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\ sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked # Grep for commits that were marked as a candidate for the stable tree. 
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\ +git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*12\.0.*mesa-stable\)' HEAD..origin/master |\ while read sha do # Check to see whether the patch is on the ignore list. diff --git a/configure.ac b/configure.ac index 535a2e3..78cf178 100644 --- a/configure.ac +++ b/configure.ac @@ -225,6 +225,7 @@ AX_GCC_FUNC_ATTRIBUTE([packed]) AX_GCC_FUNC_ATTRIBUTE([pure]) AX_GCC_FUNC_ATTRIBUTE([returns_nonnull]) AX_GCC_FUNC_ATTRIBUTE([unused]) +AX_GCC_FUNC_ATTRIBUTE([visibility]) AX_GCC_FUNC_ATTRIBUTE([warn_unused_result]) AX_GCC_FUNC_ATTRIBUTE([weak]) @@ -783,6 +784,7 @@ if test "x$enable_asm" = xyes; then esac fi +AC_HEADER_MAJOR AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"]) AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"]) AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"]) @@ -1639,7 +1641,7 @@ esac AC_ARG_WITH([vulkan-icddir], [AS_HELP_STRING([--with-vulkan-icddir=DIR], - [directory for the Vulkan driver icd files @<:@${sysconfdir}/vulkan/icd.d@:>@])], + [directory for the Vulkan driver icd files @<:@${datarootdir}/vulkan/icd.d@:>@])], [VULKAN_ICD_INSTALL_DIR="$withval"], [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d']) AC_SUBST([VULKAN_ICD_INSTALL_DIR]) @@ -1997,8 +1999,8 @@ if test "x$with_egl_platforms" != "x" -a "x$enable_egl" != xyes; then AC_MSG_ERROR([cannot build egl state tracker without EGL library]) fi -PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland_scanner], - WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland_scanner`, +PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner], + WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`, WAYLAND_SCANNER='') if test "x$WAYLAND_SCANNER" = x; then AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner]) @@ -2182,6 +2184,10 @@ if test "x$enable_gallium_llvm" = xyes; then 
LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler" + if $LLVM_CONFIG --components | grep -q inteljitevents ; then + LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents" + fi + if test "x$enable_opencl" = xyes; then llvm_check_version_for "3" "5" "0" "opencl" @@ -2331,6 +2337,45 @@ swr_llvm_check() { fi } +swr_require_cxx_feature_flags() { + feature_name="$1" + preprocessor_test="$2" + option_list="$3" + output_var="$4" + + AC_MSG_CHECKING([whether $CXX supports $feature_name]) + AC_LANG_PUSH([C++]) + save_CXXFLAGS="$CXXFLAGS" + save_IFS="$IFS" + IFS="," + found=0 + for opts in $option_list + do + unset IFS + CXXFLAGS="$opts $save_CXXFLAGS" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [ #if !($preprocessor_test) + #error + #endif + ])], + [found=1; break], + []) + IFS="," + done + IFS="$save_IFS" + CXXFLAGS="$save_CXXFLAGS" + AC_LANG_POP([C++]) + if test $found -eq 1; then + AC_MSG_RESULT([$opts]) + eval "$output_var=\$opts" + return 0 + fi + AC_MSG_RESULT([no]) + AC_MSG_ERROR([swr requires $feature_name support]) + return 1 +} + dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block if test -n "$with_gallium_drivers"; then gallium_drivers=`IFS=', '; echo $with_gallium_drivers` @@ -2400,29 +2445,20 @@ if test -n "$with_gallium_drivers"; then xswr) swr_llvm_check "swr" - AC_MSG_CHECKING([whether $CXX supports c++11/AVX/AVX2]) - AVX_CXXFLAGS="-march=core-avx-i" - AVX2_CXXFLAGS="-march=core-avx2" - - AC_LANG_PUSH([C++]) - save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="-std=c++11 $CXXFLAGS" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[], - [AC_MSG_ERROR([c++11 compiler support not detected])]) - CXXFLAGS="$save_CXXFLAGS" - - save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="$AVX_CXXFLAGS $CXXFLAGS" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[], - [AC_MSG_ERROR([AVX compiler support not detected])]) - CXXFLAGS="$save_CXXFLAGS" - - save_CFLAGS="$CXXFLAGS" - CXXFLAGS="$AVX2_CXXFLAGS $CXXFLAGS" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[], - [AC_MSG_ERROR([AVX2 compiler 
support not detected])]) - CXXFLAGS="$save_CXXFLAGS" - AC_LANG_POP([C++]) + swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \ + ",-std=c++11" \ + SWR_CXX11_CXXFLAGS + AC_SUBST([SWR_CXX11_CXXFLAGS]) + + swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \ + ",-mavx,-march=core-avx" \ + SWR_AVX_CXXFLAGS + AC_SUBST([SWR_AVX_CXXFLAGS]) + + swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \ + ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \ + SWR_AVX2_CXXFLAGS + AC_SUBST([SWR_AVX2_CXXFLAGS]) HAVE_GALLIUM_SWR=yes ;; @@ -2560,6 +2596,8 @@ fi AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes) AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_OSMESA, test "x$enable_gallium_osmesa" = xyes) +AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \ + "x$enable_gallium_osmesa" = xyes) AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64) AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64) diff --git a/docs/relnotes/12.0.2.html b/docs/relnotes/12.0.2.html new file mode 100644 index 0000000..385ef08 --- /dev/null +++ b/docs/relnotes/12.0.2.html @@ -0,0 +1,403 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 12.0.2 Release Notes / September 2, 2016</h1> + +<p> +Mesa 12.0.2 is a bug fix release which fixes bugs found since the 12.0.1 release. 
+</p> +<p> +Mesa 12.0.2 implements the OpenGL 4.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.3. OpenGL +4.3 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +a08565ab1273751ebe2ffa928cbf785056594c803077c9719d0763da780f2918 mesa-12.0.2.tar.gz +d957a5cc371dcd7ff2aa0d87492f263aece46f79352f4520039b58b1f32552cb mesa-12.0.2.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb->getFirst()->serial <= bb->getExit()->serial' failed</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96381">Bug 96381</a> - Texture 
artifacts with immutable texture storage and mipmaps</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - "gallium: Force blend color to 16-byte alignment" crash with "-march=native -O3" causes some 32bit games to crash</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error "Failed to make EGL context current"</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list 
on i915</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97567">Bug 97567</a> - [SNB, ILK] ctl, piglit regressions in mesa 12.0.2rc1</li> + +</ul> + + +<h2>Changes</h2> + +<p>Andreas Boll (1):</p> +<ul> + <li>configure.ac: Use ${datarootdir} for --with-vulkan-icddir help string too</li> +</ul> + +<p>Bernard Kilarski (1):</p> +<ul> + <li>glx: fix error code when there is no context bound</li> +</ul> + +<p>Brian Paul (4):</p> +<ul> + <li>svga: handle mismatched number of samplers, sampler views</li> + <li>mesa: use _mesa_clear_texture_image() in clear_texture_fields()</li> + <li>swrast: fix incorrectly positioned putImage() in swrast driver</li> + <li>mesa: fix format conversion bug in get_tex_rgba_uncompressed()</li> +</ul> + +<p>Chad Versace (2):</p> +<ul> + <li>i965: Fix miptree layout for EGLImage-based renderbuffers</li> + <li>i965: Respect miptree offsets in intel_readpixels_tiled_memcpy()</li> +</ul> + +<p>Christian König (1):</p> +<ul> + <li>st/mesa: fix reference counting bug in st_vdpau</li> +</ul> + +<p>Chuck Atkins (1):</p> +<ul> + <li>swr: Refactor checks for compiler feature flags</li> +</ul> + +<p>Daniel Scharrer (1):</p> +<ul> + <li>mesa: Fix fixed function spot lighting on newer hardware (again)</li> +</ul> + +<p>Dave Airlie (2):</p> +<ul> + <li>anv: fix writemask on blit fragment shader.</li> + <li>st/glsl_to_tgsi: fix st_src_reg_for_double constant.</li> +</ul> + +<p>Emil Velikov (15):</p> +<ul> + <li>docs: add sha256 checksums for 12.0.1</li> + <li>mesa: automake: list builddir before 
srcdir</li> + <li>mesa: scons: list builddir before srcdir</li> + <li>i965: store reference to the context within struct brw_fence (v2)</li> + <li>anv: remove internal 'validate' layer</li> + <li>anv: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li> + <li>anv: automake: build with -Bsymbolic</li> + <li>anv: do not export the Vulkan API</li> + <li>anv: remove dummy VK_DEBUG_MARKER_EXT entry points</li> + <li>isl: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li> + <li>cherry-ignore: temporary(?) drop "a4xx: make sure to actually clamp depth"</li> + <li>i915: Check return value of screen->image.loader->getBuffers</li> + <li>Revert "i965/miptree: Set logical_depth0 == 6 for cube maps"</li> + <li>glx/glvnd: list the strcmp arguments in correct order</li> + <li>Update version to 12.0.2</li> +</ul> + +<p>Eric Anholt (4):</p> +<ul> + <li>vc4: Close our screen's fd on screen close.</li> + <li>vc4: Disable early Z with computed depth.</li> + <li>vc4: Fix a leak of the src[] array of VPM reads in optimization.</li> + <li>vc4: Fix leak of the bo_handles table.</li> +</ul> + +<p>Francisco Jerez (3):</p> +<ul> + <li>i965: Emit SKL VF cache invalidation W/A from brw_emit_pipe_control_flush.</li> + <li>i965: Make room in the batch epilogue for three more pipe controls.</li> + <li>i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.</li> +</ul> + +<p>Haixia Shi (1):</p> +<ul> + <li>platform_android: prevent deadlock in droid_swap_buffers</li> +</ul> + +<p>Ian Romanick (5):</p> +<ul> + <li>mesa: Strip arrayness from interface block names in some IO validation</li> + <li>glsl: Pack integer and double varyings as flat even if interpolation mode is none</li> + <li>glcpp: Track the actual version instead of just the version_resolved flag</li> + <li>glcpp: Only disallow #undef of pre-defined macros on GLSL ES >= 3.00 shaders</li> + <li>glsl: Mark cube map array sampler types as reserved in GLSL ES 3.10</li> +</ul> + 
+<p>Ilia Mirkin (16):</p> +<ul> + <li>mesa: etc2 online compression is unsupported, don't attempt it</li> + <li>st/mesa: return appropriate mesa format for ETC texture formats</li> + <li>mesa: set _NEW_BUFFERS when updating texture bound to current buffers</li> + <li>nv50,nvc0: srgb rendering is only available for rgba/bgra</li> + <li>vbo: allow DrawElementsBaseVertex in display lists</li> + <li>gallium/util: add helper to compute zmin/zmax for a viewport state</li> + <li>nv50,nvc0: fix depth range when halfz is enabled</li> + <li>nv50/ir: fix bb positions after exit instructions</li> + <li>vbo: add basevertex when looking up elements for vbo splitting</li> + <li>a4xx: only disable depth clipping, not all clipping, when requested</li> + <li>nv50/ir: make sure cfg iterator always hits all blocks</li> + <li>main: add missing EXTRA_END in OES_sample_variables get check</li> + <li>nouveau: always enable at least one RC</li> + <li>nv30: only bail on color/depth bpp mismatch when surfaces are swizzled</li> + <li>a4xx: make sure to actually clamp depth as requested</li> + <li>gk110/ir: fix quadop dall emission</li> +</ul> + +<p>Jan Ziak (2):</p> +<ul> + <li>egl/x11: avoid using freed memory if dri2 init fails</li> + <li>loader: fix memory leak in loader_dri3_open</li> +</ul> + +<p>Jason Ekstrand (31):</p> +<ul> + <li>nir/spirv: Don't multiply the push constant block size by 4</li> + <li>anv: Add a stub for CmdCopyQueryPoolResults on Ivy Bridge</li> + <li>glsl/types: Fix function type comparison function</li> + <li>glsl/types: Use _mesa_hash_data for hashing function types</li> + <li>genxml: Make gen6-7 blending look more like gen8</li> + <li>anv/pipeline: Unify blend state setup between gen7 and gen8</li> + <li>anv: Enable independentBlend on gen7</li> + <li>anv: Add an align_down_npot_u32 helper</li> + <li>anv: Handle VK_WHOLE_SIZE properly for buffer views</li> + <li>i965/miptree: Enforce that height == 1 for 1-D array textures</li> + <li>i965/miptree: Set 
logical_depth0 == 6 for cube maps</li> + <li>nir: Add a nir_deref_foreach_leaf helper</li> + <li>nir/inline: Constant-initialize local variables in the callee if needed</li> + <li>anv/pipeline: Set up point coord enables</li> + <li>i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations</li> + <li>i965/vec4: Make opt_vector_float reset at the top of each block</li> + <li>anv/blit2d: Add a format parameter to bind_dst and create_iview</li> + <li>anv/blit2d: Add support for RGB destinations</li> + <li>anv/clear: Make cmd_clear_image take an actual VkClearValue</li> + <li>anv/clear: Clear E5B9G9R9 images as R32_UINT</li> + <li>anv: Include the pipeline layout in the shader hash</li> + <li>isl: Allow multisampled array textures</li> + <li>anv/descriptor_set: memset anv_descriptor_set_layout</li> + <li>anv/pipeline: Fix bind maps for fragment output arrays</li> + <li>anv/allocator: Correctly set the number of buckets</li> + <li>anv/pipeline: Properly handle OOM during shader compilation</li> + <li>anv: Remove unused fields from anv_pipeline_bind_map</li> + <li>anv: Add pipeline_has_stage guards a few places</li> + <li>anv: Add a struct for storing a compiled shader</li> + <li>anv/pipeline: Add support for caching the push constant map</li> + <li>anv: Rework pipeline caching</li> +</ul> + +<p>José Fonseca (2):</p> +<ul> + <li>appveyor: Install pywin32 extensions.</li> + <li>appveyor: Force Visual Studio 2013 image.</li> +</ul> + +<p>Kenneth Graunke (21):</p> +<ul> + <li>genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.</li> + <li>genxml: Add APIMODE_D3D missing enum values and improve consistency.</li> + <li>anv: Fix near plane clipping on Gen7/7.5.</li> + <li>anv: Enable early culling on Gen7.</li> + <li>anv: Unify 3DSTATE_CLIP code across generations.</li> + <li>genxml: Rename "API Rendering Disable" to "Rendering Disable".</li> + <li>anv: Properly call gen75_emit_state_base_address on Haswell.</li> + <li>i965: Include VUE handles for 
GS with invocations > 1.</li> + <li>nir: Add a base const_index to shared atomic intrinsics.</li> + <li>i965: Fix shared atomic intrinsics to pay attention to base.</li> + <li>mesa: Add GL_BGRA_EXT to the list of GenerateMipmap internal formats.</li> + <li>mesa: Don't call GenerateMipmap if Width or Height == 0.</li> + <li>glsl: Delete bogus ir_set_program_inouts assert.</li> + <li>glsl: Fix the program resource names of gl_TessLevelOuter/Inner[].</li> + <li>glsl: Fix location bias for patch variables.</li> + <li>glsl: Fix invariant matching in GLSL 4.30 and GLSL ES 1.00.</li> + <li>mesa: Fix uf10_to_f32() scale factor in the E == 0 and M != 0 case.</li> + <li>nir/builder: Add bany_inequal and bany helpers.</li> + <li>i965: Implement the WaPreventHSTessLevelsInterference workaround.</li> + <li>i965: Fix execution size of scalar TCS barrier setup code.</li> + <li>i965: Fix barrier count shift in scalar TCS backend.</li> +</ul> + +<p>Leo Liu (2):</p> +<ul> + <li>st/omx/enc: check uninitialized list from task release</li> + <li>vl/dri3: fix a memory leak from front buffer</li> +</ul> + +<p>Marek Olšák (7):</p> +<ul> + <li>glsl_to_tgsi: don't use the negate modifier in integer ops after bitcast</li> + <li>radeonsi: add a workaround for a compute VGPR-usage LLVM bug</li> + <li>winsys/amdgpu: disallow DCC with mipmaps</li> + <li>gallium/util: fix align64</li> + <li>radeonsi: only set dual source blending for MRT0</li> + <li>radeonsi: fix VM faults due NULL internal const buffers on CIK</li> + <li>radeonsi: disable SDMA texture copying on Carrizo</li> +</ul> + +<p>Matt Turner (4):</p> +<ul> + <li>mapi: Massage code to allow clang to compile.</li> + <li>i965/vec4: Ignore swizzle of VGRF for use by var_range_end().</li> + <li>mesa: Use AC_HEADER_MAJOR to include correct header for major().</li> + <li>nir: Walk blocks in source code order in lower_vars_to_ssa.</li> +</ul> + +<p>Michel Dänzer (1):</p> +<ul> + <li>glx: Don't use current context in __glXSendError</li> +</ul> + 
+<p>Miklós Máté (1):</p> +<ul> + <li>vbo: set draw_id</li> +</ul> + +<p>Nanley Chery (5):</p> +<ul> + <li>anv/descriptor_set: Fix binding partly undefined descriptor sets</li> + <li>isl: Fix assert on raw buffer surface state size</li> + <li>anv/device: Fix max buffer range limits</li> + <li>isl: Fix isl_tiling_is_any_y()</li> + <li>anv/gen7_pipeline: Set PixelShaderKillPixel for discards</li> +</ul> + +<p>Nicolai Hähnle (7):</p> +<ul> + <li>radeonsi: explicitly choose center locations for 1xAA on Polaris</li> + <li>radeonsi: fix Polaris MSAA regression</li> + <li>radeonsi: ensure sample locations are set for line and polygon smoothing</li> + <li>st_glsl_to_tgsi: only skip over slots of an input array that are present</li> + <li>glsl: fix optimization of discard nested multiple levels</li> + <li>radeonsi: flush TC L2 cache for indirect draw data</li> + <li>radeonsi: add si_set_rw_buffer to be used for internal descriptors</li> +</ul> + +<p>Nicolas Boichat (6):</p> +<ul> + <li>egl/dri2: dri2_make_current: Set EGL error if bindContext fails</li> + <li>egl/wayland: Set disp->DriverData to NULL on error</li> + <li>egl/surfaceless: Set disp->DriverData to NULL on error</li> + <li>egl/drm: Set disp->DriverData to NULL on error</li> + <li>egl/android: Set dpy->DriverData to NULL on error</li> + <li>egl/dri2: Add reference count for dri2_egl_display</li> +</ul> + +<p>Rob Herring (3):</p> +<ul> + <li>Android: add missing u_math.h include path for libmesa_isl</li> + <li>vc4: fix vc4_resource_from_handle() stride calculation</li> + <li>vc4: add hash table look-up for exported dmabufs</li> +</ul> + +<p>Samuel Pitoiset (7):</p> +<ul> + <li>nvc0/ir: fix images indirect access on Fermi</li> + <li>nvc0: fix the driver cb size when draw parameters are used</li> + <li>gm107/ir: add missing NEG modifier for IADD32I</li> + <li>gm107/ir: make use of ADD32I for all immediates</li> + <li>nvc0: upload sample locations on GM20x</li> + <li>nvc0: invalidate textures/samplers on GK104+</li> + 
<li>nv50/ir: always emit the NDV bit for OP_QUADOP</li> +</ul> + +<p>Stefan Dirsch (1):</p> +<ul> + <li>Avoid overflow in 'last' variable of FindGLXFunction(...)</li> +</ul> + +<p>Stencel, Joanna (1):</p> +<ul> + <li>egl/wayland-egl: Fix for segfault in dri2_wl_destroy_surface.</li> +</ul> + +<p>Tim Rowley (2):</p> +<ul> + <li>Revert "gallium: Force blend color to 16-byte alignment"</li> + <li>swr: switch from overriding -march to selecting features</li> +</ul> + +<p>Tomasz Figa (8):</p> +<ul> + <li>gallium/dri: Add shared glapi to LIBADD on Android</li> + <li>egl/android: Remove unused variables</li> + <li>egl/android: Check return value of dri2_get_dri_config()</li> + <li>egl/android: Stop leaking DRI images</li> + <li>gallium/winsys/kms: Fix double refcount when importing from prime FD (v2)</li> + <li>gallium/winsys/kms: Fully initialize kms_sw_dt at prime import time (v2)</li> + <li>gallium/winsys/kms: Move display target handle lookup to separate function</li> + <li>gallium/winsys/kms: Look up the GEM handle after importing a prime FD</li> +</ul> + + +</div> +</body> +</html> diff --git a/docs/relnotes/12.0.3.html b/docs/relnotes/12.0.3.html new file mode 100644 index 0000000..70e704b --- /dev/null +++ b/docs/relnotes/12.0.3.html @@ -0,0 +1,71 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 12.0.3 Release Notes / September 15, 2016</h1> + +<p> +Mesa 12.0.3 is a bug fix release which fixes bugs found since the 12.0.2 release. 
+</p> +<p> +Mesa 12.0.3 implements the OpenGL 4.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.3. OpenGL +4.3 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +79abcfab3de30dbd416d1582a3cf6b1be308466231488775f1b7bb43be353602 mesa-12.0.3.tar.gz +1dc86dd9b51272eee1fad3df65e18cda2e556ef1bc0b6e07cd750b9757f493b1 mesa-12.0.3.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97781">Bug 97781</a> - [HSW, BYT, IVB] es2-cts.gtf.gl2extensiontests.depth_texture_cube_map.depth_texture_cube_map</li> + +</ul> + + +<h2>Changes</h2> + +<p>Emil Velikov (3):</p> +<ul> + <li>docs: add sha256 checksums for 12.0.2</li> + <li>Revert "i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations"</li> + <li>Update version to 12.0.3</li> +</ul> + +<p>José Fonseca (1):</p> +<ul> + <li>appveyor: Update winflexbison download URL.</li> +</ul> + + +</div> +</body> +</html> diff --git a/docs/relnotes/12.0.4.html b/docs/relnotes/12.0.4.html new file mode 100644 index 0000000..eaa9ba5 --- /dev/null +++ b/docs/relnotes/12.0.4.html @@ -0,0 +1,321 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 12.0.4 Release Notes / November 10, 2016</h1> + 
+<p> +Mesa 12.0.4 is a bug fix release which fixes bugs found since the 12.0.3 release. +</p> +<p> +Mesa 12.0.4 implements the OpenGL 4.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.3. OpenGL +4.3 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +22026ce4f1c6a7908b0d10ff057decec0a5633afe7f38a0cef5c08d0689f02a6 mesa-12.0.4.tar.gz +5d6003da867d3f54e5000b4acdfc37e6cce5b6a4459274fdad73e24bd2f0065e mesa-12.0.4.tar.xz +</pre> + + +<h2>New features</h2> +<p>None</p> + + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with "intel_do_flush_locked failed: No such file or directory" if buffer imported with EGL_NATIVE_PIXMAP_KHR</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from "loader/dri3: Overhaul dri3_update_num_back" commit</li> + +<li><a 
href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li> + +</ul> + + +<h2>Changes</h2> + +<p>Axel Davy (4):</p> +<ul> + <li>gallium/util: Really allow aliasing of dst for u_box_union_*</li> + <li>st/nine: Fix the calculation of the number of vs inputs</li> + <li>st/nine: Fix mistake in Volume9 UnlockBox</li> + <li>st/nine: Fix locking CubeTexture surfaces.</li> +</ul> + +<p>Brendan King (1):</p> +<ul> + <li>configure.ac: fix the name of the Wayland Scanner pc file</li> +</ul> + +<p>Brian Paul (1):</p> +<ul> + <li>st/mesa: fix swizzle issue in st_create_sampler_view_from_stobj()</li> +</ul> + +<p>Chad Versace (3):</p> +<ul> + <li>egl: Fix truncation error in _eglParseSyncAttribList64</li> + <li>i965/sync: Fix uninitalized usage and leak of mutex</li> + <li>egl: Don't advertise unsupported platform extensions</li> +</ul> + +<p>Chuanbo Weng (1):</p> +<ul> + <li>gbm: fix potential NULL deref of mapImage/unmapImage.</li> +</ul> + +<p>Chuck Atkins (1):</p> +<ul> + <li>autoconf: Make header install distinct for various APIs (v2)</li> +</ul> + +<p>Dave Airlie (3):</p> +<ul> + <li>anv: initialise and increment send_sbc</li> + <li>anv/wsi: fix apps that acquire multiple images up front</li> + <li>Revert "st/vdpau: use linear layout for output surfaces"</li> +</ul> + +<p>Emil Velikov (12):</p> +<ul> + <li>docs: add sha256 checksums for 12.0.3</li> + <li>cherry-ignore: add non-applicable i965 commit</li> + <li>cherry-ignore: add vaapi encode fix</li> + 
<li>cherry-ignore: add EGL_KHR_debug fix</li> + <li>cherry-ignore: add update_renderbuffer_read_surfaces()</li> + <li>isl/gen6: correctly check msaa layout samples count</li> + <li>egl/x11: don't crash if dri2_dpy->conn is NULL</li> + <li>get-pick-list.sh: Require explicit "12.0" for nominating stable patches</li> + <li>automake: don't forget to pick wglext.h in the tarball</li> + <li>cherry-ignore: add N/A EGL revert</li> + <li>cherry-ignore: add ClientWaitSync fixes</li> + <li>Update version to 12.0.4</li> +</ul> + +<p>Eric Anholt (5):</p> +<ul> + <li>travis: Parse configure.ac to pick an updated LIBDRM_VERSION.</li> + <li>travis: Update to the Ubuntu Trusty image.</li> + <li>travis: Enable vc4 in libdrm to satisfy vc4 test build dependency.</li> + <li>travis: Upgrade LLVM dependency to 3.5 and enable LLVM drivers.</li> + <li>gallium: Fix install-gallium-links.mk on non-bash /bin/sh</li> +</ul> + +<p>Hans de Goede (1):</p> +<ul> + <li>pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms</li> +</ul> + +<p>Ian Romanick (1):</p> +<ul> + <li>glsl: Fix cut-and-paste bug in hierarchical visitor ir_expression::accept</li> +</ul> + +<p>Ilia Mirkin (16):</p> +<ul> + <li>nv30: set usage to staging so that the buffer is allocated in GART</li> + <li>a3xx: make sure to actually clamp depth as requested</li> + <li>a3xx: make use of software clipping when hw can't handle it</li> + <li>a3xx: use window scissor to simulate viewport xy clip</li> + <li>main: GL_RGB10_A2UI does not come with GL 3.0/EXT_texture_integer</li> + <li>mesa/formatquery: limit ES target support, fix core context support</li> + <li>nir: fix definition of pack_uvec2_to_uint</li> + <li>gm107/ir: AL2P writes to a predicate register</li> + <li>st/mesa: fix is_scissor_enabled when X/Y are negative</li> + <li>nvc0/ir: fix overwriting of value backing non-constant gather offset</li> + <li>nv50/ir: copy over value's register id when resolving merge of a phi</li> + <li>nvc0/ir: fix textureGather 
with a single offset</li> + <li>gm107/ir: fix texturing with indirect samplers</li> + <li>gm107/ir: fix bit offset of tex lod setting for indirect texturing</li> + <li>nv50,nvc0: avoid reading out of bounds when getting bogus so info</li> + <li>nv50/ir: process texture offset sources as regular sources</li> +</ul> + +<p>James Legg (1):</p> +<ul> + <li>radeonsi: Fix primitive restart when index changes</li> +</ul> + +<p>Jason Ekstrand (9):</p> +<ul> + <li>nir/spirv: Swap the argument order for AtomicCompareExchange</li> + <li>nir/spirv: Use the correct sources for CompareExchange on images</li> + <li>nir/spirv: Break variable decoration handling into a helper</li> + <li>nir/spirv: Refactor variable deocration handling</li> + <li>nir/spirv/cfg: Handle switches whose break block is a loop continue</li> + <li>nir/spirv/cfg: Detect switch_break after loop_break/continue</li> + <li>nir: Add a nop intrinsic</li> + <li>nir/spirv/cfg: Use a nop intrinsic for tagging the ends of blocks</li> + <li>intel/blorp: Rework our usage of ralloc when compiling shaders</li> +</ul> + +<p>Jonathan Gray (3):</p> +<ul> + <li>genxml: add generated headers to EXTRA_DIST</li> + <li>mapi: automake: set VISIBILITY_CFLAGS for shared glapi</li> + <li>mesa: automake: include mesa_glinterop.h in distfile</li> +</ul> + +<p>Julien Isorce (1):</p> +<ul> + <li>st/va: also honors interlaced preference when providing a video format</li> +</ul> + +<p>Kenneth Graunke (8):</p> +<ul> + <li>nir: Call nir_metadata_preserve from nir_lower_alu_to_scalar().</li> + <li>mesa: Expose RESET_NOTIFICATION_STRATEGY with KHR_robustness.</li> + <li>i965: Fix missing _NEW_TRANSFORM in Gen8+ 3DSTATE_DS atom.</li> + <li>i965: Add missing BRW_NEW_VS_PROG_DATA to 3DSTATE_CLIP.</li> + <li>i965: Move BRW_NEW_FRAGMENT_PROGRAM from 3DSTATE_PS to PS_EXTRA.</li> + <li>i965: Add missing BRW_NEW_CS_PROG_DATA to compute constant atom.</li> + <li>i965: Add missing BRW_CS_PROG_DATA to CS work group surface atom.</li> + <li>i965: Fix 
gl_InvocationID in dual object GS where invocations == 1.</li> +</ul> + +<p>Marek Olšák (12):</p> +<ul> + <li>radeonsi: fix cubemaps viewed as 2D</li> + <li>radeonsi: take compute shader and dispatch indirect memory usage into account</li> + <li>radeonsi: fix FP64 UBO loads with indirect uniform block indexing</li> + <li>mesa: fix glGetFramebufferAttachmentParameteriv w/ on-demand FRONT_BACK alloc</li> + <li>radeonsi: fix interpolateAt opcodes for .zw components</li> + <li>radeonsi: fix texture border colors for compute shaders</li> + <li>radeonsi: disable ReZ</li> + <li>gallium/radeon: make sure the address of separate CMASK is aligned properly</li> + <li>winsys/amdgpu: fix radeon_surf::macro_tile_index for imported textures</li> + <li>egl: use util/macros.h</li> + <li>egl: make interop ABI visible again</li> + <li>glx: make interop ABI visible again</li> +</ul> + +<p>Mario Kleiner (1):</p> +<ul> + <li>glx: Perform check for valid fbconfig against proper X-Screen.</li> +</ul> + +<p>Martin Peres (2):</p> +<ul> + <li>loader/dri3: add get_dri_screen() to the vtable</li> + <li>loader/dri3: import prime buffers in the currently-bound screen</li> +</ul> + +<p>Matt Whitlock (5):</p> +<ul> + <li>egl/android: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li> + <li>gallium/auxiliary: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li> + <li>st/dri: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li> + <li>st/xa: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li> + <li>gallium/winsys: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li> +</ul> + +<p>Max Staudt (1):</p> +<ul> + <li>r300g: Set R300_VAP_CNTL on RSxxx to avoid triangle flickering</li> +</ul> + +<p>Michel Dänzer (1):</p> +<ul> + <li>loader/dri3: Overhaul dri3_update_num_back</li> +</ul> + +<p>Nicholas Bishop (2):</p> +<ul> + <li>gbm: return appropriate error when queryImage() fails</li> + <li>st/dri: check pipe_screen->resource_get_handle() return value</li> +</ul> + +<p>Nicolai Hähnle 
(10):</p> +<ul> + <li>gallium/radeon: cleanup and fix branch emits</li> + <li>st/glsl_to_tgsi: disable on-the-fly peephole for 64-bit operations</li> + <li>st/glsl_to_tgsi: simplify translate_tex_offset</li> + <li>st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets</li> + <li>st/mesa: fix vertex elements setup for doubles</li> + <li>radeonsi: fix indirect loads of 64 bit constants</li> + <li>st/glsl_to_tgsi: fix atomic counter addressing</li> + <li>st/glsl_to_tgsi: fix block copies of arrays of doubles</li> + <li>st/mesa: only set primitive_restart when the restart index is in range</li> + <li>radeonsi: fix 64-bit loads from LDS</li> +</ul> + +<p>Samuel Pitoiset (4):</p> +<ul> + <li>nvc0/ir: fix subops for IMAD</li> + <li>gk110/ir: fix wrong emission of OP_NOT</li> + <li>nvc0: use correct bufctx when invalidating CP textures</li> + <li>nvc0/ir: fix emission of IMAD with NEG modifiers</li> +</ul> + +<p>Stencel, Joanna (1):</p> +<ul> + <li>egl/wayland: add missing destroy_window callback</li> +</ul> + +<p>Tapani Pälli (5):</p> +<ul> + <li>egl: stop claiming support for pbuffer + msaa</li> + <li>egl/dri2: set max values for pbuffer width and height</li> + <li>egl: add check that eglCreateContext gets a valid config</li> + <li>mesa: fix error handling in DrawBuffers</li> + <li>egl: set preserved behavior for surface only if config supports it</li> +</ul> + +<p>Tim Rowley (1):</p> +<ul> + <li>configure.ac: add llvm inteljitevents component if enabled</li> +</ul> + +<p>Vedran Miletić (1):</p> +<ul> + <li>clover: Fix build against clang SVN >= r273191</li> +</ul> + +<p>Vinson Lee (1):</p> +<ul> + <li>Revert "mesa_glinterop: remove inclusion of GLX header"</li> +</ul> + + +</div> +</body> +</html> diff --git a/include/GL/mesa_glinterop.h b/include/GL/mesa_glinterop.h index c0c20d6..0b373c1 100644 --- a/include/GL/mesa_glinterop.h +++ b/include/GL/mesa_glinterop.h @@ -58,8 +58,8 @@ extern "C" { #endif /* Forward declarations to avoid inclusion of 
GL/glx.h */ -typedef struct _XDisplay Display; -typedef struct __GLXcontextRec *GLXContext; +struct _XDisplay; +struct __GLXcontextRec; /* Forward declarations to avoid inclusion of EGL/egl.h */ typedef void *EGLDisplay; @@ -246,7 +246,7 @@ struct mesa_glinterop_export_out { * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error */ int -MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context, +MesaGLInteropGLXQueryDeviceInfo(struct _XDisplay *dpy, struct __GLXcontextRec *context, struct mesa_glinterop_device_info *out); @@ -271,7 +271,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context, * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error */ int -MesaGLInteropGLXExportObject(Display *dpy, GLXContext context, +MesaGLInteropGLXExportObject(struct _XDisplay *dpy, struct __GLXcontextRec *context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out); @@ -286,11 +286,11 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_export_out *out); -typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext context, +typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context, struct mesa_glinterop_device_info *out); typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_device_info *out); -typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext context, +typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out); typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext context, diff --git a/install-gallium-links.mk b/install-gallium-links.mk index ac5a499..fc2f75d 100644 --- a/install-gallium-links.mk +++ b/install-gallium-links.mk @@ -13,8 +13,8 @@ all-local : .install-gallium-links 
fi; \ $(MKDIR_P) $$link_dir; \ file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \ - file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ - file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ + file_list="$$file_list$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ + file_list="$$file_list$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ for f in $$file_list; do \ if test -h .libs/$$f; then \ cp -d $$f $$link_dir; \ diff --git a/src/Makefile.am b/src/Makefile.am index b130f5b..c0aa115 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -47,6 +47,30 @@ CLEANFILES = $(BUILT_SOURCES) SUBDIRS = . gtest util mapi/glapi/gen mapi +if HAVE_OPENGL +gldir = $(includedir)/GL +gl_HEADERS = \ + $(top_srcdir)/include/GL/gl.h \ + $(top_srcdir)/include/GL/glext.h \ + $(top_srcdir)/include/GL/glcorearb.h \ + $(top_srcdir)/include/GL/gl_mangle.h +endif + +if HAVE_GLX +glxdir = $(includedir)/GL +glx_HEADERS = \ + $(top_srcdir)/include/GL/glx.h \ + $(top_srcdir)/include/GL/glxext.h \ + $(top_srcdir)/include/GL/glx_mangle.h +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = mesa/gl.pc +endif + +if HAVE_COMMON_OSMESA +osmesadir = $(includedir)/GL +osmesa_HEADERS = $(top_srcdir)/include/GL/osmesa.h +endif + # include only conditionally ? 
SUBDIRS += compiler @@ -93,7 +117,8 @@ SUBDIRS += gallium endif EXTRA_DIST = \ - getopt hgl SConscript + getopt hgl SConscript \ + $(top_srcdir)/include/GL/mesa_glinterop.h AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 4022727..68544ae 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -278,10 +278,34 @@ control_line_success: HASH_TOKEN DEFINE_TOKEN define | HASH_TOKEN UNDEF IDENTIFIER NEWLINE { macro_t *macro; - if (strcmp("__LINE__", $3) == 0 - || strcmp("__FILE__", $3) == 0 - || strcmp("__VERSION__", $3) == 0 - || strncmp("GL_", $3, 3) == 0) + + /* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says: + * + * It is an error to undefine or to redefine a built-in + * (pre-defined) macro name. + * + * The GLSL ES 1.00 spec does not contain this text. + * + * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says: + * + * #define and #undef functionality are defined as is + * standard for C++ preprocessors for macro definitions + * both with and without macro parameters. + * + * At least as far as I can tell GCC allow '#undef __FILE__'. + * Furthermore, there are desktop OpenGL conformance tests + * that expect '#undef __VERSION__' and '#undef + * GL_core_profile' to work. + * + * Only disallow #undef of pre-defined macros on GLSL ES >= + * 3.00 shaders. 
+ */ + if (parser->is_gles && + parser->version >= 300 && + (strcmp("__LINE__", $3) == 0 + || strcmp("__FILE__", $3) == 0 + || strcmp("__VERSION__", $3) == 0 + || strncmp("GL_", $3, 3) == 0)) glcpp_error(& @1, parser, "Built-in (pre-defined)" " macro names cannot be undefined."); @@ -396,13 +420,13 @@ control_line_success: _glcpp_parser_skip_stack_pop (parser, & @1); } NEWLINE | HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE { - if (parser->version_resolved) { + if (parser->version != 0) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, NULL, true); } | HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE { - if (parser->version_resolved) { + if (parser->version != 0) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, $4, true); @@ -1345,7 +1369,7 @@ glcpp_parser_create(const struct gl_extensions *extensions, gl_api api) parser->extensions = extensions; parser->api = api; - parser->version_resolved = false; + parser->version = 0; parser->has_new_line_number = 0; parser->new_line_number = 1; @@ -2281,10 +2305,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio { const struct gl_extensions *extensions = parser->extensions; - if (parser->version_resolved) + if (parser->version != 0) return; - parser->version_resolved = true; + parser->version = version; add_builtin_define (parser, "__VERSION__", version); diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index d87e6b7..06f3521 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -196,7 +196,7 @@ struct glcpp_parser { int error; const struct gl_extensions *extensions; gl_api api; - bool version_resolved; + unsigned version; bool has_new_line_number; int new_line_number; bool has_new_source_number; diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c 
b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c index 49e7696..f8ade19 100644 --- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c @@ -1,3 +1,4 @@ +#version 300 es #undef __LINE__ #undef __FILE__ #undef __VERSION__ diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected index 3b736df..498dc0f 100644 --- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected @@ -1,6 +1,7 @@ -0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. 0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. 0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +0:4(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +#version 300 es diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c new file mode 100644 index 0000000..e3af10d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c @@ -0,0 +1,4 @@ +#version 110 +#undef __LINE__ +#undef __FILE__ +#undef __VERSION__ diff --git a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected new file mode 100644 index 0000000..cd0071f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected @@ -0,0 +1,4 @@ +#version 110 + + + diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index 11711ee..c31958b 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -348,10 +348,10 @@ isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_mul usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || 
yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY); /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */ -samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); -isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY); -usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY); -samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW); +samplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); +isamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY); +usamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY); +samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW); samplerExternalOES { if (yyextra->OES_EGL_image_external_enable) diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy index 3885688..c72f119 100644 --- a/src/compiler/glsl/glsl_parser.yy +++ b/src/compiler/glsl/glsl_parser.yy @@ -1784,8 +1784,10 @@ type_qualifier: * variables. As only outputs can be declared as invariant, an invariant * output from one shader stage will still match an input of a subsequent * stage without the input being declared as invariant." + * + * On the desktop side, this text first appears in GLSL 4.30. 
*/ - if (state->es_shader && state->language_version >= 300 && $$.flags.q.in) + if (state->is_version(430, 300) && $$.flags.q.in) _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); } | interpolation_qualifier type_qualifier diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 93716c4..3809270 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -586,6 +586,13 @@ public: return this->u.state_slots; } + inline bool is_interpolation_flat() const + { + return this->data.interpolation == INTERP_QUALIFIER_FLAT || + this->type->contains_integer() || + this->type->contains_double(); + } + inline bool is_name_ralloced() const { return this->name != ir_variable::tmp_name; diff --git a/src/compiler/glsl/ir_hv_accept.cpp b/src/compiler/glsl/ir_hv_accept.cpp index 213992a..5cc6a34 100644 --- a/src/compiler/glsl/ir_hv_accept.cpp +++ b/src/compiler/glsl/ir_hv_accept.cpp @@ -147,7 +147,7 @@ ir_expression::accept(ir_hierarchical_visitor *v) goto done; case visit_stop: - return s; + return visit_stop; } } diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp index 183b13b..bca1e0a 100644 --- a/src/compiler/glsl/ir_set_program_inouts.cpp +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -260,15 +260,19 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var, * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into * vectors, and lower_packed_varyings() gets rid of structs that occur in * varyings). + * + * However, we don't use varying packing in all cases - tessellation + * shaders bypass it. This means we'll see varying structs and arrays + * of structs here. For now, we just give up so the caller marks the + * entire variable as used. 
*/ if (!(type->is_matrix() || (type->is_array() && (type->fields.array->is_numeric() || type->fields.array->is_boolean())))) { - assert(!"Unexpected indexing in ir_set_program_inouts"); - /* For safety in release builds, in case we ever encounter unexpected - * indexing, give up and let the caller mark the whole variable as used. + /* If we don't know how to handle this case, give up and let the + * caller mark the whole variable as used. */ return false; } diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp index 5a5adc0..ddf6aa2 100644 --- a/src/compiler/glsl/link_varyings.cpp +++ b/src/compiler/glsl/link_varyings.cpp @@ -308,7 +308,25 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog, return; } - if (!prog->IsES && input->data.invariant != output->data.invariant) { + /* The GLSL 4.30 and GLSL ES 3.00 specifications say: + * + * "As only outputs need be declared with invariant, an output from + * one shader stage will still match an input of a subsequent stage + * without the input being declared as invariant." + * + * while GLSL 4.20 says: + * + * "For variables leaving one shader and coming into another shader, + * the invariant keyword has to be used in both shaders, or a link + * error will result." + * + * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: + * + * "The invariance of varyings that are declared in both the vertex + * and fragment shaders must match." + */ + if (input->data.invariant != output->data.invariant && + prog->Version < (prog->IsES ? 300 : 430)) { linker_error(prog, "%s shader output `%s' %s invariant qualifier, " "but %s shader input %s invariant qualifier\n", @@ -1610,7 +1628,8 @@ varying_matches::compute_packing_class(const ir_variable *var) unsigned packing_class = var->data.centroid | (var->data.sample << 1) | (var->data.patch << 2); packing_class *= 4; - packing_class += var->data.interpolation; + packing_class += var->is_interpolation_flat() + ? 
unsigned(INTERP_QUALIFIER_FLAT) : var->data.interpolation; return packing_class; } diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 6379ed2..02b3e00 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3687,6 +3687,18 @@ create_shader_variable(struct gl_shader_program *shProg, if (in->data.mode == ir_var_system_value && in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { out->name = ralloc_strdup(shProg, "gl_VertexID"); + } else if ((in->data.mode == ir_var_shader_out && + in->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) || + (in->data.mode == ir_var_system_value && + in->data.location == SYSTEM_VALUE_TESS_LEVEL_OUTER)) { + out->name = ralloc_strdup(shProg, "gl_TessLevelOuter"); + type = glsl_type::get_array_instance(glsl_type::float_type, 4); + } else if ((in->data.mode == ir_var_shader_out && + in->data.location == VARYING_SLOT_TESS_LEVEL_INNER) || + (in->data.mode == ir_var_system_value && + in->data.location == SYSTEM_VALUE_TESS_LEVEL_INNER)) { + out->name = ralloc_strdup(shProg, "gl_TessLevelInner"); + type = glsl_type::get_array_instance(glsl_type::float_type, 2); } else { out->name = ralloc_strdup(shProg, name); } @@ -3839,6 +3851,9 @@ add_interface_variables(struct gl_shader_program *shProg, continue; }; + if (var->data.patch) + loc_bias = int(VARYING_SLOT_PATCH0); + /* Skip packed varyings, packed varyings are handled separately * by add_packed_varyings. */ diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp index 41edada..1e7a8c2 100644 --- a/src/compiler/glsl/lower_packed_varyings.cpp +++ b/src/compiler/glsl/lower_packed_varyings.cpp @@ -273,11 +273,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader) continue; /* This lowering pass is only capable of packing floats and ints - * together when their interpolation mode is "flat". 
Therefore, to be - * safe, caller should ensure that integral varyings always use flat - * interpolation, even when this is not required by GLSL. + * together when their interpolation mode is "flat". Treat integers as + * being flat when the interpolation mode is none. */ assert(var->data.interpolation == INTERP_QUALIFIER_FLAT || + var->data.interpolation == INTERP_QUALIFIER_NONE || !var->type->contains_integer()); /* Clone the variable for program resource list before @@ -607,7 +607,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref( if (this->packed_varyings[slot] == NULL) { char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name); const glsl_type *packed_type; - if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT) + if (unpacked_var->is_interpolation_flat()) packed_type = glsl_type::ivec4_type; else packed_type = glsl_type::vec4_type; @@ -627,7 +627,8 @@ lower_packed_varyings_visitor::get_packed_varying_deref( packed_var->data.centroid = unpacked_var->data.centroid; packed_var->data.sample = unpacked_var->data.sample; packed_var->data.patch = unpacked_var->data.patch; - packed_var->data.interpolation = unpacked_var->data.interpolation; + packed_var->data.interpolation = packed_type == glsl_type::ivec4_type + ? unsigned(INTERP_QUALIFIER_FLAT) : unpacked_var->data.interpolation; packed_var->data.location = location; packed_var->data.precision = unpacked_var->data.precision; packed_var->data.always_active_io = unpacked_var->data.always_active_io; diff --git a/src/compiler/glsl/opt_conditional_discard.cpp b/src/compiler/glsl/opt_conditional_discard.cpp index 1ca8803..a27bead 100644 --- a/src/compiler/glsl/opt_conditional_discard.cpp +++ b/src/compiler/glsl/opt_conditional_discard.cpp @@ -72,7 +72,14 @@ opt_conditional_discard_visitor::visit_leave(ir_if *ir) /* Move the condition and replace the ir_if with the ir_discard. 
*/ ir_discard *discard = (ir_discard *) ir->then_instructions.head; - discard->condition = ir->condition; + if (!discard->condition) + discard->condition = ir->condition; + else { + void *ctx = ralloc_parent(ir); + discard->condition = new(ctx) ir_expression(ir_binop_logic_and, + ir->condition, + discard->condition); + } ir->replace_with(discard); progress = true; diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 11f1e85..83ce35e 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -1079,7 +1079,7 @@ function_key_compare(const void *a, const void *b) const glsl_type *const key2 = (glsl_type *) b; if (key1->length != key2->length) - return 1; + return false; return memcmp(key1->fields.parameters, key2->fields.parameters, (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0; @@ -1090,20 +1090,8 @@ static uint32_t function_key_hash(const void *a) { const glsl_type *const key = (glsl_type *) a; - char hash_key[128]; - unsigned size = 0; - - size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); - - for (unsigned i = 0; i < key->length; i++) { - if (size >= sizeof(hash_key)) - break; - - size += snprintf(& hash_key[size], sizeof(hash_key) - size, - "%p", (void *) key->fields.structure[i].type); - } - - return _mesa_hash_string(hash_key); + return _mesa_hash_data(key->fields.parameters, + (key->length + 1) * sizeof(*key->fields.parameters)); } const glsl_type * diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 3c8b4e0..158ccc1 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -659,6 +659,122 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref) return NULL; } +/* This is the second step in the recursion. We've found the tail and made a + * copy. Now we need to iterate over all possible leaves and call the + * callback on each one. 
+ */ +static bool +deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail, + nir_deref_foreach_leaf_cb cb, void *state) +{ + unsigned length; + union { + nir_deref_array arr; + nir_deref_struct str; + } tmp; + + assert(tail->child == NULL); + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + if (glsl_type_is_vector_or_scalar(tail->type)) + return cb(deref, state); + /* Fall Through */ + + case GLSL_TYPE_ARRAY: + tmp.arr.deref.deref_type = nir_deref_type_array; + tmp.arr.deref.type = glsl_get_array_element(tail->type); + tmp.arr.deref_array_type = nir_deref_array_type_direct; + tmp.arr.indirect = NIR_SRC_INIT; + tail->child = &tmp.arr.deref; + + length = glsl_get_length(tail->type); + for (unsigned i = 0; i < length; i++) { + tmp.arr.deref.child = NULL; + tmp.arr.base_offset = i; + if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state)) + return false; + } + return true; + + case GLSL_TYPE_STRUCT: + tmp.str.deref.deref_type = nir_deref_type_struct; + tail->child = &tmp.str.deref; + + length = glsl_get_length(tail->type); + for (unsigned i = 0; i < length; i++) { + tmp.arr.deref.child = NULL; + tmp.str.deref.type = glsl_get_struct_field(tail->type, i); + tmp.str.index = i; + if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state)) + return false; + } + return true; + + default: + unreachable("Invalid type for dereference"); + } +} + +/* This is the first step of the foreach_leaf recursion. In this step we are + * walking to the end of the deref chain and making a copy in the stack as we + * go. This is because we don't want to mutate the deref chain that was + * passed in by the caller. The downside is that this deref chain is on the + * stack and , if the caller wants to do anything with it, they will have to + * make their own copy because this one will go away. 
+ */ +static bool +deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail, + nir_deref_foreach_leaf_cb cb, void *state) +{ + union { + nir_deref_array arr; + nir_deref_struct str; + } c; + + if (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: + c.arr = *nir_deref_as_array(tail->child); + tail->child = &c.arr.deref; + return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state); + + case nir_deref_type_struct: + c.str = *nir_deref_as_struct(tail->child); + tail->child = &c.str.deref; + return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state); + + case nir_deref_type_var: + default: + unreachable("Invalid deref type for a child"); + } + } else { + /* We've gotten to the end of the original deref. Time to start + * building our own derefs. + */ + return deref_foreach_leaf_build_recur(deref, tail, cb, state); + } +} + +/** + * This function iterates over all of the possible derefs that can be created + * with the given deref as the head. It then calls the provided callback with + * a full deref for each one. + * + * The deref passed to the callback will be allocated on the stack. You will + * need to make a copy if you want it to hang around. + */ +bool +nir_deref_foreach_leaf(nir_deref_var *deref, + nir_deref_foreach_leaf_cb cb, void *state) +{ + nir_deref_var copy = *deref; + return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state); +} + /* Returns a load_const instruction that represents the constant * initializer for the given deref chain. The caller is responsible for * ensuring that there actually is a constant initializer.
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9816ed6..dc03918 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1923,6 +1923,10 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); +typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state); +bool nir_deref_foreach_leaf(nir_deref_var *deref, + nir_deref_foreach_leaf_cb cb, void *state); + nir_load_const_instr * nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 09cdf72..7497efc 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -318,6 +318,25 @@ nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) } static inline nir_ssa_def * +nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1) +{ + switch (src0->num_components) { + case 1: return nir_ine(b, src0, src1); + case 2: return nir_bany_inequal2(b, src0, src1); + case 3: return nir_bany_inequal3(b, src0, src1); + case 4: return nir_bany_inequal4(b, src0, src1); + default: + unreachable("bad component size"); + } +} + +static inline nir_ssa_def * +nir_bany(nir_builder *b, nir_ssa_def *src) +{ + return nir_bany_inequal(b, src, nir_imm_int(b, 0)); +} + +static inline nir_ssa_def * nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) { unsigned swizzle[4] = {c, c, c, c}; diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c index c36748d..cf31e14 100644 --- a/src/compiler/nir/nir_inline_functions.c +++ b/src/compiler/nir/nir_inline_functions.c @@ -25,6 +25,20 @@ #include "nir_builder.h" #include "nir_control_flow.h" +static bool +deref_apply_constant_initializer(nir_deref_var *deref, void *state) +{ + struct nir_builder *b = state; + + nir_load_const_instr *initializer = + 
nir_deref_get_const_initializer_load(b->shader, deref); + nir_builder_instr_insert(b, &initializer->instr); + + nir_store_deref_var(b, deref, &initializer->def, 0xf); + + return true; +} + static bool inline_function_impl(nir_function_impl *impl, struct set *inlined); static void @@ -174,11 +188,35 @@ inline_functions_block(nir_block *block, nir_builder *b, /* Add copies of all in parameters */ assert(call->num_params == callee_copy->num_params); + b->cursor = nir_before_instr(&call->instr); + + /* Before we insert the copy of the function, we need to lower away + * constant initializers on local variables. This is because constant + * initializers happen (effectively) at the top of the function and, + * since these are about to become locals of the calling function, + * initialization will happen at the top of the caller rather than at + * the top of the callee. This isn't usually a problem, but if we are + * being inlined inside of a loop, it can result in the variable not + * getting re-initialized properly for all loop iterations. + */ + nir_foreach_variable(local, &callee_copy->locals) { + if (!local->constant_initializer) + continue; + + nir_deref_var deref; + deref.deref.deref_type = nir_deref_type_var, + deref.deref.child = NULL; + deref.deref.type = local->type, + deref.var = local; + + nir_deref_foreach_leaf(&deref, deref_apply_constant_initializer, b); + + local->constant_initializer = NULL; + } + exec_list_append(&b->impl->locals, &callee_copy->locals); exec_list_append(&b->impl->registers, &callee_copy->registers); - b->cursor = nir_before_instr(&call->instr); - /* We now need to tie the two functions together using the * parameters. There are two ways we do this: One is to turn the * parameter into a local variable and do a shadow-copy. 
The other diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 6f86c9f..9479060 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -41,6 +41,8 @@ #define ARR(...) { __VA_ARGS__ } +INTRINSIC(nop, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, + NIR_INTRINSIC_CAN_ELIMINATE) INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0) @@ -266,16 +268,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, * in shared_atomic_add, etc). * 2: For CompSwap only: the second data parameter. */ -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) 
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0) +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0) #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \ INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \ diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 4f72cf7..a84fbdf 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -254,6 +254,9 @@ nir_lower_alu_to_scalar_impl(nir_function_impl *impl) lower_alu_instr_scalar(nir_instr_as_alu(instr), &builder); } } + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); } void diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c index d62cec0..937f8b3 100644 --- a/src/compiler/nir/nir_lower_vars_to_ssa.c +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/* Performs variable renaming by doing a DFS of the dominance tree +/* Performs variable renaming * * This algorithm is very similar to the one outlined in "Efficiently * Computing Static Single Assignment Form and the Control Dependence @@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node, * SSA def on the stack per block. 
*/ static bool -rename_variables_block(nir_block *block, struct lower_variables_state *state) +rename_variables(struct lower_variables_state *state) { nir_builder b; nir_builder_init(&b, state->impl); - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components, - intrin->dest.ssa.bit_size); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); + nir_foreach_block(block, state->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) continue; - } - if (!node->lower_to_ssa) - continue; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. 
+ */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + intrin->num_components, + intrin->dest.ssa.bit_size); + + nir_instr_insert_before(&intrin->instr, &undef->instr); + nir_instr_remove(&intrin->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&undef->def)); + continue; + } - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src = nir_src_for_ssa( - nir_phi_builder_value_get_block_def(node->pb_value, block)); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; + if (!node->lower_to_ssa) + continue; - assert(intrin->dest.is_ssa); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src = nir_src_for_ssa( + nir_phi_builder_value_get_block_def(node->pb_value, block)); + for (unsigned i = intrin->num_components; i < 4; i++) + mov->src[0].swizzle[i] = 0; - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, - intrin->dest.ssa.bit_size, NULL); + assert(intrin->dest.is_ssa); - nir_instr_insert_before(&intrin->instr, &mov->instr); - nir_instr_remove(&intrin->instr); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, + intrin->dest.ssa.bit_size, NULL); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; - } - - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. 
*/ + nir_instr_insert_before(&intrin->instr, &mov->instr); nir_instr_remove(&intrin->instr); - continue; - } - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + break; + } - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); + case nir_intrinsic_store_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); - unsigned wrmask = nir_intrinsic_write_mask(intrin); - if (wrmask == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. */ + nir_instr_remove(&intrin->instr); + continue; + } - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = - nir_phi_builder_value_get_block_def(node->pb_value, block); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. 
- */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (wrmask & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); + if (!node->lower_to_ssa) + continue; + + assert(intrin->num_components == + glsl_get_vector_elements(node->type)); + + assert(intrin->src[0].is_ssa); + + nir_ssa_def *new_def; + b.cursor = nir_before_instr(&intrin->instr); + + unsigned wrmask = nir_intrinsic_write_mask(intrin); + if (wrmask == (1 << intrin->num_components) - 1) { + /* Whole variable store - just copy the source. Note that + * intrin->num_components and intrin->src[0].ssa->num_components + * may differ. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < intrin->num_components ? i : 0; + + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, + intrin->num_components, false); + } else { + nir_ssa_def *old_def = + nir_phi_builder_value_get_block_def(node->pb_value, block); + /* For writemasked store_var intrinsics, we combine the newly + * written values with the existing contents of unwritten + * channels, creating a new SSA value for the whole vector. 
+ */ + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < intrin->num_components; i++) { + if (wrmask & (1 << i)) { + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); + } else { + srcs[i] = nir_channel(&b, old_def, i); + } } + new_def = nir_vec(&b, srcs, intrin->num_components); } - new_def = nir_vec(&b, srcs, intrin->num_components); - } - assert(new_def->num_components == intrin->num_components); + assert(new_def->num_components == intrin->num_components); - nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); - nir_instr_remove(&intrin->instr); - break; - } + nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); + nir_instr_remove(&intrin->instr); + break; + } - default: - break; + default: + break; + } } } - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - return true; } @@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) } } - rename_variables_block(nir_start_block(impl), &state); + rename_variables(&state); nir_phi_builder_finish(state.phi_builder); diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 15066c2..7045c95 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -257,7 +257,7 @@ unpack_4x8("unorm") unpack_2x16("half") unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """ -dst.x = (src0.x & 0xffff) | (src0.y >> 16); +dst.x = (src0.x & 0xffff) | (src0.y << 16); """) unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """ diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h index edc5302..a4dc18a 100644 --- a/src/compiler/nir/nir_phi_builder.h +++ b/src/compiler/nir/nir_phi_builder.h @@ -44,7 +44,8 @@ * var.pb_val = nir_phi_builder_add_value(pb, var.defs) * * // Visit each block. This needs to visit dominators first; - * // nir_for_each_block() will be ok. + * // nir_foreach_block() will be ok. 
+ * * foreach block: * foreach instruction: * foreach use of variable var: diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index f1bbfd5..0763cb8 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1718,8 +1718,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, break; case SpvOpAtomicCompareExchange: - intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); break; case SpvOpAtomicISub: @@ -1816,8 +1816,8 @@ fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode, break; case SpvOpAtomicCompareExchange: - src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); - src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def); + src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); break; /* Fall through */ diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c index d9096f4..62b9056 100644 --- a/src/compiler/spirv/vtn_cfg.c +++ b/src/compiler/spirv/vtn_cfg.c @@ -239,12 +239,12 @@ vtn_get_branch_type(struct vtn_block *block, swcase->fallthrough == block->switch_case); swcase->fallthrough = block->switch_case; return vtn_branch_type_switch_fallthrough; - } else if (block == switch_break) { - return vtn_branch_type_switch_break; } else if (block == loop_break) { return vtn_branch_type_loop_break; } else if (block == loop_cont) { return vtn_branch_type_loop_continue; + } else if (block == switch_break) { + return vtn_branch_type_switch_break; } else { return vtn_branch_type_none; } @@ -443,6 +443,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list, vtn_order_case(swtch, case_block->switch_case); } + enum vtn_branch_type branch_type = + vtn_get_branch_type(break_block, switch_case, NULL, + 
loop_break, loop_cont); + + if (branch_type != vtn_branch_type_none) { + /* It is possible that the break is actually the continue block + * for the containing loop. In this case, we need to bail and let + * the loop parsing code handle the continue properly. + */ + assert(branch_type == vtn_branch_type_loop_continue); + return; + } + block = break_block; continue; } @@ -518,7 +531,7 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, struct vtn_block *pred = vtn_value(b, w[i + 1], vtn_value_type_block)->block; - b->nb.cursor = nir_after_block_before_jump(pred->end_block); + b->nb.cursor = nir_after_instr(&pred->end_nop->instr); vtn_local_store(b, src, nir_deref_var_create(b, phi_var)); } @@ -576,7 +589,9 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list, vtn_foreach_instruction(b, block_start, block_end, handler); - block->end_block = nir_cursor_current_block(b->nb.cursor); + block->end_nop = nir_intrinsic_instr_create(b->nb.shader, + nir_intrinsic_nop); + nir_builder_instr_insert(&b->nb, &block->end_nop->instr); if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) { struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]); diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index 7f5444e..6f34f09 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -149,8 +149,8 @@ struct vtn_block { /** Points to the switch case started by this block (if any) */ struct vtn_case *switch_case; - /** The last block in this SPIR-V block. 
*/ - nir_block *end_block; + /** Every block ends in a nop intrinsic so that we can find it again */ + nir_intrinsic_instr *end_nop; }; struct vtn_function { diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index fe2494b..459e573 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -889,81 +889,9 @@ vtn_get_builtin_location(struct vtn_builder *b, } static void -var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, - const struct vtn_decoration *dec, void *void_var) +apply_var_decoration(struct vtn_builder *b, nir_variable *nir_var, + const struct vtn_decoration *dec) { - struct vtn_variable *vtn_var = void_var; - - /* Handle decorations that apply to a vtn_variable as a whole */ - switch (dec->decoration) { - case SpvDecorationBinding: - vtn_var->binding = dec->literals[0]; - return; - case SpvDecorationDescriptorSet: - vtn_var->descriptor_set = dec->literals[0]; - return; - default: - break; - } - - /* Now we handle decorations that apply to a particular nir_variable */ - nir_variable *nir_var = vtn_var->var; - if (val->value_type == vtn_value_type_access_chain) { - assert(val->access_chain->length == 0); - assert(val->access_chain->var == void_var); - assert(member == -1); - } else { - assert(val->value_type == vtn_value_type_type); - if (member != -1) - nir_var = vtn_var->members[member]; - } - - /* Location is odd in that it can apply in three different cases: To a - * non-split variable, to a whole split variable, or to one structure - * member of a split variable. 
- */ - if (dec->decoration == SpvDecorationLocation) { - unsigned location = dec->literals[0]; - bool is_vertex_input; - if (b->shader->stage == MESA_SHADER_FRAGMENT && - vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += FRAG_RESULT_DATA0; - } else if (b->shader->stage == MESA_SHADER_VERTEX && - vtn_var->mode == vtn_variable_mode_input) { - is_vertex_input = true; - location += VERT_ATTRIB_GENERIC0; - } else if (vtn_var->mode == vtn_variable_mode_input || - vtn_var->mode == vtn_variable_mode_output) { - is_vertex_input = false; - location += VARYING_SLOT_VAR0; - } else { - assert(!"Location must be on input or output variable"); - } - - if (nir_var) { - /* This handles the member and lone variable cases */ - nir_var->data.location = location; - nir_var->data.explicit_location = true; - } else { - /* This handles the structure member case */ - assert(vtn_var->members); - unsigned length = - glsl_get_length(glsl_without_array(vtn_var->type->type)); - for (unsigned i = 0; i < length; i++) { - vtn_var->members[i]->data.location = location; - vtn_var->members[i]->data.explicit_location = true; - location += - glsl_count_attribute_slots(vtn_var->members[i]->interface_type, - is_vertex_input); - } - } - return; - } - - if (nir_var == NULL) - return; - switch (dec->decoration) { case SpvDecorationRelaxedPrecision: break; /* FIXME: Do nothing with this for now. 
*/ @@ -1080,6 +1008,99 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, } } +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + struct vtn_variable *vtn_var = void_var; + + /* Handle decorations that apply to a vtn_variable as a whole */ + switch (dec->decoration) { + case SpvDecorationBinding: + vtn_var->binding = dec->literals[0]; + return; + case SpvDecorationDescriptorSet: + vtn_var->descriptor_set = dec->literals[0]; + return; + default: + break; + } + + if (val->value_type == vtn_value_type_access_chain) { + assert(val->access_chain->length == 0); + assert(val->access_chain->var == void_var); + assert(member == -1); + } else { + assert(val->value_type == vtn_value_type_type); + } + + /* Location is odd. If applied to a split structure, we have to walk the + * whole thing and accumulate the location. It's easier to handle as a + * special case. + */ + if (dec->decoration == SpvDecorationLocation) { + unsigned location = dec->literals[0]; + bool is_vertex_input; + if (b->shader->stage == MESA_SHADER_FRAGMENT && + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += FRAG_RESULT_DATA0; + } else if (b->shader->stage == MESA_SHADER_VERTEX && + vtn_var->mode == vtn_variable_mode_input) { + is_vertex_input = true; + location += VERT_ATTRIB_GENERIC0; + } else if (vtn_var->mode == vtn_variable_mode_input || + vtn_var->mode == vtn_variable_mode_output) { + is_vertex_input = false; + location += VARYING_SLOT_VAR0; + } else { + assert(!"Location must be on input or output variable"); + } + + if (vtn_var->var) { + /* This handles the member and lone variable cases */ + vtn_var->var->data.location = location; + vtn_var->var->data.explicit_location = true; + } else { + /* This handles the structure member case */ + assert(vtn_var->members); + unsigned length = + glsl_get_length(glsl_without_array(vtn_var->type->type)); 
+ for (unsigned i = 0; i < length; i++) { + vtn_var->members[i]->data.location = location; + vtn_var->members[i]->data.explicit_location = true; + location += + glsl_count_attribute_slots(vtn_var->members[i]->interface_type, + is_vertex_input); + } + } + return; + } else { + if (vtn_var->var) { + assert(member <= 0); + apply_var_decoration(b, vtn_var->var, dec); + } else if (vtn_var->members) { + if (member >= 0) { + assert(vtn_var->members); + apply_var_decoration(b, vtn_var->members[member], dec); + } else { + unsigned length = + glsl_get_length(glsl_without_array(vtn_var->type->type)); + for (unsigned i = 0; i < length; i++) + apply_var_decoration(b, vtn_var->members[i], dec); + } + } else { + /* A few variables, those with external storage, have no actual + * nir_variables associated with them. Fortunately, all decorations + * we care about for those variables are on the type only. + */ + assert(vtn_var->mode == vtn_variable_mode_ubo || + vtn_var->mode == vtn_variable_mode_ssbo || + vtn_var->mode == vtn_variable_mode_push_constant); + } + } +} + /* Tries to compute the size of an interface block based on the strides and * offsets that are provided to us in the SPIR-V source. 
*/ @@ -1173,7 +1194,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvStorageClassPushConstant: var->mode = vtn_variable_mode_push_constant; assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(var->type) * 4; + b->shader->num_uniforms = vtn_type_block_size(var->type); break; case SpvStorageClassInput: var->mode = vtn_variable_mode_input; diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index fe33ecd..c6d4fb5 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -242,6 +242,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, return NULL; break; + case __DRI_ATTRIB_MAX_PBUFFER_WIDTH: + _eglSetConfigKey(&base, EGL_MAX_PBUFFER_WIDTH, + _EGL_MAX_PBUFFER_WIDTH); + break; + case __DRI_ATTRIB_MAX_PBUFFER_HEIGHT: + _eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT, + _EGL_MAX_PBUFFER_HEIGHT); + break; + default: key = dri2_to_egl_attribute_map[attrib]; if (key != 0) @@ -320,6 +329,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, surface_type &= ~EGL_PIXMAP_BIT; } + /* No support for pbuffer + MSAA for now. + * + * XXX TODO: pbuffer + MSAA does not work and causes crashes. + * See QT bugreport: https://bugreports.qt.io/browse/QTBUG-47509 + */ + if (base.Samples) { + surface_type &= ~EGL_PBUFFER_BIT; + } + conf->base.SurfaceType |= surface_type; return conf; @@ -758,64 +776,99 @@ dri2_create_screen(_EGLDisplay *disp) /** * Called via eglInitialize(), GLX_drv->API.Initialize(). + * + * This must be guaranteed to be called exactly once, even if eglInitialize is + * called many times (without a eglTerminate in between). 
*/ static EGLBoolean dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp) { + EGLBoolean ret = EGL_FALSE; + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + + /* In the case where the application calls eglMakeCurrent(context1), + * eglTerminate, then eglInitialize again (without a call to eglReleaseThread + * or eglMakeCurrent(NULL) before that), dri2_dpy structure is still + * initialized, as we need it to be able to free context1 correctly. + * + * It would probably be safest to forcibly release the display with + * dri2_display_release, to make sure the display is reinitialized correctly. + * However, the EGL spec states that we need to keep a reference to the + * current context (so we cannot call dri2_make_current(NULL)), and therefore + * we would leak context1 as we would be missing the old display connection + * to free it up correctly. + */ + if (dri2_dpy) { + dri2_dpy->ref_count++; + return EGL_TRUE; + } + /* not until swrast_dri is supported */ if (disp->Options.UseFallback) return EGL_FALSE; + /* Nothing to initialize for a test only display */ + if (disp->Options.TestOnly) + return EGL_TRUE; + switch (disp->Platform) { #ifdef HAVE_SURFACELESS_PLATFORM case _EGL_PLATFORM_SURFACELESS: - if (disp->Options.TestOnly) - return EGL_TRUE; - return dri2_initialize_surfaceless(drv, disp); + ret = dri2_initialize_surfaceless(drv, disp); + break; #endif - #ifdef HAVE_X11_PLATFORM case _EGL_PLATFORM_X11: - if (disp->Options.TestOnly) - return EGL_TRUE; - return dri2_initialize_x11(drv, disp); + ret = dri2_initialize_x11(drv, disp); + break; #endif - #ifdef HAVE_DRM_PLATFORM case _EGL_PLATFORM_DRM: - if (disp->Options.TestOnly) - return EGL_TRUE; - return dri2_initialize_drm(drv, disp); + ret = dri2_initialize_drm(drv, disp); + break; #endif #ifdef HAVE_WAYLAND_PLATFORM case _EGL_PLATFORM_WAYLAND: - if (disp->Options.TestOnly) - return EGL_TRUE; - return dri2_initialize_wayland(drv, disp); + ret = dri2_initialize_wayland(drv, disp); + break; #endif 
#ifdef HAVE_ANDROID_PLATFORM case _EGL_PLATFORM_ANDROID: - if (disp->Options.TestOnly) - return EGL_TRUE; - return dri2_initialize_android(drv, disp); + ret = dri2_initialize_android(drv, disp); + break; #endif - default: _eglLog(_EGL_WARNING, "No EGL platform enabled."); return EGL_FALSE; } + + if (ret) { + dri2_dpy = dri2_egl_display(disp); + + if (!dri2_dpy) { + return EGL_FALSE; + } + + dri2_dpy->ref_count++; + } + + return ret; } /** - * Called via eglTerminate(), drv->API.Terminate(). + * Decrement display reference count, and free up display if necessary. */ -static EGLBoolean -dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) -{ +static void +dri2_display_release(_EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned i; - _eglReleaseDisplayResources(drv, disp); + assert(dri2_dpy->ref_count > 0); + dri2_dpy->ref_count--; + + if (dri2_dpy->ref_count > 0) + return; + _eglCleanupDisplay(disp); if (dri2_dpy->own_dri_screen) @@ -870,6 +923,21 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) } free(dri2_dpy); disp->DriverData = NULL; +} + +/** + * Called via eglTerminate(), drv->API.Terminate(). + * + * This must be guaranteed to be called exactly once, even if eglTerminate is + * called many times (without a eglInitialize in between). + */ +static EGLBoolean +dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp) +{ + /* Release all non-current Context/Surfaces. 
*/ + _eglReleaseDisplayResources(drv, disp); + + dri2_display_release(disp); return EGL_TRUE; } @@ -1189,10 +1257,16 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, _EGLSurface *tmp_dsurf, *tmp_rsurf; __DRIdrawable *ddraw, *rdraw; __DRIcontext *cctx; + EGLBoolean unbind; + + if (!dri2_dpy) + return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent"); /* make new bindings */ - if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) + if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) { + /* _eglBindContext already sets the EGL error (in _eglCheckMakeCurrent) */ return EGL_FALSE; + } /* flush before context switch */ if (old_ctx && dri2_drv->glFlush) @@ -1207,14 +1281,21 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, dri2_dpy->core->unbindContext(old_cctx); } - if ((cctx == NULL && ddraw == NULL && rdraw == NULL) || - dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) { + unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL); + + if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) { if (old_dsurf) drv->API.DestroySurface(drv, disp, old_dsurf); if (old_rsurf) drv->API.DestroySurface(drv, disp, old_rsurf); - if (old_ctx) + + if (!unbind) + dri2_dpy->ref_count++; + if (old_ctx) { + EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display); drv->API.DestroyContext(drv, disp, old_ctx); + dri2_display_release(old_disp); + } return EGL_TRUE; } else { @@ -1232,7 +1313,11 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf, _eglPutSurface(old_rsurf); _eglPutContext(old_ctx); - return EGL_FALSE; + /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but + * setting the error to EGL_BAD_MATCH is surely better than leaving it + * as EGL_SUCCESS. 
+ */ + return _eglError(EGL_BAD_MATCH, "eglMakeCurrent"); } } diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 925294b..6099bc2 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -80,8 +80,6 @@ #include "eglimage.h" #include "eglsync.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) - struct wl_buffer; struct dri2_egl_driver @@ -177,6 +175,10 @@ struct dri2_egl_display const __DRI2interopExtension *interop; int fd; + /* dri2_initialize/dri2_terminate increment/decrement this count, so does + * dri2_make_current (tracks if there are active contexts/surfaces). */ + int ref_count; + int own_device; int swap_available; int invalidate_available; diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index 87bd19b..351fd0f 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -29,7 +29,7 @@ #include <errno.h> #include <dlfcn.h> - +#include <fcntl.h> #if 0 #include <xf86drm.h> #endif @@ -170,6 +170,8 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + /* To avoid blocking other EGL calls, release the display mutex before * we enter droid_window_enqueue_buffer() and re-acquire the mutex upon * return. 
@@ -200,6 +202,12 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur dri2_surf->buffer = NULL; mtx_lock(&disp->Mutex); + + if (dri2_surf->dri_image) { + dri2_dpy->image->destroyImage(dri2_surf->dri_image); + dri2_surf->dri_image = NULL; + } + return EGL_TRUE; } @@ -291,6 +299,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT, dri2_surf->base.GLColorspace); + if (!config) + goto cleanup_surface; if (dri2_dpy->dri2) { dri2_surf->dri_drawable = @@ -384,6 +394,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf) int fourcc, pitch; int offset = 0, fd; + if (dri2_surf->dri_image) + return 0; + if (!dri2_surf->buffer) return -1; @@ -442,10 +455,8 @@ droid_image_get_buffers(__DRIdrawable *driDrawable, static EGLBoolean droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) { - struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw); - _EGLContext *ctx; if (dri2_surf->base.Type != EGL_WINDOW_BIT) return EGL_TRUE; @@ -986,7 +997,7 @@ droid_open_device(void) fd = -1; } - return (fd >= 0) ? dup(fd) : -1; + return (fd >= 0) ? 
fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1; } /* support versions < JellyBean */ @@ -1134,6 +1145,7 @@ cleanup_device: close(dri2_dpy->fd); cleanup_display: free(dri2_dpy); + dpy->DriverData = NULL; return _eglError(EGL_NOT_INITIALIZED, err); } diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index 9373496..1ce282f 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -726,5 +726,6 @@ cleanup: close(fd); free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c index e0ddc12..323a8d7 100644 --- a/src/egl/drivers/dri2/platform_surfaceless.c +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -157,6 +157,7 @@ cleanup_driver: close(dri2_dpy->fd); cleanup_display: free(dri2_dpy); + disp->DriverData = NULL; return _eglError(EGL_NOT_INITIALIZED, err); } diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index ff0d5c8..1a295d5 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data) (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable); } +static void +destroy_window_callback(void *data) +{ + struct dri2_egl_surface *dri2_surf = data; + dri2_surf->wl_win = NULL; +} + /** * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
*/ @@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp, dri2_surf->wl_win->private = dri2_surf; dri2_surf->wl_win->resize_callback = resize_callback; + dri2_surf->wl_win->destroy_window_callback = destroy_window_callback; dri2_surf->base.Width = -1; dri2_surf->base.Height = -1; @@ -257,8 +265,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) if (dri2_surf->throttle_callback) wl_callback_destroy(dri2_surf->throttle_callback); - dri2_surf->wl_win->private = NULL; - dri2_surf->wl_win->resize_callback = NULL; + if (dri2_surf->wl_win) { + dri2_surf->wl_win->private = NULL; + dri2_surf->wl_win->resize_callback = NULL; + dri2_surf->wl_win->destroy_window_callback = NULL; + } free(surf); @@ -1238,6 +1249,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp) wl_event_queue_destroy(dri2_dpy->wl_queue); cleanup_dpy: free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } @@ -1883,6 +1895,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp) wl_event_queue_destroy(dri2_dpy->wl_queue); cleanup_dpy: free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index c0a4005..792cabe 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -1231,6 +1231,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) xcb_disconnect(dri2_dpy->conn); cleanup_dpy: free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } @@ -1302,15 +1303,13 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) dri2_dpy->screen = DefaultScreen(dpy); } - if (xcb_connection_has_error(dri2_dpy->conn)) { + if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) { _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed"); goto cleanup_dpy; } - if (dri2_dpy->conn) { - if (!dri3_x11_connect(dri2_dpy)) - goto cleanup_conn; - } + if (!dri3_x11_connect(dri2_dpy)) + 
goto cleanup_conn; if (!dri2_load_driver_dri3(disp)) goto cleanup_conn; @@ -1338,10 +1337,8 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) disp->Extensions.WL_bind_wayland_display = EGL_TRUE; #endif - if (dri2_dpy->conn) { - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false)) - goto cleanup_configs; - } + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false)) + goto cleanup_configs; dri2_dpy->loader_dri3_ext.core = dri2_dpy->core; dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver; @@ -1370,6 +1367,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) xcb_disconnect(dri2_dpy->conn); cleanup_dpy: free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } @@ -1467,6 +1465,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) xcb_disconnect(dri2_dpy->conn); cleanup_dpy: free(dri2_dpy); + disp->DriverData = NULL; return EGL_FALSE; } diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c index 9363a8a..69bfcd8 100644 --- a/src/egl/drivers/dri2/platform_x11_dri3.c +++ b/src/egl/drivers/dri2/platform_x11_dri3.c @@ -103,6 +103,17 @@ egl_dri3_get_dri_context(struct loader_dri3_drawable *draw) return dri2_ctx->dri_context; } +static __DRIscreen * +egl_dri3_get_dri_screen(struct loader_dri3_drawable *draw) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_context *dri2_ctx; + if (!ctx) + return NULL; + dri2_ctx = dri2_egl_context(ctx); + return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen; +} + static void egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags) { @@ -119,6 +130,7 @@ static struct loader_dri3_vtable egl_dri3_vtable = { .set_drawable_size = egl_dri3_set_drawable_size, .in_current_context = egl_dri3_in_current_context, .get_dri_context = egl_dri3_get_dri_context, + .get_dri_screen = egl_dri3_get_dri_screen, .flush_drawable = egl_dri3_flush_drawable, .show_fps = NULL, }; diff --git 
a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 4700dbe..127ca1e 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -627,7 +627,9 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list, _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv); - if (!config && !disp->Extensions.MESA_configless_context) + if (config) + _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv); + else if (!disp->Extensions.MESA_configless_context) RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT); if (!share && share_list != EGL_NO_CONTEXT) @@ -1937,7 +1939,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context, return MESA_GLINTEROP_SUCCESS; } -int +PUBLIC int MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_device_info *out) { @@ -1959,7 +1961,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context, return ret; } -int +PUBLIC int MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out) diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h index 13a7563..6090fc3 100644 --- a/src/egl/main/egldefines.h +++ b/src/egl/main/egldefines.h @@ -34,6 +34,8 @@ #ifndef EGLDEFINES_INCLUDED #define EGLDEFINES_INCLUDED +#include "util/macros.h" + #ifdef __cplusplus extern "C" { #endif @@ -48,7 +50,6 @@ extern "C" { #define _EGL_VENDOR_STRING "Mesa Project" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) #define MIN2(A, B) (((A) < (B)) ? 
(A) : (B)) #ifdef __cplusplus diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c index 938d953..1be6797 100644 --- a/src/egl/main/eglglobals.c +++ b/src/egl/main/eglglobals.c @@ -53,10 +53,16 @@ struct _egl_global _eglGlobal = /* ClientExtensionsString */ "EGL_EXT_client_extensions" " EGL_EXT_platform_base" +#ifdef HAVE_WAYLAND_PLATFORM " EGL_EXT_platform_wayland" +#endif +#ifdef HAVE_X11_PLATFORM " EGL_EXT_platform_x11" - " EGL_KHR_client_get_all_proc_addresses" +#endif +#ifdef HAVE_DRM_PLATFORM " EGL_MESA_platform_gbm" +#endif + " EGL_KHR_client_get_all_proc_addresses" }; diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index 17d7907..e8ee49c 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -262,9 +262,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type, { const char *func; EGLint renderBuffer = EGL_BACK_BUFFER; - EGLint swapBehavior = EGL_BUFFER_PRESERVED; + EGLint swapBehavior = EGL_BUFFER_DESTROYED; EGLint err; + /* Swap behavior can be preserved only if config supports this. 
*/ + if (conf->SurfaceType & EGL_SWAP_BEHAVIOR_PRESERVED_BIT) + swapBehavior = EGL_BUFFER_PRESERVED; + switch (type) { case EGL_WINDOW_BIT: func = "eglCreateWindowSurface"; diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 33625e9..f325031 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -26,6 +26,7 @@ **************************************************************************/ +#include <inttypes.h> #include <string.h> #include "eglsync.h" @@ -75,8 +76,8 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list) return EGL_SUCCESS; for (i = 0; attrib_list[i] != EGL_NONE; i++) { - EGLint attr = attrib_list[i++]; - EGLint val = attrib_list[i]; + EGLAttrib attr = attrib_list[i++]; + EGLAttrib val = attrib_list[i]; switch (attr) { case EGL_CL_EVENT_HANDLE_KHR: @@ -92,7 +93,7 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list) } if (err != EGL_SUCCESS) { - _eglLog(_EGL_DEBUG, "bad sync attribute 0x%04x", attr); + _eglLog(_EGL_DEBUG, "bad sync attribute 0x%" PRIxPTR, attr); break; } } diff --git a/src/egl/wayland/wayland-egl/wayland-egl-priv.h b/src/egl/wayland/wayland-egl/wayland-egl-priv.h index f1e3ba2..c91f9cd 100644 --- a/src/egl/wayland/wayland-egl/wayland-egl-priv.h +++ b/src/egl/wayland/wayland-egl/wayland-egl-priv.h @@ -27,6 +27,7 @@ struct wl_egl_window { void *private; void (*resize_callback)(struct wl_egl_window *, void *); + void (*destroy_window_callback)(void *); }; #ifdef __cplusplus diff --git a/src/egl/wayland/wayland-egl/wayland-egl.c b/src/egl/wayland/wayland-egl/wayland-egl.c index 80a5be5..4a4701a 100644 --- a/src/egl/wayland/wayland-egl/wayland-egl.c +++ b/src/egl/wayland/wayland-egl/wayland-egl.c @@ -66,6 +66,7 @@ wl_egl_window_create(struct wl_surface *surface, egl_window->surface = surface; egl_window->private = NULL; egl_window->resize_callback = NULL; + egl_window->destroy_window_callback = NULL; wl_egl_window_resize(egl_window, width, height, 0, 0); 
egl_window->attached_width = 0; egl_window->attached_height = 0; @@ -76,6 +77,8 @@ wl_egl_window_create(struct wl_surface *surface, WL_EGL_EXPORT void wl_egl_window_destroy(struct wl_egl_window *egl_window) { + if (egl_window->destroy_window_callback) + egl_window->destroy_window_callback(egl_window->private); free(egl_window); } diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 6077976..013bc88 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -310,7 +310,8 @@ C_SOURCES := \ util/u_upload_mgr.h \ util/u_vbuf.c \ util/u_vbuf.h \ - util/u_video.h + util/u_video.h \ + util/u_viewport.h NIR_SOURCES := \ nir/tgsi_to_nir.c \ diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index c8e1f13..0fbc78e 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -45,6 +45,7 @@ struct pipe_loader_sw_device { struct util_dl_library *lib; #endif struct sw_winsys *ws; + int fd; }; #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev) @@ -92,6 +93,7 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev) sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE; sdev->base.driver_name = "swrast"; sdev->base.ops = &pipe_loader_sw_ops; + sdev->fd = -1; #ifdef GALLIUM_STATIC_TARGETS sdev->dd = &driver_descriptors; @@ -169,6 +171,8 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd) if (!pipe_loader_sw_probe_init_common(sdev)) goto fail; + sdev->fd = fd; + for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) { sdev->ws = sdev->dd->winsys[i].create_winsys(fd); @@ -273,6 +277,11 @@ pipe_loader_sw_release(struct pipe_loader_device **dev) util_dl_close(sdev->lib); #endif +#ifdef HAVE_PIPE_LOADER_KMS + if (sdev->fd != -1) + close(sdev->fd); +#endif + FREE(sdev); *dev = NULL; } 
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h index 00f231d..55da21f 100644 --- a/src/gallium/auxiliary/util/u_box.h +++ b/src/gallium/auxiliary/util/u_box.h @@ -140,11 +140,15 @@ static inline void u_box_union_2d(struct pipe_box *dst, const struct pipe_box *a, const struct pipe_box *b) { - dst->x = MIN2(a->x, b->x); - dst->y = MIN2(a->y, b->y); + int x, y; - dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x; - dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y; + x = MIN2(a->x, b->x); + y = MIN2(a->y, b->y); + + dst->width = MAX2(a->x + a->width, b->x + b->width) - x; + dst->height = MAX2(a->y + a->height, b->y + b->height) - y; + dst->x = x; + dst->y = y; } /* Aliasing of @dst permitted. */ @@ -152,13 +156,18 @@ static inline void u_box_union_3d(struct pipe_box *dst, const struct pipe_box *a, const struct pipe_box *b) { - dst->x = MIN2(a->x, b->x); - dst->y = MIN2(a->y, b->y); - dst->z = MIN2(a->z, b->z); - - dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x; - dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y; - dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z; + int x, y, z; + + x = MIN2(a->x, b->x); + y = MIN2(a->y, b->y); + z = MIN2(a->z, b->z); + + dst->width = MAX2(a->x + a->width, b->x + b->width) - x; + dst->height = MAX2(a->y + a->height, b->y + b->height) - y; + dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - z; + dst->x = x; + dst->y = y; + dst->z = z; } static inline boolean diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h index 218822b..074783a 100644 --- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h +++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h @@ -194,7 +194,7 @@ static inline float uf10_to_f32(uint16_t val) if (exponent == 0) { if (mantissa != 0) { - const float scale = 1.0 / (1 << 20); + const float scale = 1.0 / (1 << 19); f32.f = scale * mantissa; } } diff --git 
a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 8916a96..55343e8 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -831,7 +831,7 @@ align(int value, int alignment) static inline uint64_t align64(uint64_t value, unsigned alignment) { - return (value + alignment - 1) & ~(alignment - 1); + return (value + alignment - 1) & ~((uint64_t)alignment - 1); } /** diff --git a/src/gallium/auxiliary/util/u_viewport.h b/src/gallium/auxiliary/util/u_viewport.h new file mode 100644 index 0000000..a731b34 --- /dev/null +++ b/src/gallium/auxiliary/util/u_viewport.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2016 Ilia Mirkin. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_VIEWPORT_H +#define U_VIEWPORT_H + +#include "c99_compat.h" +#include "pipe/p_state.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +util_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz, + float *zmin, float *zmax) +{ + float a, b; + if (halfz) { + a = vp->translate[2]; + b = vp->translate[2] + vp->scale[2]; + } else { + a = vp->translate[2] - vp->scale[2]; + b = vp->translate[2] + vp->scale[2]; + } + + *zmin = a < b ? a : b; + *zmax = a < b ? b : a; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c index f7f572e..493e645 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -89,6 +89,7 @@ dri3_free_front_buffer(struct vl_dri3_screen *scrn, { xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence); xshmfence_unmap_shm(buffer->shm_fence); + pipe_resource_reference(&buffer->texture, NULL); FREE(buffer); } diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c index 6a759ae..df8809c 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_drm.c +++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c @@ -26,6 +26,7 @@ **************************************************************************/ #include <assert.h> +#include <fcntl.h> #include "pipe/p_screen.h" #include "pipe-loader/pipe_loader.h" @@ -47,7 +48,7 @@ vl_drm_screen_create(int fd) if (!vscreen) return NULL; - if (fd < 0 || (new_fd = dup(fd)) < 0) + if (fd < 0 || (new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3)) < 0) goto free_screen; if (pipe_loader_drm_probe_fd(&vscreen->dev, new_fd)) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index dcb6dfb..bf787d1 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ 
b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) { return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; } -#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080 +#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 #define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 #define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 6723941..51fe4bc 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -158,6 +158,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; + if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer)) + emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable; + fixup_shader_state(ctx, &emit.key); unsigned dirty = ctx->dirty; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 4a5242a..45bab58 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_helpers.h" #include "util/u_format.h" +#include "util/u_viewport.h" #include "freedreno_resource.h" #include "freedreno_query_hw.h" @@ -529,7 +530,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control; if (fp->writes_pos) { val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z; @@ -538,6 +539,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (fp->has_kill) { val |= 
A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; } + if (!ctx->rasterizer->depth_clip) { + val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE; + } OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, val); } @@ -561,20 +565,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer) ->gras_cl_clip_cntl; + uint8_t planes = ctx->rasterizer->clip_plane_enable; val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE); val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD); - /* TODO only use if prog doesn't use clipvertex/clipdist */ - val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( - MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6)); + if (!emit->key.ucp_enables) + val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( + MIN2(util_bitcount(planes), 6)); OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, val); } - if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) { + if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) { uint32_t planes = ctx->rasterizer->clip_plane_enable; int count = 0; + if (emit->key.ucp_enables) + planes = 0; + while (planes && count < 6) { int i = ffs(planes) - 1; @@ -615,19 +623,35 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, val); } - if (dirty & FD_DIRTY_SCISSOR) { + if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + int minx = scissor->minx; + int miny = scissor->miny; + int maxx = scissor->maxx; + int maxy = scissor->maxy; + + /* Unfortunately there is no separate depth clip disable, only an all + * or nothing deal. So when we disable clipping, we must handle the + * viewport clip via scissors. 
+ */ + if (!ctx->rasterizer->depth_clip) { + struct pipe_viewport_state *vp = &ctx->viewport; + minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1]))); + maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0]))); + maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1]))); + } OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); - OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) | - A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny)); - OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) | - A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1)); - - ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); - ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); - ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); - ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1)); + + ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, minx); + ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, miny); + ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, maxx); + ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, maxy); } if (dirty & FD_DIRTY_VIEWPORT) { @@ -641,6 +665,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); } + if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { + float zmin, zmax; + int depth = 24; + if (ctx->framebuffer.zsbuf) { + depth = util_format_get_component_bits( + pipe_surface_format(ctx->framebuffer.zsbuf), + UTIL_FORMAT_COLORSPACE_ZS, 0); + } + util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz, + &zmin, &zmax); + + 
OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2); + if (depth == 32) { + OUT_RING(ring, (uint32_t)(zmin * 0xffffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffffff)); + } else if (depth == 16) { + OUT_RING(ring, (uint32_t)(zmin * 0xffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffff)); + } else { + OUT_RING(ring, (uint32_t)(zmin * 0xffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffff)); + } + } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; int nr_cbufs = pfb->nr_cbufs; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 8152f8f..e9059ce 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -28,6 +28,7 @@ #include "pipe/p_state.h" #include "util/u_string.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" @@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) delete_shader_stateobj(so); } +bool +fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so, + const struct pipe_rasterizer_state *rast) +{ + uint64_t outputs = ir3_shader_outputs(so->shader); + + return (!rast->depth_clip || + util_bitcount(rast->clip_plane_enable) > 6 || + outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) | + (1ULL << VARYING_SLOT_CLIP_DIST0) | + (1ULL << VARYING_SLOT_CLIP_DIST1))); +} + + static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index b3fcc0c..b95df4c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, void fd3_prog_init(struct pipe_context *pctx); +bool 
fd3_needs_manual_clipping(const struct fd3_shader_stateobj *, + const struct pipe_rasterizer_state *); + #endif /* FD3_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index d9a7bb5..aeb61e7 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -1376,7 +1376,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) { return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK; } -#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080 +#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 #define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 @@ -3145,6 +3145,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 #define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 #define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 00e985d..8b350ae 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_helpers.h" #include "util/u_format.h" +#include "util/u_viewport.h" #include "freedreno_resource.h" #include "freedreno_query_hw.h" @@ -544,12 +545,14 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { struct fd4_zsa_stateobj *zsa = 
fd4_zsa_stateobj(ctx->zsa); bool fragz = fp->has_kill | fp->writes_pos; + bool clamp = !ctx->rasterizer->depth_clip; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, zsa->rb_depth_control | + COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) | COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) | COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS)); @@ -636,6 +639,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } + if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { + float zmin, zmax; + int depth = 24; + if (ctx->framebuffer.zsbuf) { + depth = util_format_get_component_bits( + pipe_surface_format(ctx->framebuffer.zsbuf), + UTIL_FORMAT_COLORSPACE_ZS, 0); + } + util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz, + &zmin, &zmax); + + OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2); + if (depth == 32) { + OUT_RING(ring, fui(zmin)); + OUT_RING(ring, fui(zmax)); + } else if (depth == 16) { + OUT_RING(ring, (uint32_t)(zmin * 0xffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffff)); + } else { + OUT_RING(ring, (uint32_t)(zmin * 0xffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffff)); + } + } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; unsigned n = pfb->nr_cbufs; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c index 7456c63..b3a4292 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -98,7 +98,8 @@ fd4_rasterizer_state_create(struct pipe_context *pctx, so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; if (!cso->depth_clip) - so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; + so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE | + 
A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE; if (cso->clip_halfz) so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index ee0018f..fc423ec 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -469,6 +469,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) debug_printf("\n"); } +uint64_t +ir3_shader_outputs(const struct ir3_shader *so) +{ + return so->nir->info.outputs_written; +} + /* This has to reach into the fd_context a bit more than the rest of * ir3, but it needs to be aligned with the compiler, so both agree * on which const regs hold what. And the logic is identical between diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c17a76b..f430b6b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -272,6 +272,7 @@ void ir3_shader_destroy(struct ir3_shader *shader); struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, struct pipe_debug_callback *debug); void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin); +uint64_t ir3_shader_outputs(const struct ir3_shader *so); struct fd_ringbuffer; struct fd_context; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 80e0990..3a23a9a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -726,7 +726,7 @@ void CodeEmitterGK110::emitIMAD(const Instruction *i) { uint8_t addOp = - (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); + i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1); emitForm_21(i, 0x100, 0xa00); @@ -773,7 +773,7 
@@ CodeEmitterGK110::emitNOT(const Instruction *i) break; case FILE_MEMORY_CONST: code[1] |= 0x4 << 28; - setCAddress14(i->src(1)); + setCAddress14(i->src(0)); break; default: assert(0); @@ -1321,15 +1321,12 @@ void CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) { code[0] = 0x00000002 | ((qOp & 1) << 31); - code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12); + code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall defId(i->def(0), 2); srcId(i->src(0), 10); srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); - if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) - code[1] |= 1 << 9; // dall - emitPredicate(i); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index e62d807..d3e1708 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -1682,7 +1682,7 @@ CodeEmitterGM107::emitNOT() void CodeEmitterGM107::emitIADD() { - if (!longIMMD(insn->src(1))) { + if (insn->src(1).getFile() != FILE_IMMEDIATE) { switch (insn->src(1).getFile()) { case FILE_GPR: emitInsn(0x5c100000); @@ -1707,6 +1707,7 @@ CodeEmitterGM107::emitIADD() emitX (0x2b); } else { emitInsn(0x1c000000); + emitNEG (0x38, insn->src(0)); emitSAT (0x36); emitX (0x35); emitCC (0x34); @@ -2300,6 +2301,7 @@ CodeEmitterGM107::emitAL2P() { emitInsn (0xefa00000); emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); + emitPRED (0x2c); emitO (0x20); emitField(0x14, 11, insn->src(0).get()->reg.data.offset); emitGPR (0x08, insn->src(0).getIndirect(0)); @@ -2523,7 +2525,7 @@ CodeEmitterGM107::emitTEX() if (insn->tex.rIndirectSrc >= 0) { emitInsn (0xdeb80000); - emitField(0x35, 2, lodm); + emitField(0x25, 2, lodm); emitField(0x24, 1, insn->tex.useOffsets == 1); } else { emitInsn (0xc0380000); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 5d68e99..ca10848 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -2112,7 +2112,7 @@ makeInstructionLong(Instruction *insn) insn->encSize = 8; for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { - fn->bbArray[i]->binPos += 4; + fn->bbArray[i]->binPos += adj; } fn->binSize += adj; insn->bb->binSize += adj; @@ -2164,9 +2164,16 @@ replaceExitWithModifier(Function *func) return; } } - epilogue->binSize -= 8; - func->binSize -= 8; + + int adj = epilogue->getExit()->encSize; + epilogue->binSize -= adj; + func->binSize -= adj; delete_Instruction(func->getProgram(), epilogue->getExit()); + + // There may be BB's that are laid out after the exit block + for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) { + func->bbArray[i]->binPos -= adj; + } } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index bc94285..be5ee4f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -736,9 +736,15 @@ CodeEmitterNVC0::emitUADD(const Instruction *i) void CodeEmitterNVC0::emitIMAD(const Instruction *i) { + uint8_t addOp = + i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1); + assert(i->encSize == 8); emitForm_A(i, HEX64(20000000, 00000003)); + assert(addOp != 3); + code[0] |= addOp << 8; + if (isSignedType(i->dType)) code[0] |= 1 << 7; if (isSignedType(i->sType)) @@ -749,10 +755,6 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i) if (i->flagsDef >= 0) code[1] |= 1 << 16; if (i->flagsSrc >= 0) code[1] |= 1 << 23; - if (i->src(2).mod.neg()) code[0] |= 0x10; - if (i->src(1).mod.neg() ^ - i->src(0).mod.neg()) code[0] |= 0x20; - if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) code[0] |= 1 << 6; } @@ -1356,16 
+1358,13 @@ CodeEmitterNVC0::emitTXQ(const TexInstruction *i) void CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) { - code[0] = 0x00000000 | (laneMask << 6); + code[0] = 0x00000200 | (laneMask << 6); // dall code[1] = 0x48000000 | qOp; defId(i->def(0), 14); srcId(i->src(0), 20); srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26); - if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) - code[0] |= 1 << 9; // dall - emitPredicate(i); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index beb7b53..899a5cd 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -182,6 +182,7 @@ public: // mask of used components of source s unsigned int srcMask(unsigned int s) const; + unsigned int texOffsetMask() const; SrcRegister getSrc(unsigned int s) const { @@ -234,6 +235,35 @@ private: const struct tgsi_full_instruction *insn; }; +unsigned int Instruction::texOffsetMask() const +{ + const struct tgsi_instruction_texture *tex = &insn->Texture; + assert(insn->Instruction.Texture); + + switch (tex->Texture) { + case TGSI_TEXTURE_BUFFER: + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + return 0x1; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_2D_MSAA: + case TGSI_TEXTURE_2D_ARRAY_MSAA: + return 0x3; + case TGSI_TEXTURE_3D: + return 0x7; + default: + assert(!"Unexpected texture target"); + return 0xf; + } +} + unsigned int Instruction::srcMask(unsigned int s) const { unsigned int mask = insn->Dst[0].Register.WriteMask; @@ -942,6 +972,9 @@ private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct 
tgsi_full_declaration *); bool scanInstruction(const struct tgsi_full_instruction *); + void scanInstructionSrc(const Instruction& insn, + const Instruction::SrcRegister& src, + unsigned mask); void scanProperty(const struct tgsi_full_property *); void scanImmediate(const struct tgsi_full_immediate *); @@ -1351,6 +1384,61 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const insn.getSrc(0).getFile() == TGSI_FILE_INPUT; } +void Source::scanInstructionSrc(const Instruction& insn, + const Instruction::SrcRegister& src, + unsigned mask) +{ + if (src.getFile() == TGSI_FILE_TEMPORARY) { + if (src.isIndirect(0)) + indirectTempArrays.insert(src.getArrayId()); + } else + if (src.getFile() == TGSI_FILE_BUFFER || + src.getFile() == TGSI_FILE_IMAGE || + (src.getFile() == TGSI_FILE_MEMORY && + memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { + info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? + 0x1 : 0x2; + } else + if (src.getFile() == TGSI_FILE_OUTPUT) { + if (src.isIndirect(0)) { + // We don't know which one is accessed, just mark everything for + // reading. This is an extremely unlikely occurrence. 
+ for (unsigned i = 0; i < info->numOutputs; ++i) + info->out[i].oread = 1; + } else { + info->out[src.getIndex(0)].oread = 1; + } + } + if (src.getFile() != TGSI_FILE_INPUT) + return; + + if (src.isIndirect(0)) { + for (unsigned i = 0; i < info->numInputs; ++i) + info->in[i].mask = 0xf; + } else { + const int i = src.getIndex(0); + for (unsigned c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + int k = src.getSwizzle(c); + if (k <= TGSI_SWIZZLE_W) + info->in[i].mask |= 1 << k; + } + switch (info->in[i].sn) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_PRIMID: + case TGSI_SEMANTIC_FOG: + info->in[i].mask &= 0x1; + break; + case TGSI_SEMANTIC_PCOORD: + info->in[i].mask &= 0x3; + break; + default: + break; + } + } +} + bool Source::scanInstruction(const struct tgsi_full_instruction *inst) { Instruction insn(inst); @@ -1383,66 +1471,19 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) indirectTempArrays.insert(dst.getArrayId()); } else if (dst.getFile() == TGSI_FILE_BUFFER || - dst.getFile() == TGSI_FILE_IMAGE || + dst.getFile() == TGSI_FILE_IMAGE || (dst.getFile() == TGSI_FILE_MEMORY && memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= 0x2; } } - for (unsigned s = 0; s < insn.srcCount(); ++s) { - Instruction::SrcRegister src = insn.getSrc(s); - if (src.getFile() == TGSI_FILE_TEMPORARY) { - if (src.isIndirect(0)) - indirectTempArrays.insert(src.getArrayId()); - } else - if (src.getFile() == TGSI_FILE_BUFFER || - src.getFile() == TGSI_FILE_IMAGE || - (src.getFile() == TGSI_FILE_MEMORY && - memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { - info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? - 0x1 : 0x2; - } else - if (src.getFile() == TGSI_FILE_OUTPUT) { - if (src.isIndirect(0)) { - // We don't know which one is accessed, just mark everything for - // reading. This is an extremely unlikely occurrence. 
- for (unsigned i = 0; i < info->numOutputs; ++i) - info->out[i].oread = 1; - } else { - info->out[src.getIndex(0)].oread = 1; - } - } - if (src.getFile() != TGSI_FILE_INPUT) - continue; - unsigned mask = insn.srcMask(s); + for (unsigned s = 0; s < insn.srcCount(); ++s) + scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s)); + + for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s) + scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask()); - if (src.isIndirect(0)) { - for (unsigned i = 0; i < info->numInputs; ++i) - info->in[i].mask = 0xf; - } else { - const int i = src.getIndex(0); - for (unsigned c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - int k = src.getSwizzle(c); - if (k <= TGSI_SWIZZLE_W) - info->in[i].mask |= 1 << k; - } - switch (info->in[i].sn) { - case TGSI_SEMANTIC_PSIZE: - case TGSI_SEMANTIC_PRIMID: - case TGSI_SEMANTIC_FOG: - info->in[i].mask &= 0x1; - break; - case TGSI_SEMANTIC_PCOORD: - info->in[i].mask &= 0x3; - break; - default: - break; - } - } - } return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp index 23414d5..b1076cf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp @@ -287,7 +287,10 @@ private: bb.push(node); - while (bb.getSize()) { + while (bb.getSize() || cross.getSize()) { + if (bb.getSize() == 0) + cross.moveTo(bb); + node = reinterpret_cast<Graph::Node *>(bb.pop().u.p); assert(node); if (!node->visit(sequence)) @@ -314,9 +317,6 @@ private: } } nodes[count++] = node; - - if (bb.getSize() == 0) - cross.moveTo(bb); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 6a6b44c..4e60b1c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -750,6 +750,16 @@ 
NVC0LoweringPass::handleTEX(TexInstruction *i) i->tex.rIndirectSrc = 0; i->tex.sIndirectSrc = -1; } + // Move the indirect reference to right after the coords + else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) { + Value *hnd = i->getIndirectR(); + + i->setIndirectR(NULL); + i->moveSources(arg, 1); + i->setSrc(arg, hnd); + i->tex.rIndirectSrc = 0; + i->tex.sIndirectSrc = -1; + } } else // (nvc0) generate and move the tsc/tic/array source to the front if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { @@ -823,7 +833,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) for (n = 0; n < i->tex.useOffsets; n++) { for (c = 0; c < 2; ++c) { if ((n % 2) == 0 && c == 0) - offs[n / 2] = i->offset[n][c].get(); + bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get()); else bld.mkOp3(OP_INSBF, TYPE_U32, offs[n / 2], @@ -2056,6 +2066,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su) base = 0; } + if (ind) { + Value *ptr; + ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r)); + ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7)); + su->setIndirectR(ptr); + } + // get surface coordinates for (c = 0; c < arg; ++c) src[c] = su->getSrc(c); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 6b52d7b..bf260bb 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1902,8 +1902,10 @@ GCRA::resolveSplitsAndMerges() // their registers should be identical. 
if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) { Instruction *phi = v->getInsn(); - for (int phis = 0; phi->srcExists(phis); ++phis) + for (int phis = 0; phi->srcExists(phis); ++phis) { phi->getSrc(phis)->join = v; + phi->getSrc(phis)->reg.data.id = v->reg.data.id; + } } reg += v->reg.size; } diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 7b0d074..1c71534 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -127,6 +127,8 @@ nv30_render_draw_elements(struct vbuf_render *render, struct nouveau_pushbuf *push = nv30->screen->base.pushbuf; unsigned i; + pipe_mutex_lock(nv30->screen->base.push_mutex); + BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs); for (i = 0; i < r->vertex_info.num_attribs; i++) { PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, @@ -134,8 +136,10 @@ nv30_render_draw_elements(struct vbuf_render *render, NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); } - if (!nv30_state_validate(nv30, ~0, false)) + if (!nv30_state_validate(nv30, ~0, false)) { + pipe_mutex_unlock(nv30->screen->base.push_mutex); return; + } BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, r->prim); @@ -160,6 +164,8 @@ nv30_render_draw_elements(struct vbuf_render *render, BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); PUSH_RESET(push, BUFCTX_VTXTMP); + + pipe_mutex_unlock(nv30->screen->base.push_mutex); } static void @@ -172,6 +178,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) unsigned ps = fn + (pn ? 
1 : 0); unsigned i; + pipe_mutex_lock(nv30->screen->base.push_mutex); + BEGIN_NV04(push, NV30_3D(VTXBUF(0)), r->vertex_info.num_attribs); for (i = 0; i < r->vertex_info.num_attribs; i++) { PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, @@ -179,8 +187,10 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); } - if (!nv30_state_validate(nv30, ~0, false)) + if (!nv30_state_validate(nv30, ~0, false)) { + pipe_mutex_unlock(nv30->screen->base.push_mutex); return; + } BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, r->prim); @@ -197,6 +207,8 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); PUSH_RESET(push, BUFCTX_VTXTMP); + + pipe_mutex_unlock(nv30->screen->base.push_mutex); } static void @@ -383,6 +395,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv30_render_validate(nv30); + pipe_mutex_unlock(nv30->screen->base.push_mutex); + if (nv30->draw_dirty & NV30_NEW_VIEWPORT) draw_set_viewport_states(draw, 0, 1, &nv30->viewport); if (nv30->draw_dirty & NV30_NEW_RASTERIZER) @@ -448,6 +462,8 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (transfer[i]) pipe_buffer_unmap(pipe, transfer[i]); + pipe_mutex_lock(nv30->screen->base.push_mutex); + nv30->draw_dirty = 0; nv30_state_release(nv30); } diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c index 6de61bc..fd21f99 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c @@ -38,6 +38,8 @@ nv30_fragprog_upload(struct nv30_context *nv30) struct nv30_fragprog *fp = nv30->fragprog.program; struct pipe_context *pipe = &nv30->base.pipe; + pipe_mutex_unlock(nv->screen->push_mutex); + if (unlikely(!fp->buffer)) 
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4); @@ -60,6 +62,8 @@ nv30_fragprog_upload(struct nv30_context *nv30) if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM) nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM); + + pipe_mutex_lock(nv->screen->push_mutex); } void diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.c b/src/gallium/drivers/nouveau/nv30/nv30_state.c index fd604c2..43ecaac 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c @@ -379,8 +379,9 @@ nv30_set_framebuffer_state(struct pipe_context *pipe, struct nv30_miptree *zeta_mt = nv30_miptree(fb->zsbuf->texture); if (color_mt->swizzled != zeta_mt->swizzled || - (util_format_get_blocksize(fb->zsbuf->format) > 2) != - (util_format_get_blocksize(fb->cbufs[0]->format) > 2)) { + (color_mt->swizzled && + (util_format_get_blocksize(fb->zsbuf->format) > 2) != + (util_format_get_blocksize(fb->cbufs[0]->format) > 2))) { nv30->framebuffer.zsbuf = NULL; debug_printf("Mismatched color and zeta formats, ignoring zeta.\n"); } diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c index 9ecbcd1..24fa3bb 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c @@ -115,7 +115,8 @@ nv30_transfer_rect_fragprog(struct nv30_context *nv30) struct pipe_context *pipe = &nv30->base.pipe; if (!fp) { - nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4); + nv30->blit_fp = + pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4); if (nv30->blit_fp) { struct pipe_transfer *transfer; u32 *map = pipe_buffer_map(pipe, nv30->blit_fp, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 34d32d1..6ea5a47 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ 
-161,7 +161,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB), F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB), - F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB), + F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T), F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC), I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR), I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR), @@ -203,7 +203,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T), C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T), - C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), + C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T), C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T), C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T), C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T), diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index c764f5c..383bee3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -307,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, const unsigned r = pso->output[i].register_index; b = pso->output[i].output_buffer; + if (r >= info->numOutputs) + continue; + for (c = 0; c < pso->output[i].num_components; ++c) so->map[base[b] + p + c] = info->out[r].slot[s + c]; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 65f7338..a67a390 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -1,5 +1,6 @@ #include "util/u_format.h" +#include "util/u_viewport.h" #include "nv50/nv50_context.h" @@ -265,8 +266,12 @@ 
nv50_validate_viewport(struct nv50_context *nv50) PUSH_DATAf(push, vpt->scale[1]); PUSH_DATAf(push, vpt->scale[2]); - zmin = vpt->translate[2] - fabsf(vpt->scale[2]); - zmax = vpt->translate[2] + fabsf(vpt->scale[2]); + /* If the halfz setting ever changes, the viewports will also get + * updated. The rast will get updated before the validate function has a + * chance to hit, so we can just use it directly without an atom + * dependency. + */ + util_viewport_zmin_zmax(vpt, nv50->rast->pipe.clip_halfz, &zmin, &zmax); #ifdef NV50_SCISSORS_CLIPPING BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 28d6fec..12d5b0e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -514,10 +514,8 @@ nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx, NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count); } -static void -nvc0_context_get_sample_position(struct pipe_context *pipe, - unsigned sample_count, unsigned sample_index, - float *xy) +const void * +nvc0_get_sample_locations(unsigned sample_count) { static const uint8_t ms1[1][2] = { { 0x8, 0x8 } }; static const uint8_t ms2[2][2] = { @@ -549,8 +547,22 @@ nvc0_context_get_sample_position(struct pipe_context *pipe, case 8: ptr = ms8; break; default: assert(0); - return; /* bad sample count -> undefined locations */ + return NULL; /* bad sample count -> undefined locations */ } + return ptr; +} + +static void +nvc0_context_get_sample_position(struct pipe_context *pipe, + unsigned sample_count, unsigned sample_index, + float *xy) +{ + const uint8_t (*ptr)[2]; + + ptr = nvc0_get_sample_locations(sample_count); + if (!ptr) + return; + xy[0] = ptr[sample_index][0] * 0.0625f; xy[1] = ptr[sample_index][1] * 0.0625f; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 8d27300..ff5467c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -278,6 +278,7 @@ struct pipe_context *nvc0_create(struct pipe_screen *, void *, unsigned flags); void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *, bool on_flush); void nvc0_default_kick_notify(struct nouveau_pushbuf *); +const void *nvc0_get_sample_locations(unsigned); /* nvc0_draw.c */ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index aba9511..e1ff3b7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -500,11 +500,14 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, for (i = 0; i < pso->num_outputs; ++i) { unsigned s = pso->output[i].start_component; unsigned p = pso->output[i].dst_offset; + const unsigned r = pso->output[i].register_index; b = pso->output[i].output_buffer; + if (r >= info->numOutputs) + continue; + for (c = 0; c < pso->output[i].num_components; ++c) - tfb->varying_index[b][p++] = - info->out[pso->output[i].register_index].slot[s + c]; + tfb->varying_index[b][p++] = info->out[r].slot[s + c]; tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); tfb->stream[b] = pso->output[i].stream; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index ad44e85..ce59484 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -2,6 +2,7 @@ #include "util/u_format.h" #include "util/u_framebuffer.h" #include "util/u_math.h" +#include "util/u_viewport.h" #include "nvc0/nvc0_context.h" @@ -211,6 +212,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0) PUSH_DATAf(push, xy[1]); } + if 
(screen->base.class_3d >= GM200_3D_CLASS) { + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); + uint32_t val[4] = {}; + + for (i = 0; i < 16; i++) { + val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0); + val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4); + } + + BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); + PUSH_DATAp(push, val, 4); + } + if (serialize) IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); @@ -316,8 +330,12 @@ nvc0_validate_viewport(struct nvc0_context *nvc0) PUSH_DATA (push, (w << 16) | x); PUSH_DATA (push, (h << 16) | y); - zmin = vp->translate[2] - fabsf(vp->scale[2]); - zmax = vp->translate[2] + fabsf(vp->scale[2]); + /* If the halfz setting ever changes, the viewports will also get + * updated. The rast will get updated before the validate function has a + * chance to hit, so we can just use it directly without an atom + * dependency. + */ + util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax); BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2); PUSH_DATAf(push, zmin); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 1a5d8ec..efd90de 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -589,13 +589,11 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) PUSH_DATA (nvc0->base.pushbuf, 0); } - if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) { - /* Invalidate all CP textures because they are aliased. */ - for (int i = 0; i < nvc0->num_textures[5]; i++) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i)); - nvc0->textures_dirty[5] = ~0; - nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES; - } + /* Invalidate all CP textures because they are aliased. 
*/ + for (int i = 0; i < nvc0->num_textures[5]; i++) + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i)); + nvc0->textures_dirty[5] = ~0; + nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES; } bool @@ -709,11 +707,9 @@ void nvc0_validate_samplers(struct nvc0_context *nvc0) PUSH_DATA (nvc0->base.pushbuf, 0); } - if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) { - /* Invalidate all CP samplers because they are aliased. */ - nvc0->samplers_dirty[5] = ~0; - nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; - } + /* Invalidate all CP samplers because they are aliased. */ + nvc0->samplers_dirty[5] = ~0; + nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; } /* Upload the "diagonal" entries for the possible texture sources ($t == $s). diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index cae621c..3d20c68 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -835,7 +835,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) /* Queue things up to let the macros write params to the driver constbuf */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); + PUSH_DATA (push, 2048); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); BEGIN_NVC0(push, NVC0_3D(CB_POS), 1); @@ -981,7 +981,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nvc0->vertprog->vp.need_draw_parameters) { PUSH_SPACE(push, 9); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, 512); + PUSH_DATA (push, 2048); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); if (!info->indirect) { diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index a1d1d3e..d172d73 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -237,7 +237,13 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0) BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } + + /* Invalidate all 3D samplers because they are aliased. */ + for (int s = 0; s < 5; s++) + nvc0->samplers_dirty[s] = ~0; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; } + /* (Code duplicated at bottom for various non-convincing reasons. * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC * entries to avoid a subchannel switch. @@ -690,6 +696,14 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) } nvc0->state.num_textures[s] = nvc0->num_textures[s]; + + /* Invalidate all 3D textures because they are aliased. */ + for (int s = 0; s < 5; s++) { + for (int i = 0; i < nvc0->num_textures[s]; i++) + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); + nvc0->textures_dirty[s] = ~0; + } + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d100a9d..341f406 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -190,7 +190,7 @@ static boolean r300_setup_atoms(struct r300_context* r300) /* VAP. */ R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(pvs_flush, 2); - R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9); + R300_INIT_ATOM(vap_invariant_state, is_r500 || !has_tcl ? 11 : 9); R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -314,6 +314,14 @@ static void r300_init_states(struct pipe_context *pipe) if (r300->screen->caps.is_r500) { OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0); + } else if (!r300->screen->caps.has_tcl) { + /* RSxxx: + * Static VAP setup since r300_emit_vs_state() is never called. 
+ */ + OUT_CB_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) | + R300_PVS_NUM_CNTLRS(5) | + R300_PVS_NUM_FPUS(2) | + R300_PVS_VF_MAX_VTX_NUM(5)); } END_CB; } diff --git a/src/gallium/drivers/radeon/cayman_msaa.c b/src/gallium/drivers/radeon/cayman_msaa.c index 9412e89..6d6998e 100644 --- a/src/gallium/drivers/radeon/cayman_msaa.c +++ b/src/gallium/drivers/radeon/cayman_msaa.c @@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx) void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples) { switch (nr_samples) { + default: + case 1: + radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0); + radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0); + radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0); + break; case 2: radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]); radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 23ddff4..8de3b18 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -703,8 +703,9 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen } rtex->cmask_buffer = (struct r600_resource *) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, rtex->cmask.size); + r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, + rtex->cmask.size, + rtex->cmask.alignment); if (rtex->cmask_buffer == NULL) { rtex->cmask.size = 0; return; diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 74b36ec..9ab5af9 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -513,6 +513,16 @@ void radeon_llvm_emit_store( } } +/* Emit a branch to the given default target for the current block if + * applicable -- that is, if the current block does not already contain a + * branch from a break or continue. + */ +static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target) +{ + if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) + LLVMBuildBr(builder, target); +} + static void bgnloop_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -577,28 +587,8 @@ static void else_emit( struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct gallivm_state * gallivm = bld_base->base.gallivm; struct radeon_llvm_branch * current_branch = get_current_branch(ctx); - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder); - - /* We need to add a terminator to the current block if the previous - * instruction was an ENDIF.Example: - * IF - * [code] - * IF - * [code] - * ELSE - * [code] - * ENDIF <-- - * ELSE<-- - * [code] - * ENDIF - */ - if (current_block != current_branch->if_block) { - LLVMBuildBr(gallivm->builder, current_branch->endif_block); - } - if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) { - LLVMBuildBr(gallivm->builder, current_branch->endif_block); - } + emit_default_branch(gallivm->builder, current_branch->endif_block); current_branch->has_else = 1; LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block); } @@ -611,26 +601,15 @@ static void endif_emit( struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct gallivm_state * gallivm = bld_base->base.gallivm; struct radeon_llvm_branch * current_branch = get_current_branch(ctx); - LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder); - /* If we have consecutive ENDIF instructions, then the first ENDIF - * will not have a terminator, so we need to add one. 
*/ - if (current_block != current_branch->if_block - && current_block != current_branch->else_block - && !LLVMGetBasicBlockTerminator(current_block)) { + emit_default_branch(gallivm->builder, current_branch->endif_block); - LLVMBuildBr(gallivm->builder, current_branch->endif_block); - } + /* Need to fixup an empty else block if there was no ELSE opcode. */ if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) { LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block); LLVMBuildBr(gallivm->builder, current_branch->endif_block); } - if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) { - LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block); - LLVMBuildBr(gallivm->builder, current_branch->endif_block); - } - LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block); ctx->branch_depth--; } @@ -644,9 +623,7 @@ static void endloop_emit( struct gallivm_state * gallivm = bld_base->base.gallivm; struct radeon_llvm_loop * current_loop = get_current_loop(ctx); - if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) { - LLVMBuildBr(gallivm->builder, current_loop->loop_block); - } + emit_default_branch(gallivm->builder, current_loop->loop_block); LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block); ctx->loop_depth--; @@ -1326,23 +1303,32 @@ static void emit_lsb(const struct lp_build_tgsi_action * action, struct lp_build_emit_data * emit_data) { struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef args[2] = { emit_data->args[0], /* The value of 1 means that ffs(x=0) = undef, so LLVM won't * add special code to check for x=0. The reason is that * the LLVM behavior for x=0 is different from what we - * need here. - * - * The hardware already implements the correct behavior. + * need here. 
However, LLVM also assumes that ffs(x) is + * in [0, 31], but GLSL expects that ffs(0) = -1, so + * a conditional assignment to handle 0 is still required. */ - lp_build_const_int32(gallivm, 1) + LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0) }; - emit_data->output[emit_data->chan] = + LLVMValueRef lsb = lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32", emit_data->dst_type, args, ARRAY_SIZE(args), LLVMReadNoneAttribute); + + /* TODO: We need an intrinsic to skip this conditional. */ + /* Check for zero: */ + emit_data->output[emit_data->chan] = + LLVMBuildSelect(builder, + LLVMBuildICmp(builder, LLVMIntEQ, args[0], + bld_base->uint_bld.zero, ""), + lp_build_const_int32(gallivm, -1), lsb, ""); } /* Find the last bit set. */ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index d8ec2a3..8e23b61 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -519,6 +519,12 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } + /* Carrizo SDMA texture copying is very broken for some users. + * https://bugs.freedesktop.org/show_bug.cgi?id=97029 + */ + if (sctx->b.family == CHIP_CARRIZO) + goto fallback; + if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 484b252..c279c77 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -158,6 +158,7 @@ static void si_set_global_binding( static void si_initialize_compute(struct si_context *sctx) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + uint64_t bc_va; radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); radeon_emit(cs, 0); @@ -193,6 +194,17 @@ static void si_initialize_compute(struct si_context *sctx) 0x190 /* Default value */); } + /* Set the pointer to border colors. 
*/ + bc_va = sctx->border_color_buffer->gpu_address; + + if (sctx->b.chip_class >= CIK) { + radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); + radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ + radeon_emit(cs, bc_va >> 40); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ + } else { + radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); + } + sctx->cs_shader_state.emitted_program = NULL; sctx->cs_shader_state.initialized = true; } @@ -459,6 +471,20 @@ static void si_launch_grid( si_decompress_compute_textures(sctx); + /* Add buffer sizes for memory checking in need_cs_space. */ + r600_context_add_resource_size(ctx, &program->shader.bo->b.b); + /* TODO: add the scratch buffer */ + + if (info->indirect) { + r600_context_add_resource_size(ctx, info->indirect); + + /* The hw doesn't read the indirect buffer via TC L2. */ + if (r600_resource(info->indirect)->TC_L2_dirty) { + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + r600_resource(info->indirect)->TC_L2_dirty = false; + } + } + si_need_cs_space(sctx); if (!sctx->cs_shader_state.initialized) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index bbd02e9..fe4cb29 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -819,9 +819,9 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf util_memcpy_cpu_to_le32(tmp, ptr, size); } -void si_set_constant_buffer(struct si_context *sctx, - struct si_buffer_resources *buffers, - uint slot, struct pipe_constant_buffer *input) +static void si_set_constant_buffer(struct si_context *sctx, + struct si_buffer_resources *buffers, + uint slot, struct pipe_constant_buffer *input) { assert(slot < buffers->desc.num_elements); pipe_resource_reference(&buffers->buffers[slot], NULL); @@ -881,6 +881,12 @@ void si_set_constant_buffer(struct si_context *sctx, buffers->desc.dirty_mask |= 1u << slot; } +void 
si_set_rw_buffer(struct si_context *sctx, + uint slot, struct pipe_constant_buffer *input) +{ + si_set_constant_buffer(sctx, &sctx->rw_buffers, slot, input); +} + static void si_pipe_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot, struct pipe_constant_buffer *input) @@ -1052,10 +1058,10 @@ static void si_set_streamout_targets(struct pipe_context *ctx, * and most other clients can use TC L2 as well, we don't need * to flush it. * - * The only case which requires flushing it is VGT DMA index - * fetching, which is a rare case. Thus, flag the TC L2 - * dirtiness in the resource and handle it when index fetching - * is used. + * The only cases which requires flushing it is VGT DMA index + * fetching (on <= CIK) and indirect draw data, which are rare + * cases. Thus, flag the TC L2 dirtiness in the resource and + * handle it at draw call time. */ for (i = 0; i < sctx->b.streamout.num_targets; i++) if (sctx->b.streamout.targets[i]) @@ -1177,8 +1183,7 @@ static void si_set_polygon_stipple(struct pipe_context *ctx, cb.user_buffer = stipple; cb.buffer_size = sizeof(stipple); - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_PS_CONST_POLY_STIPPLE, &cb); + si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb); } /* TEXTURE METADATA ENABLE/DISABLE */ diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e8e0403..69478a8 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -216,7 +216,8 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->clip_state.atom); - si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs); + ctx->msaa_sample_locs.nr_samples = 0; + si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); si_mark_atom_dirty(ctx, &ctx->cb_render_state); diff --git 
a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 88f4f20..9dd9ef5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -212,8 +212,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_begin_new_cs(sctx); r600_query_init_backend_mask(&sctx->b); /* this emits commands and must be last */ - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy - * with a NULL buffer). We need to use a dummy buffer instead. */ + /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads + * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ if (sctx->b.chip_class == CIK) { sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DEFAULT, 16); @@ -228,6 +228,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } } + si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, + &sctx->null_const_buf); + si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, + &sctx->null_const_buf); + si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, + &sctx->null_const_buf); + si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, + &sctx->null_const_buf); + /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, 0, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index dbbf9b6..2661972 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -168,6 +168,11 @@ struct si_clip_state { struct pipe_clip_state state; }; +struct si_sample_locs { + struct r600_atom atom; + unsigned nr_samples; +}; + struct si_sample_mask { struct r600_atom atom; uint16_t sample_mask; @@ -212,7 +217,7 @@ struct si_context { /* Atom declarations. 
*/ struct r600_atom cache_flush; struct si_framebuffer framebuffer; - struct r600_atom msaa_sample_locs; + struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5e5bf68..5ead940 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1014,7 +1014,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, if (type == TGSI_TYPE_DOUBLE) { LLVMValueRef value2; dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, - lp_build_const_int32(gallivm, swizzle + 1)); + lp_build_const_int32(gallivm, 1)); value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false); return radeon_llvm_emit_fetch_double(bld_base, value, value2); } @@ -1846,13 +1846,13 @@ static LLVMValueRef fetch_constant( result = bitcast(bld_base, type, result); else { LLVMValueRef addr2, result2; - addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1]; + addr2 = ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2"); addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16); addr2 = lp_build_add(&bld_base->uint_bld, addr2, - lp_build_const_int32(base->gallivm, idx * 4)); + lp_build_const_int32(base->gallivm, (idx + 1) * 4)); - result2 = buffer_load_const(base->gallivm->builder, ctx->const_buffers[buf], + result2 = buffer_load_const(base->gallivm->builder, bufp, addr2, ctx->f32); result = radeon_llvm_emit_fetch_double(bld_base, @@ -5072,7 +5072,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, } intr_name = interp_param ? 
"llvm.SI.fs.interp" : "llvm.SI.fs.constant"; - for (chan = 0; chan < 2; chan++) { + for (chan = 0; chan < 4; chan++) { LLVMValueRef args[4]; LLVMValueRef llvm_chan; unsigned schan; @@ -6567,6 +6567,41 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, radeon_llvm_dispose(&ctx.radeon_bld); + /* Validate SGPR and VGPR usage for compute to detect compiler bugs. + * LLVM 3.9svn has this bug. + */ + if (sel->type == PIPE_SHADER_COMPUTE) { + unsigned *props = sel->info.properties; + unsigned wave_size = 64; + unsigned max_vgprs = 256; + unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512; + unsigned max_sgprs_per_wave = 128; + unsigned min_waves_per_cu = + DIV_ROUND_UP(props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] * + props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] * + props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH], + wave_size); + unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4); + + max_vgprs = max_vgprs / min_waves_per_simd; + max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave); + + if (shader->config.num_sgprs > max_sgprs || + shader->config.num_vgprs > max_vgprs) { + fprintf(stderr, "LLVM failed to compile a shader correctly: " + "SGPR:VGPR usage is %u:%u, but the hw limit is %u:%u\n", + shader->config.num_sgprs, shader->config.num_vgprs, + max_sgprs, max_vgprs); + + /* Just terminate the process, because dependent + * shaders can hang due to bad input data, but use + * the env var to allow shader-db to work. + */ + if (!debug_get_bool_option("SI_PASS_BAD_SHADERS", false)) + abort(); + } + } + /* Add the scratch offset to input SGPRs. 
*/ if (shader->config.scratch_bytes_per_wave) shader->info.num_input_sgprs += 1; /* scratch byte offset */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3bbb81a..a641b5d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -460,6 +460,10 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); + /* Only set dual source blending for MRT0 to avoid a hang. */ + if (i >= 1 && blend->dual_src_blend) + continue; + if (!state->rt[j].colormask) continue; @@ -620,8 +624,7 @@ static void si_set_clip_state(struct pipe_context *ctx, cb.user_buffer = state->ucp; cb.buffer_offset = 0; cb.buffer_size = 4*4*8; - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_VS_CONST_CLIP_PLANES, &cb); + si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); pipe_resource_reference(&cb.buffer, NULL); } @@ -847,9 +850,13 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) return; if (sctx->framebuffer.nr_samples > 1 && - (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) + (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) { si_mark_atom_dirty(sctx, &sctx->db_render_state); + if (sctx->b.family >= CHIP_POLARIS10) + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); + } + r600_set_scissor_enable(&sctx->b, rs->scissor_enable); si_pm4_bind_state(sctx, rasterizer, rs); @@ -1586,6 +1593,10 @@ static unsigned si_tex_dim(unsigned res_target, unsigned view_target, if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY) res_target = view_target; + /* If interpreting cubemaps as something else, set 2D_ARRAY. 
*/ + else if (res_target == PIPE_TEXTURE_CUBE || + res_target == PIPE_TEXTURE_CUBE_ARRAY) + res_target = PIPE_TEXTURE_2D_ARRAY; switch (res_target) { default: @@ -2395,21 +2406,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, assert(0); } constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); + si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); - /* Smoothing (only possible with nr_samples == 1) uses the same - * sample locations as the MSAA it simulates. - * - * Therefore, don't update the sample locations when - * transitioning from no AA to smoothing-equivalent AA, and - * vice versa. - */ - if ((sctx->framebuffer.nr_samples != 1 || - old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) && - (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES || - old_nr_samples != 1)) - si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs); + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); } } @@ -2536,8 +2535,37 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned nr_samples = sctx->framebuffer.nr_samples; - cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples : - SI_NUM_SMOOTH_AA_SAMPLES); + /* Smoothing (only possible with nr_samples == 1) uses the same + * sample locations as the MSAA it simulates. + */ + if (nr_samples <= 1 && sctx->smoothing_enabled) + nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; + + /* On Polaris, the small primitive filter uses the sample locations + * even when MSAA is off, so we need to make sure they're set to 0. 
+ */ + if ((nr_samples > 1 || sctx->b.family >= CHIP_POLARIS10) && + (nr_samples != sctx->msaa_sample_locs.nr_samples)) { + sctx->msaa_sample_locs.nr_samples = nr_samples; + cayman_emit_msaa_sample_locs(cs, nr_samples); + } + + if (sctx->b.family >= CHIP_POLARIS10) { + struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + unsigned small_prim_filter_cntl = + S_028830_SMALL_PRIM_FILTER_ENABLE(1) | + S_028830_LINE_FILTER_DISABLE(1); /* line bug */ + + /* The alternative of setting sample locations to 0 would + * require a DB flush to avoid Z errors, see + * https://bugs.freedesktop.org/show_bug.cgi?id=96908 + */ + if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable) + small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; + + radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, + small_prim_filter_cntl); + } } static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) @@ -3246,8 +3274,7 @@ static void si_set_tess_state(struct pipe_context *ctx, (void*)array, sizeof(array), &cb.buffer_offset); - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); + si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); pipe_resource_reference(&cb.buffer, NULL); } @@ -3337,7 +3364,7 @@ void si_init_state_functions(struct si_context *sctx) si_init_atom(sctx, &sctx->cache_flush, &sctx->atoms.s.cache_flush, si_emit_cache_flush); si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); - si_init_atom(sctx, &sctx->msaa_sample_locs, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); + si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); si_init_atom(sctx, &sctx->sample_mask.atom, 
&sctx->atoms.s.sample_mask, si_emit_sample_mask); @@ -3810,11 +3837,6 @@ static void si_init_config(struct si_context *sctx) if (sctx->b.family == CHIP_STONEY) si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); - if (sctx->b.family >= CHIP_POLARIS10) - si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, - S_028830_SMALL_PRIM_FILTER_ENABLE(1) | - S_028830_LINE_FILTER_DISABLE(1)); /* line bug */ - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); if (sctx->b.chip_class >= CIK) si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index aea98ae..97f6dfa 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -269,9 +269,8 @@ void si_update_compressed_colortex_masks(struct si_context *sctx); void si_emit_graphics_shader_userdata(struct si_context *sctx, struct r600_atom *atom); void si_emit_compute_shader_userdata(struct si_context *sctx); -void si_set_constant_buffer(struct si_context *sctx, - struct si_buffer_resources *buffers, - uint slot, struct pipe_constant_buffer *input); +void si_set_rw_buffer(struct si_context *sctx, + uint slot, struct pipe_constant_buffer *input); /* si_state.c */ struct si_shader_selector; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 3c11d14..4e1c599 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -494,13 +494,13 @@ static void si_emit_draw_registers(struct si_context *sctx, radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart); sctx->last_primitive_restart_en = info->primitive_restart; - if (info->primitive_restart && - (info->restart_index != sctx->last_restart_index || - sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) { - radeon_set_context_reg(cs, 
R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, - info->restart_index); - sctx->last_restart_index = info->restart_index; - } + } + if (info->primitive_restart && + (info->restart_index != sctx->last_restart_index || + sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) { + radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, + info->restart_index); + sctx->last_restart_index = info->restart_index; } } @@ -963,6 +963,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_resource(ib.buffer)->TC_L2_dirty = false; } + if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) { + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + r600_resource(info->indirect)->TC_L2_dirty = false; + } + /* Check flush flags. */ if (sctx->b.flags) si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index a7af76d..932d017 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -759,10 +759,10 @@ static void si_shader_ps(struct si_shader *shader) S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); - /* Prefer RE_Z if the shader is complex enough. The requirement is either: - * - the shader uses at least 2 VMEM instructions, or - * - the code size is at least 50 2-dword instructions or 100 1-dword - * instructions. + /* DON'T USE EARLY_Z_THEN_RE_Z !!! + * + * It decreases performance by 15% in DiRT: Showdown on Ultra settings. + * And it has pretty complex shaders. * * Shaders with side effects that must execute independently of the * depth test require LATE_Z. 
@@ -770,9 +770,6 @@ static void si_shader_ps(struct si_shader *shader) if (info->writes_memory && !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) shader->z_order = V_02880C_LATE_Z; - else if (info->num_memory_instructions >= 2 || - shader->binary.code_size > 100*4) - shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z; else shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; } @@ -2052,6 +2049,9 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->b.chip_class == SI) si_mark_atom_dirty(sctx, &sctx->db_render_state); + + if (sctx->framebuffer.nr_samples <= 1) + si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); } } diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 8d49b9d..341ee12 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -7209,6 +7209,7 @@ /* */ #define R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL 0x028830 /* Polaris */ #define S_028830_SMALL_PRIM_FILTER_ENABLE(x) (((x) & 0x1) << 0) +#define C_028830_SMALL_PRIM_FILTER_ENABLE 0xFFFFFFFE #define S_028830_TRIANGLE_FILTER_DISABLE(x) (((x) & 0x1) << 1) #define S_028830_LINE_FILTER_DISABLE(x) (((x) & 0x1) << 2) #define S_028830_POINT_FILTER_DISABLE(x) (((x) & 0x1) << 3) diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c index abfef0f..9e37e23 100644 --- a/src/gallium/drivers/svga/svga_shader.c +++ b/src/gallium/drivers/svga/svga_shader.c @@ -173,10 +173,16 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views)); - for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { + /* In case the number of samplers and sampler_views doesn't match, + * loop over the lower of the two counts. 
+ */ + key->num_textures = MIN2(svga->curr.num_sampler_views[shader], + svga->curr.num_samplers[shader]); + + for (i = 0; i < key->num_textures; i++) { struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; - if (view) { - assert(svga->curr.sampler[shader][i]); + const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; + if (view && sampler) { assert(view->texture); assert(view->texture->target < (1 << 4)); /* texture_target:4 */ @@ -195,7 +201,7 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, } } - if (!svga->curr.sampler[shader][i]->normalized_coords) { + if (!sampler->normalized_coords) { assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */ key->tex[i].width_height_idx = idx++; key->tex[i].unnormalized = TRUE; @@ -208,7 +214,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, key->tex[i].swizzle_a = view->swizzle_a; } } - key->num_textures = svga->curr.num_sampler_views[shader]; } diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am index b67413a..3459af3 100644 --- a/src/gallium/drivers/swr/Makefile.am +++ b/src/gallium/drivers/swr/Makefile.am @@ -22,7 +22,7 @@ include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc -AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) -std=c++11 +AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS) noinst_LTLIBRARIES = libmesaswr.la @@ -31,7 +31,7 @@ libmesaswr_la_SOURCES = $(LOADER_SOURCES) COMMON_CXXFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ $(LLVM_CXXFLAGS) \ - -std=c++11 \ + $(SWR_CXX11_CXXFLAGS) \ -I$(builddir)/rasterizer/scripts \ -I$(builddir)/rasterizer/jitter \ -I$(srcdir)/rasterizer \ @@ -148,7 +148,7 @@ distclean-local: lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la libswrAVX_la_CXXFLAGS = \ - -march=core-avx-i \ + $(SWR_AVX_CXXFLAGS) \ -DKNOB_ARCH=KNOB_ARCH_AVX \ $(COMMON_CXXFLAGS) @@ -171,7 +171,7 @@ libswrAVX_la_LDFLAGS = \ $(COMMON_LDFLAGS) libswrAVX2_la_CXXFLAGS = \ - 
-march=core-avx2 \ + $(SWR_AVX2_CXXFLAGS) \ -DKNOB_ARCH=KNOB_ARCH_AVX2 \ $(COMMON_CXXFLAGS) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 21e3bde..f6bacfd 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -28,6 +28,7 @@ #include <xf86drm.h> #include <xf86drmMode.h> +#include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/ralloc.h" @@ -329,10 +330,19 @@ vc4_bo_open_handle(struct vc4_screen *screen, uint32_t winsys_stride, uint32_t handle, uint32_t size) { - struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo); + struct vc4_bo *bo; assert(size); + pipe_mutex_lock(screen->bo_handles_mutex); + + bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle); + if (bo) { + pipe_reference(NULL, &bo->reference); + goto done; + } + + bo = CALLOC_STRUCT(vc4_bo); pipe_reference_init(&bo->reference, 1); bo->screen = screen; bo->handle = handle; @@ -347,6 +357,10 @@ vc4_bo_open_handle(struct vc4_screen *screen, bo->map = malloc(bo->size); #endif + util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + +done: + pipe_mutex_unlock(screen->bo_handles_mutex); return bo; } @@ -399,7 +413,11 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo) bo->handle); return -1; } + + pipe_mutex_lock(bo->screen->bo_handles_mutex); bo->private = false; + util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo); + pipe_mutex_unlock(bo->screen->bo_handles_mutex); return fd; } diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index b77506e..71a4426 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -25,6 +25,7 @@ #define VC4_BUFMGR_H #include <stdint.h> +#include "util/u_hash_table.h" #include "util/u_inlines.h" #include "vc4_qir.h" @@ -87,11 +88,27 @@ vc4_bo_reference(struct vc4_bo *bo) static inline void vc4_bo_unreference(struct vc4_bo **bo) { + struct vc4_screen *screen; if 
(!*bo) return; - if (pipe_reference(&(*bo)->reference, NULL)) - vc4_bo_last_unreference(*bo); + if ((*bo)->private) { + /* Avoid the mutex for private BOs */ + if (pipe_reference(&(*bo)->reference, NULL)) + vc4_bo_last_unreference(*bo); + } else { + screen = (*bo)->screen; + pipe_mutex_lock(screen->bo_handles_mutex); + + if (pipe_reference(&(*bo)->reference, NULL)) { + util_hash_table_remove(screen->bo_handles, + (void *)(uintptr_t)(*bo)->handle); + vc4_bo_last_unreference(*bo); + } + + pipe_mutex_unlock(screen->bo_handles_mutex); + } + *bo = NULL; } diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index c271a95..77013d9 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -143,6 +143,8 @@ struct vc4_compiled_shader { /** bitmask of which inputs are color inputs, for flat shade handling. */ uint32_t color_inputs; + bool disable_early_z; + uint8_t num_inputs; /* Byte offsets for the start of the vertex attributes 0-7, and the diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 5d64797..8b192da 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -71,7 +71,9 @@ vc4_emit_state(struct pipe_context *pctx) vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy); } - if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) { + if (vc4->dirty & (VC4_DIRTY_RASTERIZER | + VC4_DIRTY_ZSA | + VC4_DIRTY_COMPILED_FS)) { uint8_t ez_enable_mask_out = ~0; /* HW-2905: If the RCL ends up doing a full-res load when @@ -83,7 +85,7 @@ vc4_emit_state(struct pipe_context *pctx) * was seeing bad rendering on glxgears -samples 4 even in * that case. 
*/ - if (vc4->msaa) + if (vc4->msaa || vc4->prog.fs->disable_early_z) ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z; cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS); diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c index d31b673..5df798a 100644 --- a/src/gallium/drivers/vc4/vc4_opt_vpm.c +++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c @@ -110,11 +110,12 @@ qir_opt_vpm(struct vc4_compile *c) * sources are independent of previous instructions */ if (temps == 1) { - list_del(&inst->link); inst->src[j] = mov->src[0]; - list_replace(&mov->link, &inst->link); - c->defs[temp] = NULL; - free(mov); + + list_del(&inst->link); + list_addtail(&inst->link, &mov->link); + qir_remove_instruction(c, mov); + progress = true; break; } diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 5d036eb..3572cf7 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2032,6 +2032,11 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, shader->input_slots[shader->num_inputs] = *slot; shader->num_inputs++; } + + /* Note: the temporary clone in c->s has been freed. 
*/ + nir_shader *orig_shader = key->shader_state->base.ir.nir; + if (orig_shader->info.outputs_written & (1 << FRAG_RESULT_DEPTH)) + shader->disable_early_z = true; } else { shader->num_inputs = c->num_inputs; diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 20f137a..aabe593 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -534,8 +534,8 @@ vc4_resource_from_handle(struct pipe_screen *pscreen, struct vc4_resource *rsc = vc4_resource_setup(pscreen, tmpl); struct pipe_resource *prsc = &rsc->base.b; struct vc4_resource_slice *slice = &rsc->slices[0]; - uint32_t expected_stride = align(prsc->width0 / rsc->cpp, - vc4_utile_width(rsc->cpp)); + uint32_t expected_stride = + align(prsc->width0, vc4_utile_width(rsc->cpp)) * rsc->cpp; if (!rsc) return NULL; diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 733275a..0fb2bbc 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -30,6 +30,7 @@ #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_format.h" +#include "util/u_hash_table.h" #include "util/ralloc.h" #include "vc4_screen.h" @@ -82,7 +83,11 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen) static void vc4_screen_destroy(struct pipe_screen *pscreen) { + struct vc4_screen *screen = vc4_screen(pscreen); + + util_hash_table_destroy(screen->bo_handles); vc4_bufmgr_destroy(pscreen); + close(screen->fd); ralloc_free(pscreen); } @@ -488,6 +493,18 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen, return retval == usage; } +#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) + +static unsigned handle_hash(void *key) +{ + return PTR_TO_UINT(key); +} + +static int handle_compare(void *key1, void *key2) +{ + return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); +} + struct pipe_screen * vc4_screen_create(int fd) { @@ -505,6 +522,8 @@ vc4_screen_create(int fd) screen->fd = 
fd; list_inithead(&screen->bo_cache.time_list); + pipe_mutex_init(screen->bo_handles_mutex); + screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); vc4_fence_init(screen); diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 03f76b2..281d254 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -73,6 +73,9 @@ struct vc4_screen { uint32_t bo_count; } bo_cache; + struct util_hash_table *bo_handles; + pipe_mutex bo_handles_mutex; + uint32_t bo_size; uint32_t bo_count; }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index ca5812ba..396f563 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -318,17 +318,7 @@ struct pipe_blend_state struct pipe_blend_color { - /** - * Making the color array explicitly 16-byte aligned provides a hint to - * compilers to make more efficient auto-vectorization optimizations. - * The actual performance gains from vectorizing the blend color array are - * fairly minimal, if any, but the alignment is necessary to work around - * buggy vectorization in some compilers which fail to generate the correct - * unaligned accessors resulting in a segfault. Specifically several - * versions of the Intel compiler are known to be affected but it's likely - * others are as well. 
- */ - PIPE_ALIGN_VAR(16) float color[4]; + float color[4]; }; diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index e2cadda..57eaaaa 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -207,7 +207,7 @@ namespace { c.getDiagnosticOpts().ShowCarets = false; c.getInvocation().setLangDefaults(c.getLangOpts(), clang::IK_OpenCL, #if HAVE_LLVM >= 0x0309 - llvm::Triple(triple), + llvm::Triple(triple), c.getPreprocessorOpts(), #endif clang::LangStandard::lang_opencl11); c.createDiagnostics( diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index e1afc4d..a07ecd1 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -30,6 +30,7 @@ #include <xf86drm.h> #include <dlfcn.h> +#include <fcntl.h> #include "GL/mesa_glinterop.h" #include "util/u_memory.h" #include "util/u_inlines.h" @@ -979,8 +980,10 @@ dri2_query_image(__DRIimage *image, int attrib, int *value) return GL_TRUE; case __DRI_IMAGE_ATTRIB_FD: whandle.type= DRM_API_HANDLE_TYPE_FD; - image->texture->screen->resource_get_handle(image->texture->screen, - image->texture, &whandle, usage); + if (!image->texture->screen->resource_get_handle(image->texture->screen, + image->texture, &whandle, usage)) + return GL_FALSE; + *value = whandle.handle; return GL_TRUE; case __DRI_IMAGE_ATTRIB_FORMAT: @@ -1798,7 +1801,7 @@ dri2_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; - if (screen->fd < 0 || (fd = dup(screen->fd)) < 0) + if (screen->fd < 0 || (fd = fcntl(screen->fd, F_DUPFD_CLOEXEC, 3)) < 0) goto free_screen; pscreen = load_pipe_screen(&screen->dev, screen->fd); @@ -1879,7 +1882,7 @@ dri_kms_init_screen(__DRIscreen * sPriv) sPriv->driverPrivate = (void *)screen; - if (screen->fd < 0 || (fd = dup(screen->fd)) < 0) + if (screen->fd < 0 || (fd = fcntl(screen->fd, 
F_DUPFD_CLOEXEC, 3)) < 0) goto free_screen; if (pipe_loader_sw_probe_kms(&screen->dev, fd)) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index bd373d7..e2855d7 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2031,7 +2031,7 @@ DECL_SPECIAL(DCL) ureg_DECL_vs_input(ureg, sem.reg.idx); assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); - tx->info->num_inputs = sem.reg.idx + 1; + tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); /* NOTE: preserving order in case of indirect access */ } else if (tx->version.major >= 3) { diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 2606dbf..aff4d4c 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -405,6 +405,7 @@ NineSurface9_LockRect( struct NineSurface9 *This, } else { u_box_origin_2d(This->desc.Width, This->desc.Height, &box); } + box.z = This->layer; user_warn(This->desc.Format == D3DFMT_NULL); diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 1fdc638..faeeec1 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -374,7 +374,7 @@ NineVolume9_UnlockBox( struct NineVolume9 *This ) This->layer_stride_conversion, 0, 0, 0, This->desc.Width, This->desc.Height, - This->desc.Height); + This->desc.Depth); if (!This->data) pipe_transfer_unmap(This->pipe, transfer); diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c index d70439a..c9d9ab1 100644 --- a/src/gallium/state_trackers/omx/vid_enc.c +++ b/src/gallium/state_trackers/omx/vid_enc.c @@ -874,8 +874,8 @@ static void enc_ReleaseTasks(struct list_head *head) { struct encode_task *i, *next; - if (!head) - return; + 
if (!head || !head->next) + return; LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) { pipe_resource_reference(&i->bitstream, NULL); diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index 8a6a397..1ad0d71 100644 --- a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -584,24 +584,26 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format, memset(&templat, 0, sizeof(templat)); + templat.buffer_format = pscreen->get_video_param( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERED_FORMAT + ); + templat.interlaced = pscreen->get_video_param( + pscreen, + PIPE_VIDEO_PROFILE_UNKNOWN, + PIPE_VIDEO_ENTRYPOINT_BITSTREAM, + PIPE_VIDEO_CAP_PREFERS_INTERLACED + ); + if (expected_fourcc) { - templat.buffer_format = VaFourccToPipeFormat(expected_fourcc); - templat.interlaced = 0; - } else { - templat.buffer_format = pscreen->get_video_param - ( - pscreen, - PIPE_VIDEO_PROFILE_UNKNOWN, - PIPE_VIDEO_ENTRYPOINT_BITSTREAM, - PIPE_VIDEO_CAP_PREFERED_FORMAT - ); - templat.interlaced = pscreen->get_video_param - ( - pscreen, - PIPE_VIDEO_PROFILE_UNKNOWN, - PIPE_VIDEO_ENTRYPOINT_BITSTREAM, - PIPE_VIDEO_CAP_PREFERS_INTERLACED - ); + enum pipe_format expected_format = VaFourccToPipeFormat(expected_fourcc); + + if (expected_format != templat.buffer_format || memory_attibute) + templat.interlaced = 0; + + templat.buffer_format = expected_format; } templat.chroma_format = ChromaToPipe(format); diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c index c644cc8..b278288 100644 --- a/src/gallium/state_trackers/vdpau/output.c +++ b/src/gallium/state_trackers/vdpau/output.c @@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device, res_tmpl.depth0 = 1; res_tmpl.array_size = 1; res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET | - PIPE_BIND_LINEAR | PIPE_BIND_SHARED; + PIPE_BIND_SHARED; 
res_tmpl.usage = PIPE_USAGE_DEFAULT; pipe_mutex_lock(dev->mutex); diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c index e091b083..c97c0ff 100644 --- a/src/gallium/state_trackers/xa/xa_tracker.c +++ b/src/gallium/state_trackers/xa/xa_tracker.c @@ -27,6 +27,7 @@ */ #include <unistd.h> +#include <fcntl.h> #include "xa_tracker.h" #include "xa_priv.h" #include "pipe/p_state.h" @@ -157,7 +158,7 @@ xa_tracker_create(int drm_fd) if (!xa) return NULL; - if (drm_fd < 0 || (fd = dup(drm_fd)) < 0) + if (drm_fd < 0 || (fd = fcntl(drm_fd, F_DUPFD_CLOEXEC, 3)) < 0) goto out_no_fd; if (pipe_loader_drm_probe_fd(&xa->dev, fd)) diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am index f42dd25..06ade45 100644 --- a/src/gallium/targets/dri/Makefile.am +++ b/src/gallium/targets/dri/Makefile.am @@ -1,5 +1,11 @@ include $(top_srcdir)/src/gallium/Automake.inc +if HAVE_ANDROID +if HAVE_SHARED_GLAPI +SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la +endif +endif + AM_CFLAGS = \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa \ @@ -49,6 +55,7 @@ gallium_dri_la_LIBADD = \ $(top_builddir)/src/gallium/drivers/noop/libnoop.la \ $(top_builddir)/src/gallium/drivers/rbug/librbug.la \ $(top_builddir)/src/gallium/drivers/trace/libtrace.la \ + $(SHARED_GLAPI_LIB) \ $(SELINUX_LIBS) \ $(EXPAT_LIBS) \ $(LIBDRM_LIBS) \ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index 0ce010e..3f6e280 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -253,6 +253,20 @@ static int compute_level(struct amdgpu_winsys *ws, return 0; } +static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) +{ + unsigned index, tileb; + + tileb = 8 * 8 * surf->bpe; + tileb = MIN2(surf->tile_split, tileb); + + for (index = 0; tileb > 64; index++) + tileb >>= 1; + + assert(index < 16); + return 
index; +} + static int amdgpu_surface_init(struct radeon_winsys *rws, struct radeon_surf *surf) { @@ -345,7 +359,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_SCANOUT) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && - !compressed && AddrDccIn.numSamples <= 1; + !compressed && AddrDccIn.numSamples <= 1 && + surf->last_level == 0; /* This disables incorrect calculations (hacks) in addrlib. */ AddrSurfInfoIn.flags.noStencil = 1; @@ -380,6 +395,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ else AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */ + + /* Addrlib doesn't set this if tileIndex is forced like above. */ + AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf); } surf->bo_size = 0; diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index 598ffcb..52b873f 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -1,5 +1,6 @@ #include <sys/stat.h> #include <unistd.h> +#include <fcntl.h> #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_format.h" @@ -91,7 +92,7 @@ nouveau_drm_screen_create(int fd) * nouveau_device_wrap does not close the fd in case of a device * creation error. 
*/ - dupfd = dup(fd); + dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 3); ret = nouveau_drm_new(dupfd, &drm); if (ret) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 5c85c8f..7619873 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -44,6 +44,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <fcntl.h> #include <radeon_surface.h> #ifndef RADEON_INFO_ACTIVE_CU_COUNT @@ -790,7 +791,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) return NULL; } - ws->fd = dup(fd); + ws->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); if (!do_winsys_init(ws)) goto fail1; diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c b/src/gallium/winsys/svga/drm/vmw_screen.c index 7fcb6d2..d0bfcd7 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen.c +++ b/src/gallium/winsys/svga/drm/vmw_screen.c @@ -31,9 +31,15 @@ #include "util/u_memory.h" #include "pipe/p_compiler.h" #include "util/u_hash_table.h" -#include <sys/types.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif #include <sys/stat.h> #include <unistd.h> +#include <fcntl.h> static struct util_hash_table *dev_hash = NULL; @@ -83,7 +89,7 @@ vmw_winsys_create( int fd ) vws->device = stat_buf.st_rdev; vws->open_count = 1; - vws->ioctl.drm_fd = dup(fd); + vws->ioctl.drm_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); vws->base.have_gb_dma = TRUE; vws->base.need_to_rebind_resources = FALSE; diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c index 21ac0d7..07eca99 100644 --- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c +++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c @@ -211,7 +211,29 @@ kms_sw_displaytarget_map(struct sw_winsys *ws, } static struct kms_sw_displaytarget * -kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys 
*kms_sw, int fd) +kms_sw_displaytarget_find_and_ref(struct kms_sw_winsys *kms_sw, + unsigned int kms_handle) +{ + struct kms_sw_displaytarget *kms_sw_dt; + + LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) { + if (kms_sw_dt->handle == kms_handle) { + kms_sw_dt->ref_count++; + + DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n", + kms_sw_dt->handle, kms_sw_dt->size); + + return kms_sw_dt; + } + } + + return NULL; +} + +static struct kms_sw_displaytarget * +kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd, + unsigned width, unsigned height, + unsigned stride) { uint32_t handle = -1; struct kms_sw_displaytarget * kms_sw_dt; @@ -222,6 +244,10 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd) if (ret) return NULL; + kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, handle); + if (kms_sw_dt) + return kms_sw_dt; + kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget); if (!kms_sw_dt) return NULL; @@ -229,6 +255,9 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd) kms_sw_dt->ref_count = 1; kms_sw_dt->handle = handle; kms_sw_dt->size = lseek(fd, 0, SEEK_END); + kms_sw_dt->width = width; + kms_sw_dt->height = height; + kms_sw_dt->stride = stride; if (kms_sw_dt->size == (off_t)-1) { FREE(kms_sw_dt); @@ -274,25 +303,18 @@ kms_sw_displaytarget_from_handle(struct sw_winsys *ws, switch(whandle->type) { case DRM_API_HANDLE_TYPE_FD: - kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle); - if (kms_sw_dt) { - kms_sw_dt->ref_count++; - kms_sw_dt->width = templ->width0; - kms_sw_dt->height = templ->height0; - kms_sw_dt->stride = whandle->stride; + kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle, + templ->width0, + templ->height0, + whandle->stride); + if (kms_sw_dt) *stride = kms_sw_dt->stride; - } return (struct sw_displaytarget *)kms_sw_dt; case DRM_API_HANDLE_TYPE_KMS: - LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) { - if (kms_sw_dt->handle 
== whandle->handle) { - kms_sw_dt->ref_count++; - - DEBUG_PRINT("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size); - - *stride = kms_sw_dt->stride; - return (struct sw_displaytarget *)kms_sw_dt; - } + kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, whandle->handle); + if (kms_sw_dt) { + *stride = kms_sw_dt->stride; + return (struct sw_displaytarget *)kms_sw_dt; } /* fallthrough */ default: diff --git a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c index c5434ad..23fe8e7 100644 --- a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c +++ b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c @@ -22,6 +22,7 @@ */ #include <unistd.h> +#include <fcntl.h> #include "vc4_drm_public.h" @@ -30,5 +31,5 @@ struct pipe_screen * vc4_drm_screen_create(int fd) { - return vc4_screen_create(dup(fd)); + return vc4_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); } diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c index dc203cd..f866b24 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c @@ -903,7 +903,7 @@ virgl_drm_screen_create(int fd) virgl_screen(pscreen)->refcnt++; } else { struct virgl_winsys *vws; - int dup_fd = dup(fd); + int dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); vws = virgl_drm_winsys_create(dup_fd); diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index c3626e3..2330f1b 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -589,7 +589,8 @@ gbm_dri_bo_get_fd(struct gbm_bo *_bo) if (bo->image == NULL) return -1; - dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_FD, &fd); + if (!dri->image->queryImage(bo->image, __DRI_IMAGE_ATTRIB_FD, &fd)) + return -1; return fd; } @@ -941,7 +942,7 @@ gbm_dri_bo_map(struct gbm_bo *_bo, return *map_data; } - if (!dri->image || dri->image->base.version < 12) { + if (!dri->image || 
dri->image->base.version < 12 || !dri->image->mapImage) { errno = ENOSYS; return NULL; } @@ -972,7 +973,8 @@ gbm_dri_bo_unmap(struct gbm_bo *_bo, void *map_data) return; } - if (!dri->context || !dri->image || dri->image->base.version < 12) + if (!dri->context || !dri->image || + dri->image->base.version < 12 || !dri->image->unmapImage) return; dri->image->unmapImage(dri->context, bo->image, map_data); diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c index 0f4657a..c3a2ec33 100644 --- a/src/gbm/main/gbm.c +++ b/src/gbm/main/gbm.c @@ -31,7 +31,12 @@ #include <string.h> #include <stdint.h> -#include <sys/types.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif #include <sys/stat.h> #include <unistd.h> #include <errno.h> @@ -237,7 +242,8 @@ gbm_bo_get_handle(struct gbm_bo *bo) * descriptor. * \param bo The buffer object - * \return Returns a file descriptor referring to the underlying buffer + * \return Returns a file descriptor referring to the underlying buffer or -1 + * if an error occurs. */ GBM_EXPORT int gbm_bo_get_fd(struct gbm_bo *bo) diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c index 90d7bba..51b6b1c 100644 --- a/src/glx/dri3_glx.c +++ b/src/glx/dri3_glx.c @@ -132,6 +132,16 @@ glx_dri3_get_dri_context(struct loader_dri3_drawable *draw) return (gc != &dummyContext) ? dri3Ctx->driContext : NULL; } +static __DRIscreen * +glx_dri3_get_dri_screen(struct loader_dri3_drawable *draw) +{ + struct glx_context *gc = __glXGetCurrentContext(); + struct dri3_context *pcp = (struct dri3_context *) gc; + struct dri3_screen *psc = (struct dri3_screen *) pcp->base.psc; + + return (gc != &dummyContext && psc) ? 
psc->driScreen : NULL; +} + static void glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags) { @@ -169,6 +179,7 @@ static struct loader_dri3_vtable glx_dri3_vtable = { .set_drawable_size = glx_dri3_set_drawable_size, .in_current_context = glx_dri3_in_current_context, .get_dri_context = glx_dri3_get_dri_context, + .get_dri_screen = glx_dri3_get_dri_screen, .flush_drawable = glx_dri3_flush_drawable, .show_fps = glx_dri3_show_fps, }; diff --git a/src/glx/glx_error.c b/src/glx/glx_error.c index b3860db..653cbeb 100644 --- a/src/glx/glx_error.c +++ b/src/glx/glx_error.c @@ -39,11 +39,9 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID, uint_fast16_t minorCode, bool coreX11error) { struct glx_display *glx_dpy = __glXInitialize(dpy); - struct glx_context *gc = __glXGetCurrentContext(); xError error; assert(glx_dpy); - assert(gc); LockDisplay(dpy); @@ -59,7 +57,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID, error.sequenceNumber = dpy->request; error.resourceID = resourceID; error.minorCode = minorCode; - error.majorCode = gc ? gc->majorOpcode : 0; + error.majorCode = glx_dpy->majorOpcode; _XError(dpy, &error); diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c index a0c1e3d..24c073c 100644 --- a/src/glx/glx_pbuffer.c +++ b/src/glx/glx_pbuffer.c @@ -328,7 +328,7 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable, * the calling thread's current context a GLXBadDrawable error is * generated." 
*/ - if (pdraw == NULL || gc == NULL || gc->currentDpy != dpy || + if (pdraw == NULL || gc == &dummyContext || gc->currentDpy != dpy || (gc->currentDrawable != drawable && gc->currentReadable != drawable)) { __glXSendError(dpy, GLXBadDrawable, drawable, diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 3856032..5ff4dd3 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -524,7 +524,7 @@ glXWaitGL(void) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc && gc->vtable->wait_gl) + if (gc != &dummyContext && gc->vtable->wait_gl) gc->vtable->wait_gl(gc); } @@ -537,7 +537,7 @@ glXWaitX(void) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc && gc->vtable->wait_x) + if (gc != &dummyContext && gc->vtable->wait_x) gc->vtable->wait_x(gc); } @@ -546,7 +546,7 @@ glXUseXFont(Font font, int first, int count, int listBase) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc && gc->vtable->use_x_font) + if (gc != &dummyContext && gc->vtable->use_x_font) gc->vtable->use_x_font(gc, font, first, count, listBase); } @@ -838,7 +838,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable) __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable); if (pdraw != NULL) { - Bool flush = gc && drawable == gc->currentDrawable; + Bool flush = gc != &dummyContext && drawable == gc->currentDrawable; (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0, flush); return; @@ -855,7 +855,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable) ** The calling thread may or may not have a current context. If it ** does, send the context tag so the server can do a flush. 
*/ - if ((gc != NULL) && (dpy == gc->currentDpy) && + if ((gc != &dummyContext) && (dpy == gc->currentDpy) && ((drawable == gc->currentDrawable) || (drawable == gc->currentReadable))) { tag = gc->currentContextTag; @@ -1388,7 +1388,7 @@ _GLX_PUBLIC Display * glXGetCurrentDisplay(void) { struct glx_context *gc = __glXGetCurrentContext(); - if (NULL == gc) + if (gc == &dummyContext) return NULL; return gc->currentDpy; } @@ -1630,7 +1630,6 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig, int renderType, GLXContext shareList, Bool allowDirect) { struct glx_config *config = (struct glx_config *) fbconfig; - int screen = DefaultScreen(dpy); struct glx_config **config_list; int list_size; unsigned i; @@ -1641,7 +1640,7 @@ glXCreateNewContext(Display * dpy, GLXFBConfig fbconfig, } config_list = (struct glx_config **) - glXGetFBConfigs(dpy, screen, &list_size); + glXGetFBConfigs(dpy, config->screen, &list_size); for (i = 0; i < list_size; i++) { if (config_list[i] == config) @@ -1751,7 +1750,7 @@ __glXSwapIntervalSGI(int interval) CARD32 *interval_ptr; CARD8 opcode; - if (gc == NULL) { + if (gc == &dummyContext) { return GLX_BAD_CONTEXT; } @@ -1805,7 +1804,7 @@ __glXSwapIntervalMESA(unsigned int interval) #ifdef GLX_DIRECT_RENDERING struct glx_context *gc = __glXGetCurrentContext(); - if (gc != NULL && gc->isDirect) { + if (gc != &dummyContext && gc->isDirect) { struct glx_screen *psc; psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen); @@ -1827,7 +1826,7 @@ __glXGetSwapIntervalMESA(void) #ifdef GLX_DIRECT_RENDERING struct glx_context *gc = __glXGetCurrentContext(); - if (gc != NULL && gc->isDirect) { + if (gc != &dummyContext && gc->isDirect) { struct glx_screen *psc; psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen); @@ -1857,7 +1856,7 @@ __glXGetVideoSyncSGI(unsigned int *count) __GLXDRIdrawable *pdraw; #endif - if (!gc) + if (gc == &dummyContext) return GLX_BAD_CONTEXT; #ifdef GLX_DIRECT_RENDERING @@ -1899,7 +1898,7 @@ __glXWaitVideoSyncSGI(int 
divisor, int remainder, unsigned int *count) if (divisor <= 0 || remainder < 0) return GLX_BAD_VALUE; - if (!gc) + if (gc == &dummyContext) return GLX_BAD_CONTEXT; #ifdef GLX_DIRECT_RENDERING @@ -2212,7 +2211,7 @@ __glXSwapBuffersMscOML(Display * dpy, GLXDrawable drawable, struct glx_screen *psc = pdraw ? pdraw->psc : NULL; #endif - if (!gc) /* no GLX for this */ + if (gc == &dummyContext) /* no GLX for this */ return -1; #ifdef GLX_DIRECT_RENDERING @@ -2392,7 +2391,7 @@ __glXCopySubBufferMESA(Display * dpy, GLXDrawable drawable, ** does, send the context tag so the server can do a flush. */ gc = __glXGetCurrentContext(); - if ((gc != NULL) && (dpy == gc->currentDpy) && + if ((gc != &dummyContext) && (dpy == gc->currentDpy) && ((drawable == gc->currentDrawable) || (drawable == gc->currentReadable))) { tag = gc->currentContextTag; @@ -2431,7 +2430,7 @@ __glXBindTexImageEXT(Display * dpy, { struct glx_context *gc = __glXGetCurrentContext(); - if (gc == NULL || gc->vtable->bind_tex_image == NULL) + if (gc == &dummyContext || gc->vtable->bind_tex_image == NULL) return; gc->vtable->bind_tex_image(dpy, drawable, buffer, attrib_list); @@ -2442,7 +2441,7 @@ __glXReleaseTexImageEXT(Display * dpy, GLXDrawable drawable, int buffer) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc == NULL || gc->vtable->release_tex_image == NULL) + if (gc == &dummyContext || gc->vtable->release_tex_image == NULL) return; gc->vtable->release_tex_image(dpy, drawable, buffer); @@ -2718,7 +2717,7 @@ __glXGetUST(int64_t * ust) #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL) -int +PUBLIC int MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context, struct mesa_glinterop_device_info *out) { @@ -2742,7 +2741,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context, return ret; } -int +PUBLIC int MesaGLInteropGLXExportObject(Display *dpy, GLXContext context, struct mesa_glinterop_export_in *in, struct mesa_glinterop_export_out *out) diff --git 
a/src/glx/glxglvnd.c b/src/glx/glxglvnd.c index b7252a7..098304d 100644 --- a/src/glx/glxglvnd.c +++ b/src/glx/glxglvnd.c @@ -19,13 +19,13 @@ static void *__glXGLVNDGetProcAddress(const GLubyte *procName) static unsigned FindGLXFunction(const GLubyte *name) { - unsigned first = 0; - unsigned last = DI_FUNCTION_COUNT - 1; + int first = 0; + int last = DI_FUNCTION_COUNT - 1; while (first <= last) { - unsigned middle = (first + last) / 2; - int comp = strcmp((const char *) name, - __glXDispatchTableStrings[middle]); + int middle = (first + last) / 2; + int comp = strcmp(__glXDispatchTableStrings[middle], + (const char *) name); if (comp < 0) first = middle + 1; diff --git a/src/glx/query_renderer.c b/src/glx/query_renderer.c index 9108ec2..4debf06 100644 --- a/src/glx/query_renderer.c +++ b/src/glx/query_renderer.c @@ -106,7 +106,7 @@ glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc == NULL) + if (gc == &dummyContext) return False; return __glXQueryRendererInteger(gc->psc, attribute, value); @@ -166,7 +166,7 @@ glXQueryCurrentRendererStringMESA(int attribute) { struct glx_context *gc = __glXGetCurrentContext(); - if (gc == NULL) + if (gc == &dummyContext) return False; return __glXQueryRendererString(gc->psc, attribute); diff --git a/src/glx/tests/fake_glx_screen.cpp b/src/glx/tests/fake_glx_screen.cpp index db20749..801f54a 100644 --- a/src/glx/tests/fake_glx_screen.cpp +++ b/src/glx/tests/fake_glx_screen.cpp @@ -75,7 +75,20 @@ indirect_create_context_attribs(struct glx_screen *base, return indirect_create_context(base, config_base, shareList, 0); } -__thread void *__glX_tls_Context = NULL; +/* This is necessary so that we don't have to link with glxcurrent.c + * which would require us to link with X libraries and what not. 
+ */ +GLubyte dummyBuffer[__GLX_BUFFER_LIMIT_SIZE]; +struct glx_context_vtable dummyVtable; +struct glx_context dummyContext = { + &dummyBuffer[0], + &dummyBuffer[0], + &dummyBuffer[0], + &dummyBuffer[__GLX_BUFFER_LIMIT_SIZE], + sizeof(dummyBuffer), + &dummyVtable +}; +__thread void *__glX_tls_Context = &dummyContext; #if !defined(GLX_USE_TLS) extern "C" struct glx_context * diff --git a/src/intel/genxml/Makefile.am b/src/intel/genxml/Makefile.am index d6c1c5b..e8bd84c 100644 --- a/src/intel/genxml/Makefile.am +++ b/src/intel/genxml/Makefile.am @@ -35,6 +35,7 @@ $(BUILT_SOURCES): gen_pack_header.py CLEANFILES = $(BUILT_SOURCES) EXTRA_DIST = \ + $(GENXML_GENERATED_FILES) \ gen6.xml \ gen7.xml \ gen75.xml \ diff --git a/src/intel/genxml/gen6.xml b/src/intel/genxml/gen6.xml index 44e2804..8bc28a9 100644 --- a/src/intel/genxml/gen6.xml +++ b/src/intel/genxml/gen6.xml @@ -79,7 +79,7 @@ </group> </struct> - <struct name="BLEND_STATE" length="2"> + <struct name="BLEND_STATE_ENTRY" length="2"> <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/> <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/> <field name="Alpha Blend Function" start="26" end="28" type="uint"> @@ -169,6 +169,12 @@ <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/> </struct> + <struct name="BLEND_STATE" length="16"> + <group count="8" start="0" size="64"> + <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/> + </group> + </struct> + <struct name="CC_VIEWPORT" length="2"> <field name="Minimum Depth" start="0" end="31" type="float"/> <field name="Maximum Depth" start="32" end="63" type="float"/> @@ -781,6 +787,7 @@ <field name="CLIP Enable" start="95" end="95" type="bool"/> <field name="API Mode" start="94" end="94" type="uint"> <value name="APIMODE_OGL" value="0"/> + <value name="APIMODE_D3D" value="1"/> </field> <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/> <field name="Viewport Z 
ClipTest Enable" start="91" end="91" type="bool"/> diff --git a/src/intel/genxml/gen7.xml b/src/intel/genxml/gen7.xml index 2bbfcb7..cbeb2e1 100644 --- a/src/intel/genxml/gen7.xml +++ b/src/intel/genxml/gen7.xml @@ -102,7 +102,7 @@ </group> </struct> - <struct name="BLEND_STATE" length="2"> + <struct name="BLEND_STATE_ENTRY" length="2"> <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/> <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/> <field name="Alpha Blend Function" start="26" end="28" type="uint"> @@ -192,6 +192,12 @@ <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/> </struct> + <struct name="BLEND_STATE" length="16"> + <group count="8" start="0" size="64"> + <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/> + </group> + </struct> + <struct name="CC_VIEWPORT" length="2"> <field name="Minimum Depth" start="0" end="31" type="float"/> <field name="Maximum Depth" start="32" end="63" type="float"/> @@ -953,6 +959,7 @@ <field name="Clip Enable" start="95" end="95" type="bool"/> <field name="API Mode" start="94" end="94" type="uint"> <value name="APIMODE_OGL" value="0"/> + <value name="APIMODE_D3D" value="1"/> </field> <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/> <field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/> diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index 9ab432c..40f3c31 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -112,7 +112,7 @@ </group> </struct> - <struct name="BLEND_STATE" length="2"> + <struct name="BLEND_STATE_ENTRY" length="2"> <field name="Color Buffer Blend Enable" start="31" end="31" type="bool"/> <field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/> <field name="Alpha Blend Function" start="26" end="28" type="uint"> @@ -202,6 +202,12 @@ <field name="Post-Blend Color Clamp Enable" start="32" end="32" type="bool"/> 
</struct> + <struct name="BLEND_STATE" length="16"> + <group count="8" start="0" size="64"> + <field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/> + </group> + </struct> + <struct name="CC_VIEWPORT" length="2"> <field name="Minimum Depth" start="0" end="31" type="float"/> <field name="Maximum Depth" start="32" end="63" type="float"/> @@ -1062,6 +1068,7 @@ <field name="Clip Enable" start="95" end="95" type="bool"/> <field name="API Mode" start="94" end="94" type="uint"> <value name="APIMODE_OGL" value="0"/> + <value name="APIMODE_D3D" value="1"/> </field> <field name="Viewport XY ClipTest Enable" start="92" end="92" type="bool"/> <field name="Viewport Z ClipTest Enable" start="91" end="91" type="bool"/> diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 80d40fb..e3b0cdb 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -1115,15 +1115,16 @@ <field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/> <field name="Clip Enable" start="95" end="95" type="bool"/> <field name="API Mode" start="94" end="94" type="uint"> - <value name="OGL" value="0"/> + <value name="APIMODE_OGL" value="0"/> + <value name="APIMODE_D3D" value="1"/> </field> <field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/> <field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/> <field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/> <field name="Clip Mode" start="77" end="79" type="uint"> - <value name="NORMAL" value="0"/> - <value name="REJECT_ALL" value="3"/> - <value name="ACCEPT_ALL" value="4"/> + <value name="CLIPMODE_NORMAL" value="0"/> + <value name="CLIPMODE_REJECT_ALL" value="3"/> + <value name="CLIPMODE_ACCEPT_ALL" value="4"/> </field> <field name="Perspective Divide Disable" start="73" end="73" type="bool"/> <field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/> @@ -2035,7 +2036,7 @@ <field name="3D 
Command Sub Opcode" start="16" end="23" type="uint" default="30"/> <field name="DWord Length" start="0" end="7" type="uint" default="3"/> <field name="SO Function Enable" start="63" end="63" type="uint"/> - <field name="API Rendering Disable" start="62" end="62" type="uint"/> + <field name="Rendering Disable" start="62" end="62" type="uint"/> <field name="Render Stream Select" start="59" end="60" type="uint"/> <field name="Reorder Mode" start="58" end="58" type="uint"> <value name="LEADING" value="0"/> diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 94b7d28..4333b89 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -1167,15 +1167,16 @@ <field name="User Clip Distance Cull Test Enable Bitmask" start="32" end="39" type="uint"/> <field name="Clip Enable" start="95" end="95" type="bool"/> <field name="API Mode" start="94" end="94" type="uint"> - <value name="OGL" value="0"/> + <value name="APIMODE_OGL" value="0"/> + <value name="APIMODE_D3D" value="1"/> </field> <field name="Viewport XY Clip Test Enable" start="92" end="92" type="bool"/> <field name="Guardband Clip Test Enable" start="90" end="90" type="bool"/> <field name="User Clip Distance Clip Test Enable Bitmask" start="80" end="87" type="uint"/> <field name="Clip Mode" start="77" end="79" type="uint"> - <value name="NORMAL" value="0"/> - <value name="REJECT_ALL" value="3"/> - <value name="ACCEPT_ALL" value="4"/> + <value name="CLIPMODE_NORMAL" value="0"/> + <value name="CLIPMODE_REJECT_ALL" value="3"/> + <value name="CLIPMODE_ACCEPT_ALL" value="4"/> </field> <field name="Perspective Divide Disable" start="73" end="73" type="bool"/> <field name="Non-Perspective Barycentric Enable" start="72" end="72" type="bool"/> @@ -2238,7 +2239,7 @@ <field name="3D Command Sub Opcode" start="16" end="23" type="uint" default="30"/> <field name="DWord Length" start="0" end="7" type="uint" default="3"/> <field name="SO Function Enable" start="63" end="63" type="uint"/> - <field 
name="API Rendering Disable" start="62" end="62" type="uint"/> + <field name="Rendering Disable" start="62" end="62" type="uint"/> <field name="Render Stream Select" start="59" end="60" type="uint"/> <field name="Reorder Mode" start="58" end="58" type="uint"> <value name="LEADING" value="0"/> diff --git a/src/intel/isl/Makefile.am b/src/intel/isl/Makefile.am index 1fd6683..8e03ee6 100644 --- a/src/intel/isl/Makefile.am +++ b/src/intel/isl/Makefile.am @@ -46,23 +46,25 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gallium/include -libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init +AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + -Wno-override-init libisl_la_LIBADD = $(ISL_GEN_LIBS) libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES) libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES) -libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70 +libisl_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70 libisl_gen75_la_SOURCES = $(ISL_GEN75_FILES) -libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75 +libisl_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75 libisl_gen8_la_SOURCES = $(ISL_GEN8_FILES) -libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80 +libisl_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80 libisl_gen9_la_SOURCES = $(ISL_GEN9_FILES) -libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90 +libisl_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90 BUILT_SOURCES = $(ISL_GENERATED_FILES) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 77b570d..0bdfa9d 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -490,27 +490,27 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_MSAA_LAYOUT_ARRAY: assert(info->depth == 1); - assert(info->array_len == 1); + assert(info->levels == 1); assert(!isl_format_is_compressed(info->format)); *phys_level0_sa = (struct isl_extent4d) { .w = info->width, .h = info->height, .d = 1, - .a = info->samples, + .a = info->array_len * 
info->samples, }; break; case ISL_MSAA_LAYOUT_INTERLEAVED: assert(info->depth == 1); - assert(info->array_len == 1); + assert(info->levels == 1); assert(!isl_format_is_compressed(info->format)); *phys_level0_sa = (struct isl_extent4d) { .w = info->width, .h = info->height, .d = 1, - .a = 1, + .a = info->array_len, }; isl_msaa_interleaved_scale_px_to_sa(info->samples, diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index ef86228..64aced8 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -989,7 +989,7 @@ isl_has_matching_typed_storage_image_format(const struct brw_device_info *devinf static inline bool isl_tiling_is_any_y(enum isl_tiling tiling) { - return (1u << tiling) & ISL_TILING_ANY_MASK; + return (1u << tiling) & ISL_TILING_ANY_Y_MASK; } static inline bool diff --git a/src/intel/isl/isl_gen6.c b/src/intel/isl/isl_gen6.c index 24c3939..cc246f5 100644 --- a/src/intel/isl/isl_gen6.c +++ b/src/intel/isl/isl_gen6.c @@ -37,7 +37,7 @@ gen6_choose_msaa_layout(const struct isl_device *dev, if (info->samples == 1) { *msaa_layout = ISL_MSAA_LAYOUT_NONE; - return false; + return true; } /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index b2317d8..1c985b6 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -430,8 +430,15 @@ isl_genX(buffer_fill_state_s)(void *state, uint32_t num_elements = info->size / info->stride; if (GEN_GEN >= 7) { + /* From the IVB PRM, SURFACE_STATE::Height, + * + * For typed buffer and structured buffer surfaces, the number + * of entries in the buffer ranges from 1 to 2^27. For raw buffer + * surfaces, the number of entries in the buffer is the number of bytes + * which can range from 1 to 2^30. 
+ */ if (info->format == ISL_FORMAT_RAW) { - assert(num_elements <= (1ull << 31)); + assert(num_elements <= (1ull << 30)); assert((num_elements & 3) == 0); } else { assert(num_elements <= (1ull << 27)); diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am index 0e521cf..6b1015a 100644 --- a/src/intel/vulkan/Makefile.am +++ b/src/intel/vulkan/Makefile.am @@ -61,7 +61,9 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/intel \ -I$(top_srcdir)/src/intel -AM_CFLAGS = -Wno-override-init -msse2 +AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + -Wno-override-init -msse2 libanv_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70 libanv_gen7_la_SOURCES = $(GEN7_FILES) @@ -159,6 +161,7 @@ libvulkan_intel_la_LDFLAGS = \ -module \ -no-undefined \ -avoid-version \ + $(BSYMBOLIC) \ $(GC_SECTIONS) \ $(LD_NO_UNDEFINED) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 20d3af1..ca78c09 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -359,7 +359,7 @@ anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) switch (cmd_buffer->device->info.gen) { case 7: if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + return gen75_cmd_buffer_emit_state_base_address(cmd_buffer); else return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); case 8: @@ -741,20 +741,26 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, { struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_bind_map *map; + struct anv_pipeline *pipeline; uint32_t bias, state_offset; switch (stage) { case MESA_SHADER_COMPUTE: - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + pipeline = cmd_buffer->state.compute_pipeline; bias = 1; break; default: - map = &cmd_buffer->state.pipeline->bindings[stage]; + pipeline = cmd_buffer->state.pipeline; bias = 0; break; } + if 
(!anv_pipeline_has_stage(pipeline, stage)) { + *bt_state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; if (bias + map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -907,13 +913,19 @@ VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, struct anv_state *state) { - struct anv_pipeline_bind_map *map; + struct anv_pipeline *pipeline; if (stage == MESA_SHADER_COMPUTE) - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + pipeline = cmd_buffer->state.compute_pipeline; else - map = &cmd_buffer->state.pipeline->bindings[stage]; + pipeline = cmd_buffer->state.pipeline; + if (!anv_pipeline_has_stage(pipeline, stage)) { + *state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; if (map->sampler_count == 0) { *state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -1080,10 +1092,14 @@ struct anv_state anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { + /* If we don't have this stage, bail. */ + if (!anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage)) + return (struct anv_state) { .offset = 0 }; + struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; const struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->prog_data[stage]; + anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]); /* If we don't actually have any push constants, bail. 
*/ if (data == NULL || prog_data == NULL || prog_data->nr_params == 0) diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 448ae0e..4ab1802 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -27,6 +27,8 @@ #include <unistd.h> #include <fcntl.h> +#include "util/mesa-sha1.h" + #include "anv_private.h" /* @@ -65,9 +67,8 @@ VkResult anv_CreateDescriptorSetLayout( struct anv_sampler **samplers = (struct anv_sampler **)&set_layout->binding[max_binding + 1]; + memset(set_layout, 0, sizeof(*set_layout)); set_layout->binding_count = max_binding + 1; - set_layout->shader_stages = 0; - set_layout->size = 0; for (uint32_t b = 0; b <= max_binding; b++) { /* Initialize all binding_layout entries to -1 */ @@ -202,6 +203,15 @@ void anv_DestroyDescriptorSetLayout( anv_free2(&device->alloc, pAllocator, set_layout); } +static void +sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx, + const struct anv_descriptor_set_layout *layout) +{ + size_t size = sizeof(*layout) + + sizeof(layout->binding[0]) * layout->binding_count; + _mesa_sha1_update(ctx, layout, size); +} + /* * Pipeline layouts. These have nothing to do with the pipeline. 
They are * just muttiple descriptor set layouts pasted together @@ -246,6 +256,19 @@ VkResult anv_CreatePipelineLayout( } } + struct mesa_sha1 *ctx = _mesa_sha1_init(); + for (unsigned s = 0; s < layout->num_sets; s++) { + sha1_update_descriptor_set_layout(ctx, layout->set[s].layout); + _mesa_sha1_update(ctx, &layout->set[s].dynamic_offset_start, + sizeof(layout->set[s].dynamic_offset_start)); + } + _mesa_sha1_update(ctx, &layout->num_sets, sizeof(layout->num_sets)); + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + _mesa_sha1_update(ctx, &layout->stage[s].has_dynamic_offsets, + sizeof(layout->stage[s].has_dynamic_offsets)); + } + _mesa_sha1_final(ctx, layout->sha1); + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); return VK_SUCCESS; @@ -409,6 +432,11 @@ anv_descriptor_set_create(struct anv_device *device, (struct anv_buffer_view *) &set->descriptors[layout->size]; set->buffer_count = layout->buffer_count; + /* By defining the descriptors to be zero now, we can later verify that + * a descriptor has not been populated with user data. + */ + memset(set->descriptors, 0, sizeof(struct anv_descriptor) * layout->size); + /* Go through and fill out immutable samplers if we have any */ struct anv_descriptor *desc = set->descriptors; for (uint32_t b = 0; b < layout->binding_count; b++) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index dd941b6..cd8fb3a 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -372,7 +372,7 @@ void anv_GetPhysicalDeviceFeatures( .robustBufferAccess = true, .fullDrawIndexUint32 = true, .imageCubeArray = false, - .independentBlend = pdevice->info->gen >= 8, + .independentBlend = true, .geometryShader = true, .tessellationShader = false, .sampleRateShading = false, @@ -438,6 +438,10 @@ void anv_GetPhysicalDeviceProperties( const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0; + /* See assertions made when programming the buffer surface state. 
*/ + const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ? + (1ul << 30) : (1ul << 27); + VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -448,8 +452,8 @@ void anv_GetPhysicalDeviceProperties( .maxImageDimensionCube = (1 << 14), .maxImageArrayLayers = (1 << 11), .maxTexelBufferElements = 128 * 1024 * 1024, - .maxUniformBufferRange = UINT32_MAX, - .maxStorageBufferRange = UINT32_MAX, + .maxUniformBufferRange = (1ul << 27), + .maxStorageBufferRange = max_raw_buffer_sz, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = UINT32_MAX, .maxSamplerAllocationCount = 64 * 1024, @@ -649,13 +653,15 @@ PFN_vkVoidFunction anv_GetInstanceProcAddr( return anv_lookup_entrypoint(pName); } -/* The loader wants us to expose a second GetInstanceProcAddr function - * to work around certain LD_PRELOAD issues seen in apps. +/* With version 1+ of the loader interface the ICD should expose + * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps. 
*/ +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( VkInstance instance, const char* pName); +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( VkInstance instance, const char* pName) @@ -869,7 +875,8 @@ VkResult anv_CreateDevice( &device->dynamic_state_block_pool); anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); - anv_pipeline_cache_init(&device->default_pipeline_cache, device); + anv_state_pool_init(&device->instruction_state_pool, + &device->instruction_block_pool); anv_block_pool_init(&device->surface_state_block_pool, device, 4096); @@ -944,6 +951,7 @@ void anv_DestroyDevice( anv_bo_pool_finish(&device->batch_bo_pool); anv_state_pool_finish(&device->dynamic_state_pool); anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_state_pool_finish(&device->instruction_state_pool); anv_block_pool_finish(&device->instruction_block_pool); anv_state_pool_finish(&device->surface_state_pool); anv_block_pool_finish(&device->surface_state_block_pool); @@ -1786,23 +1794,3 @@ void anv_DestroyFramebuffer( anv_free2(&device->alloc, pAllocator, fb); } - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) -{ -} - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) -{ -} diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py index 2896174..dcf25ee 100644 --- a/src/intel/vulkan/anv_entrypoints_gen.py +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -134,7 +134,6 @@ if opt_header: print "%s gen75_%s%s;" % (type, name, args) print "%s gen8_%s%s;" % (type, name, args) print "%s gen9_%s%s;" % (type, name, args) - print "%s anv_validate_%s%s;" % (type, name, args) print_guard_end(name) exit() @@ 
-185,23 +184,24 @@ for type, name, args, num, h in entrypoints: print " \"vk%s\\0\"" % name offsets.append(i) i += 2 + len(name) + 1 -print """ ; +print " ;" -/* Weak aliases for all potential validate functions. These will resolve to - * NULL if they're not defined, which lets the resolve_entrypoint() function - * either pick a validate wrapper if available or just plug in the actual - * entry point. - */ -""" - -# Now generate the table of all entry points and their validation functions +# Now generate the table of all entry points print "\nstatic const struct anv_entrypoint entrypoints[] = {" for type, name, args, num, h in entrypoints: print " { %5d, 0x%08x }," % (offsets[num], h) print "};\n" -for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: +print """ + +/* Weak aliases for all potential implementations. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick the correct entry point. + */ +""" + +for layer in [ "anv", "gen7", "gen75", "gen8", "gen9" ]: for type, name, args, num, h in entrypoints: print_guard_start(name) print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) @@ -214,27 +214,6 @@ for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: print "};\n" print """ -#ifdef DEBUG -static bool enable_validate = true; -#else -static bool enable_validate = false; -#endif - -/* We can't use symbols that need resolving (like, oh, getenv) in the resolve - * function. This means that we have to determine whether or not to use the - * validation layer sometime before that. The constructor function attribute asks - * the dynamic linker to invoke determine_validate() at dlopen() time which - * works. 
- */ -static void __attribute__ ((constructor)) -determine_validate(void) -{ - const char *s = getenv("ANV_VALIDATE"); - - if (s) - enable_validate = atoi(s); -} - static const struct brw_device_info *dispatch_devinfo; void @@ -246,9 +225,6 @@ anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) void * __attribute__ ((noinline)) anv_resolve_entrypoint(uint32_t index) { - if (enable_validate && validate_layer.entrypoints[index]) - return validate_layer.entrypoints[index]; - if (dispatch_devinfo == NULL) { return anv_layer.entrypoints[index]; } @@ -277,17 +253,6 @@ anv_resolve_entrypoint(uint32_t index) } """ -# Now output ifuncs and their resolve helpers for all entry points. The -# resolve helper calls resolve_entrypoint() with the entry point index, which -# lets the resolver look it up in the table. - -for type, name, args, num, h in entrypoints: - print_guard_start(name) - print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) - print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) - print_guard_end(name) - - # Now generate the hash table used for entry point look up. This is a # uint16_t table of entry point indices. We use 0xffff to indicate an entry # in the hash table is empty. diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 77d9931..caf4a3e 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -322,81 +322,6 @@ void anv_GetImageSubresourceLayout( } } -VkResult -anv_validate_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *subresource; - - /* Validate structure type before dereferencing it. 
*/ - assert(pCreateInfo); - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - subresource = &pCreateInfo->subresourceRange; - - /* Validate viewType is in range before using it. */ - assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); - assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); - - /* Validate format is in range before using it. */ - assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); - assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); - - /* Validate channel swizzles. */ - assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); - - /* Validate subresource. */ - assert(subresource->aspectMask != 0); - assert(subresource->levelCount > 0); - assert(subresource->layerCount > 0); - assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels); - assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size); - assert(pView); - - MAYBE_UNUSED const VkImageAspectFlags view_format_aspects = - vk_format_aspects(pCreateInfo->format); - - const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT - | VK_IMAGE_ASPECT_STENCIL_BIT; - - /* Validate format. 
*/ - if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); - assert(view_format_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - } else if (subresource->aspectMask & ds_flags) { - assert((subresource->aspectMask & ~ds_flags) == 0); - - assert(pCreateInfo->format == image->vk_format); - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - assert(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT); - assert(view_format_aspects & VK_IMAGE_ASPECT_DEPTH_BIT); - } - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - /* FINISHME: Is it legal to have an R8 view of S8? */ - assert(image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT); - assert(view_format_aspects & VK_IMAGE_ASPECT_STENCIL_BIT); - } - } else { - assert(!"bad VkImageSubresourceRange::aspectFlags"); - } - - return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); -} - static struct anv_state alloc_surface_state(struct anv_device *device, struct anv_cmd_buffer *cmd_buffer) @@ -628,18 +553,19 @@ void anv_buffer_view_init(struct anv_buffer_view *view, view->format = anv_get_isl_format(&device->info, pCreateInfo->format, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_TILING_LINEAR); + const uint32_t format_bs = isl_format_get_layout(view->format)->bs; view->bo = buffer->bo; view->offset = buffer->offset + pCreateInfo->offset; view->range = pCreateInfo->range == VK_WHOLE_SIZE ? 
- buffer->size - view->offset : pCreateInfo->range; + buffer->size - pCreateInfo->offset : pCreateInfo->range; + view->range = align_down_npot_u32(view->range, format_bs); if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { view->surface_state = alloc_surface_state(device, cmd_buffer); anv_fill_buffer_surface_state(device, view->surface_state, view->format, - view->offset, view->range, - isl_format_get_layout(view->format)->bs); + view->offset, view->range, format_bs); } else { view->surface_state = (struct anv_state){ 0 }; } diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c index dc098ef..af15c2c 100644 --- a/src/intel/vulkan/anv_meta_blit.c +++ b/src/intel/vulkan/anv_meta_blit.c @@ -106,7 +106,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); + nir_store_var(&b, color_out, &tex->dest.ssa, 0xf); return b.shader; } diff --git a/src/intel/vulkan/anv_meta_blit2d.c b/src/intel/vulkan/anv_meta_blit2d.c index 06e1043..649c11f 100644 --- a/src/intel/vulkan/anv_meta_blit2d.c +++ b/src/intel/vulkan/anv_meta_blit2d.c @@ -92,6 +92,21 @@ vk_format_for_size(int bs) } } +/* This function returns the format corresponding to a single component of the + * RGB format for the given size returned by vk_format_for_size(). 
+ */ +static VkFormat +vk_single_component_format_for_rgb_size(int bs) +{ + switch (bs) { + case 3: return VK_FORMAT_R8_UNORM; + case 6: return VK_FORMAT_R16_UNORM; + case 12: return VK_FORMAT_R32_UINT; + default: + unreachable("Invalid format block size"); + } +} + static void create_iview(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *surf, @@ -99,13 +114,14 @@ create_iview(struct anv_cmd_buffer *cmd_buffer, VkImageUsageFlags usage, uint32_t width, uint32_t height, + VkFormat format, VkImage *img, struct anv_image_view *iview) { const VkImageCreateInfo image_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, - .format = vk_format_for_size(surf->bs), + .format = format, .extent = { .width = width, .height = height, @@ -179,6 +195,7 @@ blit2d_bind_src(struct anv_cmd_buffer *cmd_buffer, create_iview(cmd_buffer, src, offset, VK_IMAGE_USAGE_SAMPLED_BIT, rect->src_x + rect->width, rect->src_y + rect->height, + vk_format_for_size(src->bs), &tmp->image, &tmp->iview); anv_CreateDescriptorPool(vk_device, @@ -328,10 +345,11 @@ blit2d_bind_dst(struct anv_cmd_buffer *cmd_buffer, uint64_t offset, uint32_t width, uint32_t height, + VkFormat format, struct blit2d_dst_temps *tmp) { create_iview(cmd_buffer, dst, offset, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - width, height, &tmp->image, &tmp->iview); + width, height, format, &tmp->image, &tmp->iview); anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), &(VkFramebufferCreateInfo) { @@ -406,7 +424,8 @@ anv_meta_blit2d_normal_dst(struct anv_cmd_buffer *cmd_buffer, struct blit2d_dst_temps dst_temps; blit2d_bind_dst(cmd_buffer, dst, offset, rects[r].dst_x + rects[r].width, - rects[r].dst_y + rects[r].height, &dst_temps); + rects[r].dst_y + rects[r].height, + vk_format_for_size(dst->bs), &dst_temps); struct blit_vb_data { float pos[2]; @@ -544,7 +563,8 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, }; struct blit2d_dst_temps dst_temps; - 
blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, &dst_temps); + blit2d_bind_dst(cmd_buffer, &dst_Y, offset, xmax_Y, ymax_Y, + VK_FORMAT_R8_UINT, &dst_temps); struct blit_vb_header { struct anv_vue_header vue; @@ -647,6 +667,141 @@ anv_meta_blit2d_w_tiled_dst(struct anv_cmd_buffer *cmd_buffer, } } +static void +anv_meta_blit2d_rgb_dst(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_blit2d_surf *src, + enum blit2d_src_type src_type, + struct anv_meta_blit2d_surf *dst, + unsigned num_rects, + struct anv_meta_blit2d_rect *rects) +{ + struct anv_device *device = cmd_buffer->device; + + for (unsigned r = 0; r < num_rects; ++r) { + struct blit2d_src_temps src_temps; + blit2d_bind_src(cmd_buffer, src, src_type, &rects[r], &src_temps); + + assert(dst->bs % 3 == 0); + assert(dst->tiling == ISL_TILING_LINEAR); + + uint32_t offset; + isl_tiling_get_intratile_offset_el(&cmd_buffer->device->isl_dev, + dst->tiling, 1, dst->pitch, + rects[r].dst_x, rects[r].dst_y, + &offset, + &rects[r].dst_x, &rects[r].dst_y); + + /* A red surface three times as wide as the actual RGB destination */ + struct anv_meta_blit2d_surf dst_R = { + .bo = dst->bo, + .tiling = dst->tiling, + .base_offset = dst->base_offset, + .bs = dst->bs / 3, + .pitch = dst->pitch, + }; + + struct blit2d_dst_temps dst_temps; + blit2d_bind_dst(cmd_buffer, &dst_R, offset, + (rects[r].dst_x + rects[r].width) * 3, + rects[r].dst_y + rects[r].height, + vk_single_component_format_for_rgb_size(dst->bs), + &dst_temps); + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + (rects[r].dst_x + rects[r].width) * 3, + rects[r].dst_y + rects[r].height, + }, + 
.tex_coord = { + rects[r].src_x + rects[r].width, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x * 3, + rects[r].dst_y + rects[r].height, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y + rects[r].height, + src->pitch, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + rects[r].dst_x * 3, + rects[r].dst_y, + }, + .tex_coord = { + rects[r].src_x, + rects[r].src_y, + src->pitch, + }, + }; + + if (!device->info.has_llc) + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit2d.render_pass, + .framebuffer = dst_temps.fb, + .renderArea = { + .offset = { rects[r].dst_x, rects[r].dst_y, }, + .extent = { rects[r].width, rects[r].height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + bind_pipeline(cmd_buffer, src_type, BLIT2D_DST_TYPE_RGB); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + blit2d_unbind_src(cmd_buffer, src_type, &src_temps); + blit2d_unbind_dst(cmd_buffer, &dst_temps); + } +} + void anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, struct anv_meta_blit2d_surf *src, @@ -666,7 +821,8 @@ anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer, num_rects, rects); return; } else if (dst->bs % 3 == 0) { - anv_finishme("Blitting to RGB destinations not yet supported"); + anv_meta_blit2d_rgb_dst(cmd_buffer, src, src_type, dst, + num_rects, rects); return; } else { assert(util_is_power_of_two(dst->bs)); @@ -892,6 +1048,61 @@ build_nir_copy_fragment_shader(struct anv_device *device, return b.shader; } +/* RGB copies have the same interface as normal copies */ +#define rgb_vi_create_info normal_vi_create_info + +static nir_shader * +build_nir_rgb_fragment_shader(struct anv_device *device, + texel_fetch_build_func txf_func) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + const struct glsl_type *vec3 = glsl_vector_type(GLSL_TYPE_FLOAT, 3); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec3, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + + /* We need gl_FragCoord so we know our position */ + nir_variable *frag_coord_in = nir_variable_create(b.shader, + nir_var_shader_in, + vec4, "gl_FragCoord"); + frag_coord_in->data.location = VARYING_SLOT_POS; + frag_coord_in->data.origin_upper_left = true; + + nir_ssa_def *pos_int = nir_f2i(&b, nir_load_var(&b, tex_pos_in)); + unsigned swiz[4] = { 0, 1 }; + nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false); + nir_ssa_def *tex_pitch = nir_channel(&b, pos_int, 2); + + nir_ssa_def *color = txf_func(&b, device, tex_pos, tex_pitch); + + /* 
We figure out which component we are by the x component of FragCoord */ + nir_ssa_def *frag_coord_int = nir_f2i(&b, nir_load_var(&b, frag_coord_in)); + nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, frag_coord_int, 0), + nir_imm_int(&b, 3)); + + /* Select the given channel from the texelFetch result */ + nir_ssa_def *color_channel = + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)), + nir_channel(&b, color, 0), + nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)), + nir_channel(&b, color, 1), + nir_channel(&b, color, 2))); + + nir_ssa_def *u = nir_ssa_undef(&b, 1, 32); + nir_store_var(&b, color_out, nir_vec4(&b, color_channel, u, u, u), 0x1); + + return b.shader; +} + static const VkPipelineVertexInputStateCreateInfo w_tiled_vi_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 2, @@ -1095,7 +1306,13 @@ blit2d_init_pipeline(struct anv_device *device, vi_create_info = &w_tiled_vi_create_info; break; case BLIT2D_DST_TYPE_RGB: - /* Not yet supported */ + /* RGB destinations and W-detiling don't mix */ + if (src_type != BLIT2D_SRC_TYPE_NORMAL) + return VK_SUCCESS; + + fs.nir = build_nir_rgb_fragment_shader(device, src_func); + vi_create_info = &rgb_vi_create_info; + break; default: return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c index 7ec0608..5d8dd3d 100644 --- a/src/intel/vulkan/anv_meta_clear.c +++ b/src/intel/vulkan/anv_meta_clear.c @@ -25,6 +25,8 @@ #include "anv_private.h" #include "nir/nir_builder.h" +#include "util/u_format_rgb9e5.h" + /** Vertex attributes for color clears. 
*/ struct color_clear_vattrs { struct anv_vue_header vue_header; @@ -754,12 +756,22 @@ static void anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, struct anv_image *image, VkImageLayout image_layout, - const VkClearValue *clear_value, + VkClearValue clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges) { VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + VkFormat vk_format = image->vk_format; + if (vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { + /* We can't actually render to this format so we have to work around it + * by manually unpacking and using R32_UINT. + */ + clear_value.color.uint32[0] = + float3_to_rgb9e5(clear_value.color.float32); + vk_format = VK_FORMAT_R32_UINT; + } + for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; for (uint32_t l = 0; l < anv_get_levelCount(image, range); ++l) { @@ -773,7 +785,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = anv_image_to_handle(image), .viewType = anv_meta_get_view_type(image), - .format = image->vk_format, + .format = vk_format, .subresourceRange = { .aspectMask = range->aspectMask, .baseMipLevel = range->baseMipLevel + l, @@ -800,7 +812,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, &fb); VkAttachmentDescription att_desc = { - .format = iview.vk_format, + .format = vk_format, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, @@ -864,7 +876,7 @@ anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, VkClearAttachment clear_att = { .aspectMask = range->aspectMask, .colorAttachment = 0, - .clearValue = *clear_value, + .clearValue = clear_value, }; VkClearRect clear_rect = { @@ -903,7 +915,7 @@ void anv_CmdClearColorImage( meta_clear_begin(&saved_state, cmd_buffer); anv_cmd_clear_image(cmd_buffer, image, imageLayout, - (const VkClearValue *) pColor, + (VkClearValue) { 
.color = *pColor }, rangeCount, pRanges); meta_clear_end(&saved_state, cmd_buffer); @@ -924,7 +936,7 @@ void anv_CmdClearDepthStencilImage( meta_clear_begin(&saved_state, cmd_buffer); anv_cmd_clear_image(cmd_buffer, image, imageLayout, - (const VkClearValue *) pDepthStencil, + (VkClearValue) { .depthStencil = *pDepthStencil }, rangeCount, pRanges); meta_clear_end(&saved_state, cmd_buffer); @@ -1005,7 +1017,7 @@ do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - &clear_value, 1, &range); + clear_value, 1, &range); } void anv_CmdFillBuffer( diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 5a09464..33c7fe4 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -204,6 +204,12 @@ void anv_DestroyPipeline( pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + if (pipeline->shaders[s]) + anv_shader_bin_unref(device, pipeline->shaders[s]); + } + anv_free2(&device->alloc, pAllocator, pipeline); } @@ -391,15 +397,34 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias) prog_data->binding_table.image_start = bias; } +static struct anv_shader_bin * +anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) +{ + if (cache) { + return anv_pipeline_cache_upload_kernel(cache, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, + bind_map); + } else { + return anv_shader_bin_create(pipeline->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); + 
} +} + + static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - const struct brw_stage_prog_data *prog_data, - struct anv_pipeline_bind_map *map) + struct anv_shader_bin *shader) { - pipeline->prog_data[stage] = prog_data; + pipeline->shaders[stage] = shader; pipeline->active_stages |= mesa_to_vk_shader_stage(stage); - pipeline->bindings[stage] = *map; } static VkResult @@ -412,20 +437,20 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_vs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_vs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + if (cache) { + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, + pipeline->layout, spec_info); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_vs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -464,28 +489,29 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? 
sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + ralloc_free(mem_ctx); } const struct brw_vs_prog_data *vs_prog_data = - (const struct brw_vs_prog_data *) stage_prog_data; + (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin); if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = kernel; + pipeline->vs_simd8 = bin->kernel.offset; pipeline->vs_vec4 = NO_KERNEL; } else { pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = kernel; + pipeline->vs_vec4 = bin->kernel.offset; } - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin); return VK_SUCCESS; } @@ -500,20 +526,20 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_gs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_gs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + if (cache) { + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, + pipeline->layout, spec_info); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_gs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -551,20 +577,20 @@ anv_pipeline_compile_gs(struct 
anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ - stage_prog_data = &prog_data.base.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - pipeline->gs_kernel = kernel; + pipeline->gs_kernel = bin->kernel.offset; - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin); return VK_SUCCESS; } @@ -580,20 +606,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_wm_prog_key key; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - if (module->size > 0) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - pipeline->ps_ksp0 = - anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + if (cache) { + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, + pipeline->layout, spec_info); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (pipeline->ps_ksp0 == NO_KERNEL) { + if (bin == NULL) { struct brw_wm_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -633,7 +659,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, assert(num_rts + array_len <= 8); for (unsigned i = 0; i < array_len; i++) { - rt_bindings[num_rts] = (struct anv_pipeline_binding) { + rt_bindings[num_rts + i] = (struct 
anv_pipeline_binding) { .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, .binding = 0, .index = rt + i, @@ -682,19 +708,20 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base; - pipeline->ps_ksp0 = - anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - stage_prog_data, &map); + pipeline->ps_ksp0 = bin->kernel.offset; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin); return VK_SUCCESS; } @@ -709,20 +736,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - const struct brw_stage_prog_data *stage_prog_data; struct anv_pipeline_bind_map map; struct brw_cs_prog_key key; - uint32_t kernel = NO_KERNEL; + struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_cs_prog_key(&pipeline->device->info, &key); - if (module->size > 0) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map); + if (cache) { + anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, + pipeline->layout, spec_info); + bin = anv_pipeline_cache_search(cache, sha1, 20); } - if (module->size == 0 || kernel == NO_KERNEL) { + if (bin == NULL) { struct brw_cs_prog_data prog_data = { 0, }; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; @@ -754,20 +781,20 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, return 
vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - stage_prog_data = &prog_data.base; - kernel = anv_pipeline_cache_upload_kernel(cache, - module->size > 0 ? sha1 : NULL, - shader_code, code_size, - &stage_prog_data, sizeof(prog_data), - &map); + bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, + shader_code, code_size, + &prog_data, sizeof(prog_data), &map); + if (!bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } ralloc_free(mem_ctx); } - pipeline->cs_simd = kernel; + pipeline->cs_simd = bin->kernel.offset; - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - stage_prog_data, &map); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin); return VK_SUCCESS; } @@ -1161,8 +1188,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; @@ -1180,27 +1206,33 @@ anv_pipeline_init(struct anv_pipeline *pipeline, } if (modules[MESA_SHADER_VERTEX]) { - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, - modules[MESA_SHADER_VERTEX], - pStages[MESA_SHADER_VERTEX]->pName, - pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); + result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_VERTEX], + pStages[MESA_SHADER_VERTEX]->pName, + pStages[MESA_SHADER_VERTEX]->pSpecializationInfo); + if (result != VK_SUCCESS) + goto compile_fail; } if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]) anv_finishme("no tessellation support"); if (modules[MESA_SHADER_GEOMETRY]) { - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, - modules[MESA_SHADER_GEOMETRY], - pStages[MESA_SHADER_GEOMETRY]->pName, - 
pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo); + result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_GEOMETRY], + pStages[MESA_SHADER_GEOMETRY]->pName, + pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo); + if (result != VK_SUCCESS) + goto compile_fail; } if (modules[MESA_SHADER_FRAGMENT]) { - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, - modules[MESA_SHADER_FRAGMENT], - pStages[MESA_SHADER_FRAGMENT]->pName, - pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo); + result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, + modules[MESA_SHADER_FRAGMENT], + pStages[MESA_SHADER_FRAGMENT]->pName, + pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo); + if (result != VK_SUCCESS) + goto compile_fail; } if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { @@ -1263,6 +1295,16 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->topology = _3DPRIM_RECTLIST; return VK_SUCCESS; + +compile_fail: + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + if (pipeline->shaders[s]) + anv_shader_bin_unref(device, pipeline->shaders[s]); + } + + anv_reloc_list_finish(&pipeline->batch_relocs, alloc); + + return result; } VkResult @@ -1277,9 +1319,6 @@ anv_graphics_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - if (cache == NULL) - cache = &device->default_pipeline_cache; - switch (device->info.gen) { case 7: if (device->info.is_haswell) @@ -1333,9 +1372,6 @@ static VkResult anv_compute_pipeline_create( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - if (cache == NULL) - cache = &device->default_pipeline_cache; - switch (device->info.gen) { case 7: if (device->info.is_haswell) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index fbca311..2753c46 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ 
b/src/intel/vulkan/anv_pipeline_cache.c @@ -22,9 +22,120 @@ */ #include "util/mesa-sha1.h" +#include "util/hash_table.h" #include "util/debug.h" #include "anv_private.h" +struct shader_bin_key { + uint32_t size; + uint8_t data[0]; +}; + +static size_t +anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size, + uint32_t surface_count, uint32_t sampler_count) +{ + const uint32_t binding_data_size = + (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding); + + return align_u32(sizeof(struct anv_shader_bin), 8) + + align_u32(prog_data_size, 8) + + align_u32(sizeof(uint32_t) + key_size, 8) + + align_u32(binding_data_size, 8); +} + +static inline const struct shader_bin_key * +anv_shader_bin_get_key(const struct anv_shader_bin *shader) +{ + const void *data = shader; + data += align_u32(sizeof(struct anv_shader_bin), 8); + data += align_u32(shader->prog_data_size, 8); + return data; +} + +struct anv_shader_bin * +anv_shader_bin_create(struct anv_device *device, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) +{ + const size_t size = + anv_shader_bin_size(prog_data_size, key_size, + bind_map->surface_count, bind_map->sampler_count); + + struct anv_shader_bin *shader = + anv_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!shader) + return NULL; + + shader->ref_cnt = 1; + + shader->kernel = + anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64); + memcpy(shader->kernel.map, kernel_data, kernel_size); + shader->kernel_size = kernel_size; + shader->bind_map = *bind_map; + shader->prog_data_size = prog_data_size; + + /* Now we fill out the floating data at the end */ + void *data = shader; + data += align_u32(sizeof(struct anv_shader_bin), 8); + + memcpy(data, prog_data, prog_data_size); + data += align_u32(prog_data_size, 8); + + struct shader_bin_key *key = data; + 
key->size = key_size; + memcpy(key->data, key_data, key_size); + data += align_u32(sizeof(*key) + key_size, 8); + + shader->bind_map.surface_to_descriptor = data; + memcpy(data, bind_map->surface_to_descriptor, + bind_map->surface_count * sizeof(struct anv_pipeline_binding)); + data += bind_map->surface_count * sizeof(struct anv_pipeline_binding); + + shader->bind_map.sampler_to_descriptor = data; + memcpy(data, bind_map->sampler_to_descriptor, + bind_map->sampler_count * sizeof(struct anv_pipeline_binding)); + + return shader; +} + +void +anv_shader_bin_destroy(struct anv_device *device, + struct anv_shader_bin *shader) +{ + assert(shader->ref_cnt == 0); + anv_state_pool_free(&device->instruction_state_pool, shader->kernel); + anv_free(&device->alloc, shader); +} + +static size_t +anv_shader_bin_data_size(const struct anv_shader_bin *shader) +{ + return anv_shader_bin_size(shader->prog_data_size, + anv_shader_bin_get_key(shader)->size, + shader->bind_map.surface_count, + shader->bind_map.sampler_count) + + align_u32(shader->kernel_size, 8); +} + +static void +anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) +{ + size_t struct_size = + anv_shader_bin_size(shader->prog_data_size, + anv_shader_bin_get_key(shader)->size, + shader->bind_map.surface_count, + shader->bind_map.sampler_count); + + memcpy(data, shader, struct_size); + data += struct_size; + + memcpy(data, shader->kernel.map, shader->kernel_size); +} + /* Remaining work: * * - Compact binding table layout so it's tight and not dependent on @@ -37,69 +148,62 @@ * dual_src_blend. 
*/ +static uint32_t +shader_bin_key_hash_func(const void *void_key) +{ + const struct shader_bin_key *key = void_key; + return _mesa_hash_data(key->data, key->size); +} + +static bool +shader_bin_key_compare_func(const void *void_a, const void *void_b) +{ + const struct shader_bin_key *a = void_a, *b = void_b; + if (a->size != b->size) + return false; + + return memcmp(a->data, b->data, a->size) == 0; +} + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) + struct anv_device *device, + bool cache_enabled) { cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); pthread_mutex_init(&cache->mutex, NULL); - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); - cache->hash_table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. */ - if (cache->hash_table == NULL || - !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true)) - cache->table_size = 0; - else - memset(cache->hash_table, 0xff, byte_size); + if (cache_enabled) { + cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, + shader_bin_key_compare_func); + } else { + cache->cache = NULL; + } } void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) { - anv_state_stream_finish(&cache->program_stream); pthread_mutex_destroy(&cache->mutex); - free(cache->hash_table); -} - -struct cache_entry { - unsigned char sha1[20]; - uint32_t prog_data_size; - uint32_t kernel_size; - uint32_t surface_count; - uint32_t sampler_count; - uint32_t image_count; - - char prog_data[0]; - - /* kernel follows prog_data at next 64 byte aligned address */ -}; - -static uint32_t -entry_size(struct cache_entry *entry) -{ - /* This returns the number of bytes needed to serialize an entry, which - * doesn't include the alignment padding bytes. 
- */ - const uint32_t map_size = - entry->surface_count * sizeof(struct anv_pipeline_binding) + - entry->sampler_count * sizeof(struct anv_pipeline_binding); + if (cache->cache) { + /* This is a bit unfortunate. In order to keep things from randomly + * going away, the shader cache has to hold a reference to all shader + * binaries it contains. We unref them when we destroy the cache. + */ + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + anv_shader_bin_unref(cache->device, entry->data); - return sizeof(*entry) + entry->prog_data_size + map_size; + _mesa_hash_table_destroy(cache->cache, NULL); + } } void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct anv_shader_module *module, const char *entrypoint, + const struct anv_pipeline_layout *pipeline_layout, const VkSpecializationInfo *spec_info) { struct mesa_sha1 *ctx; @@ -108,6 +212,10 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_update(ctx, key, key_size); _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); + if (pipeline_layout) { + _mesa_sha1_update(ctx, pipeline_layout->sha1, + sizeof(pipeline_layout->sha1)); + } /* hash in shader stage, pipeline layout? 
*/ if (spec_info) { _mesa_sha1_update(ctx, spec_info->pMapEntries, @@ -117,210 +225,94 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, _mesa_sha1_final(ctx, hash); } -static uint32_t -anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map) +static struct anv_shader_bin * +anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size) { - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->hash_table[index]; - - if (offset == ~0) - return NO_KERNEL; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) { - assert(map); - void *p = entry->prog_data; - *prog_data = p; - p += entry->prog_data_size; - map->surface_count = entry->surface_count; - map->sampler_count = entry->sampler_count; - map->image_count = entry->image_count; - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - map->sampler_to_descriptor = p; - } - - return offset + align_u32(entry_size(entry), 64); - } - } - - /* This can happen if the pipeline cache is disabled via - * ANV_ENABLE_PIPELINE_CACHE=false - */ - return NO_KERNEL; + uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; + struct shader_bin_key *key = (void *)vla; + key->size = key_size; + memcpy(key->data, key_data, key_size); + + struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); + if (entry) + return entry->data; + else + return NULL; } -uint32_t +struct anv_shader_bin * anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data 
**prog_data, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size) { - uint32_t kernel; + if (!cache->cache) + return NULL; pthread_mutex_lock(&cache->mutex); - kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); pthread_mutex_unlock(&cache->mutex); - return kernel; -} - -static void -anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) entry->sha1); - - /* We'll always be able to insert when we get here. */ - assert(cache->kernel_count < cache->table_size / 2); + /* We increment refcount before handing it to the caller */ + if (shader) + anv_shader_bin_ref(shader); - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (cache->hash_table[index] == ~0) { - cache->hash_table[index] = entry_offset; - break; - } - } - - cache->total_size += entry_size(entry) + entry->kernel_size; - cache->kernel_count++; + return shader; } -static VkResult -anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +static struct anv_shader_bin * +anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->hash_table[0]); - uint32_t *table; - uint32_t *old_table = cache->hash_table; - - table = malloc(byte_size); - if (table == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cache->hash_table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - 
memset(cache->hash_table, 0xff, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - const uint32_t offset = old_table[i]; - if (offset == ~0) - continue; + struct anv_shader_bin *shader = + anv_pipeline_cache_search_locked(cache, key_data, key_size); + if (shader) + return shader; - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_set_entry(cache, entry, offset); - } + struct anv_shader_bin *bin = + anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); + if (!bin) + return NULL; - free(old_table); + _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin); - return VK_SUCCESS; + return bin; } -static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_set_entry(cache, entry, entry_offset); -} - -uint32_t +struct anv_shader_bin * anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, size_t kernel_size, - const struct brw_stage_prog_data **prog_data, - size_t prog_data_size, - struct anv_pipeline_bind_map *map) + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map) { - pthread_mutex_lock(&cache->mutex); - - /* Before uploading, check again that another thread didn't upload this - * shader while we were compiling it. 
- */ - if (sha1) { - uint32_t cached_kernel = - anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map); - if (cached_kernel != NO_KERNEL) { - pthread_mutex_unlock(&cache->mutex); - return cached_kernel; - } - } + if (cache->cache) { + pthread_mutex_lock(&cache->mutex); - struct cache_entry *entry; + struct anv_shader_bin *bin = + anv_pipeline_cache_add_shader(cache, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); - const uint32_t map_size = - map->surface_count * sizeof(struct anv_pipeline_binding) + - map->sampler_count * sizeof(struct anv_pipeline_binding); + pthread_mutex_unlock(&cache->mutex); - const uint32_t preamble_size = - align_u32(sizeof(*entry) + prog_data_size + map_size, 64); + /* We increment refcount before handing it to the caller */ + anv_shader_bin_ref(bin); - const uint32_t size = preamble_size + kernel_size; - - assert(size < cache->program_stream.block_pool->block_size); - const struct anv_state state = - anv_state_stream_alloc(&cache->program_stream, size, 64); - - entry = state.map; - entry->prog_data_size = prog_data_size; - entry->surface_count = map->surface_count; - entry->sampler_count = map->sampler_count; - entry->image_count = map->image_count; - entry->kernel_size = kernel_size; - - void *p = entry->prog_data; - memcpy(p, *prog_data, prog_data_size); - p += prog_data_size; - - memcpy(p, map->surface_to_descriptor, - map->surface_count * sizeof(struct anv_pipeline_binding)); - map->surface_to_descriptor = p; - p += map->surface_count * sizeof(struct anv_pipeline_binding); - - memcpy(p, map->sampler_to_descriptor, - map->sampler_count * sizeof(struct anv_pipeline_binding)); - map->sampler_to_descriptor = p; - - if (sha1) { - assert(anv_pipeline_cache_search_unlocked(cache, sha1, - NULL, NULL) == NO_KERNEL); - - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - anv_pipeline_cache_add_entry(cache, entry, state.offset); + return bin; + } else { + /* In this case, we're not caching it so 
the caller owns it entirely */ + return anv_shader_bin_create(cache->device, key_data, key_size, + kernel_data, kernel_size, + prog_data, prog_data_size, bind_map); } - - pthread_mutex_unlock(&cache->mutex); - - memcpy(state.map + preamble_size, kernel, kernel_size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; - - return state.offset + preamble_size; } struct cache_header { @@ -339,6 +331,9 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, struct cache_header header; uint8_t uuid[VK_UUID_SIZE]; + if (cache->cache == NULL) + return; + if (size < sizeof(header)) return; memcpy(&header, data, sizeof(header)); @@ -354,38 +349,62 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) return; - void *end = (void *) data + size; - void *p = (void *) data + header.header_size; - - while (p < end) { - struct cache_entry *entry = p; - - void *data = entry->prog_data; - const struct brw_stage_prog_data *prog_data = data; - data += entry->prog_data_size; - - struct anv_pipeline_binding *surface_to_descriptor = data; - data += entry->surface_count * sizeof(struct anv_pipeline_binding); - struct anv_pipeline_binding *sampler_to_descriptor = data; - data += entry->sampler_count * sizeof(struct anv_pipeline_binding); - void *kernel = data; - - struct anv_pipeline_bind_map map = { - .surface_count = entry->surface_count, - .sampler_count = entry->sampler_count, - .image_count = entry->image_count, - .surface_to_descriptor = surface_to_descriptor, - .sampler_to_descriptor = sampler_to_descriptor - }; - - anv_pipeline_cache_upload_kernel(cache, entry->sha1, - kernel, entry->kernel_size, - &prog_data, - entry->prog_data_size, &map); - p = kernel + entry->kernel_size; + const void *end = data + size; + const void *p = data + header.header_size; + + /* Count is the total number of valid entries */ + uint32_t count; + if (p + 
sizeof(count) >= end) + return; + memcpy(&count, p, sizeof(count)); + p += align_u32(sizeof(count), 8); + + for (uint32_t i = 0; i < count; i++) { + struct anv_shader_bin bin; + if (p + sizeof(bin) > end) + break; + memcpy(&bin, p, sizeof(bin)); + p += align_u32(sizeof(struct anv_shader_bin), 8); + + const void *prog_data = p; + p += align_u32(bin.prog_data_size, 8); + + struct shader_bin_key key; + if (p + sizeof(key) > end) + break; + memcpy(&key, p, sizeof(key)); + const void *key_data = p + sizeof(key); + p += align_u32(sizeof(key) + key.size, 8); + + /* We're going to memcpy this so getting rid of const is fine */ + struct anv_pipeline_binding *bindings = (void *)p; + p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) * + sizeof(struct anv_pipeline_binding), 8); + bin.bind_map.surface_to_descriptor = bindings; + bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count; + + const void *kernel_data = p; + p += align_u32(bin.kernel_size, 8); + + if (p > end) + break; + + anv_pipeline_cache_add_shader(cache, key_data, key.size, + kernel_data, bin.kernel_size, + prog_data, bin.prog_data_size, + &bin.bind_map); } } +static bool +pipeline_cache_enabled() +{ + static int enabled = -1; + if (enabled < 0) + enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); + return enabled; +} + VkResult anv_CreatePipelineCache( VkDevice _device, const VkPipelineCacheCreateInfo* pCreateInfo, @@ -404,7 +423,7 @@ VkResult anv_CreatePipelineCache( if (cache == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - anv_pipeline_cache_init(cache, device); + anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); if (pCreateInfo->initialDataSize > 0) anv_pipeline_cache_load(cache, @@ -439,9 +458,16 @@ VkResult anv_GetPipelineCacheData( ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); struct cache_header *header; - const size_t size = sizeof(*header) + cache->total_size; - if (pData == NULL) { + size_t size = 
align_u32(sizeof(*header), 8) + + align_u32(sizeof(uint32_t), 8); + + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) + size += anv_shader_bin_data_size(entry->data); + } + *pDataSize = size; return VK_SUCCESS; } @@ -458,25 +484,25 @@ VkResult anv_GetPipelineCacheData( header->vendor_id = 0x8086; header->device_id = device->chipset_id; anv_device_get_cache_uuid(header->uuid); - p += header->header_size; + p += align_u32(header->header_size, 8); - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->hash_table[i] == ~0) - continue; + uint32_t *count = p; + p += align_u32(sizeof(*count), 8); + *count = 0; - entry = cache->program_stream.block_pool->map + cache->hash_table[i]; - const uint32_t size = entry_size(entry); - if (end < p + size + entry->kernel_size) - break; - - memcpy(p, entry, size); - p += size; + if (cache->cache) { + struct hash_entry *entry; + hash_table_foreach(cache->cache, entry) { + struct anv_shader_bin *shader = entry->data; + size_t data_size = anv_shader_bin_data_size(entry->data); + if (p + data_size > end) + break; - void *kernel = (void *) entry + align_u32(size, 64); + anv_shader_bin_write_data(shader, p); + p += data_size; - memcpy(p, kernel, entry->kernel_size); - p += entry->kernel_size; + (*count)++; + } } *pDataSize = p - pData; @@ -484,25 +510,6 @@ VkResult anv_GetPipelineCacheData( return VK_SUCCESS; } -static void -anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, - struct anv_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - const uint32_t offset = src->hash_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - src->program_stream.block_pool->map + offset; - - if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL) - continue; - - anv_pipeline_cache_add_entry(dst, entry, offset); - } -} - VkResult anv_MergePipelineCaches( VkDevice _device, VkPipelineCache destCache, @@ -511,10 
+518,23 @@ VkResult anv_MergePipelineCaches( { ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + if (!dst->cache) + return VK_SUCCESS; + for (uint32_t i = 0; i < srcCacheCount; i++) { ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + if (!src->cache) + continue; + + struct hash_entry *entry; + hash_table_foreach(src->cache, entry) { + struct anv_shader_bin *bin = entry->data; + if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin))) + continue; - anv_pipeline_cache_merge(dst, src); + anv_shader_bin_ref(bin); + _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin); + } } return VK_SUCCESS; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 50b860c..8b57e1b 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -83,6 +83,12 @@ extern "C" { #define MAX(a, b) ((a) > (b) ? (a) : (b)) static inline uint32_t +align_down_npot_u32(uint32_t v, uint32_t a) +{ + return v - (v % a); +} + +static inline uint32_t align_u32(uint32_t v, uint32_t a) { assert(a != 0 && a == (a & -a)); @@ -394,9 +400,9 @@ struct anv_fixed_size_state_pool { }; #define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 10 +#define ANV_MAX_STATE_SIZE_LOG2 17 -#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) struct anv_state_pool { struct anv_block_pool *block_pool; @@ -652,31 +658,27 @@ struct anv_queue { struct anv_pipeline_cache { struct anv_device * device; - struct anv_state_stream program_stream; pthread_mutex_t mutex; - uint32_t total_size; - uint32_t table_size; - uint32_t kernel_count; - uint32_t * hash_table; + struct hash_table * cache; }; struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device); + struct anv_device *device, + bool cache_enabled); void anv_pipeline_cache_finish(struct anv_pipeline_cache 
*cache); -uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const struct brw_stage_prog_data **prog_data, - struct anv_pipeline_bind_map *map); -uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, - size_t kernel_size, - const struct brw_stage_prog_data **prog_data, - size_t prog_data_size, - struct anv_pipeline_bind_map *map); + +struct anv_shader_bin * +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const void *key, uint32_t key_size); +struct anv_shader_bin * +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const void *key_data, uint32_t key_size, + const void *kernel_data, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map); struct anv_device { VK_LOADER_DATA _loader_data; @@ -698,7 +700,7 @@ struct anv_device { struct anv_state_pool dynamic_state_pool; struct anv_block_pool instruction_block_pool; - struct anv_pipeline_cache default_pipeline_cache; + struct anv_state_pool instruction_state_pool; struct anv_block_pool surface_state_block_pool; struct anv_state_pool surface_state_pool; @@ -1057,6 +1059,8 @@ struct anv_pipeline_layout { struct { bool has_dynamic_offsets; } stage[MESA_SHADER_STAGES]; + + unsigned char sha1[20]; }; struct anv_buffer { @@ -1422,6 +1426,7 @@ struct anv_shader_module { void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, struct anv_shader_module *module, const char *entrypoint, + const struct anv_pipeline_layout *pipeline_layout, const VkSpecializationInfo *spec_info); static inline gl_shader_stage @@ -1449,13 +1454,57 @@ struct anv_pipeline_bind_map { uint32_t surface_count; uint32_t sampler_count; uint32_t image_count; - uint32_t attachment_count; struct anv_pipeline_binding * surface_to_descriptor; struct anv_pipeline_binding * sampler_to_descriptor; - uint32_t * 
surface_to_attachment; }; +struct anv_shader_bin { + uint32_t ref_cnt; + + struct anv_state kernel; + uint32_t kernel_size; + + struct anv_pipeline_bind_map bind_map; + + uint32_t prog_data_size; + + /* Prog data follows, then the key, both aligned to 8-bytes */ +}; + +struct anv_shader_bin * +anv_shader_bin_create(struct anv_device *device, + const void *key, uint32_t key_size, + const void *kernel, uint32_t kernel_size, + const void *prog_data, uint32_t prog_data_size, + const struct anv_pipeline_bind_map *bind_map); + +void +anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader); + +static inline void +anv_shader_bin_ref(struct anv_shader_bin *shader) +{ + assert(shader->ref_cnt >= 1); + __sync_fetch_and_add(&shader->ref_cnt, 1); +} + +static inline void +anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader) +{ + assert(shader->ref_cnt >= 1); + if (__sync_fetch_and_add(&shader->ref_cnt, -1) == 1) + anv_shader_bin_destroy(device, shader); +} + +static inline const struct brw_stage_prog_data * +anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader) +{ + const void *data = shader; + data += align_u32(sizeof(struct anv_shader_bin), 8); + return data; +} + struct anv_pipeline { struct anv_device * device; struct anv_batch batch; @@ -1465,12 +1514,12 @@ struct anv_pipeline { struct anv_dynamic_state dynamic_state; struct anv_pipeline_layout * layout; - struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; bool use_repclear; bool needs_data_cache; - const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + struct anv_shader_bin * shaders[MESA_SHADER_STAGES]; + struct { uint32_t start[MESA_SHADER_GEOMETRY + 1]; uint32_t size[MESA_SHADER_GEOMETRY + 1]; @@ -1513,29 +1562,29 @@ struct anv_pipeline { } gen9; }; -static inline const struct brw_vs_prog_data * -get_vs_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX]; -} - 
-static inline const struct brw_gs_prog_data * -get_gs_prog_data(struct anv_pipeline *pipeline) +static inline bool +anv_pipeline_has_stage(const struct anv_pipeline *pipeline, + gl_shader_stage stage) { - return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY]; + return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0; } -static inline const struct brw_wm_prog_data * -get_wm_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT]; +#define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \ +static inline const struct brw_##prefix##_prog_data * \ +get_##prefix##_prog_data(struct anv_pipeline *pipeline) \ +{ \ + if (anv_pipeline_has_stage(pipeline, stage)) { \ + return (const struct brw_##prefix##_prog_data *) \ + anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \ + } else { \ + return NULL; \ + } \ } -static inline const struct brw_cs_prog_data * -get_cs_prog_data(struct anv_pipeline *pipeline) -{ - return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE]; -} +ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX) +ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY) +ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT) +ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE) struct anv_graphics_pipeline_create_info { /** diff --git a/src/intel/vulkan/anv_wsi_wayland.c b/src/intel/vulkan/anv_wsi_wayland.c index a9e1617..18dae0a 100644 --- a/src/intel/vulkan/anv_wsi_wayland.c +++ b/src/intel/vulkan/anv_wsi_wayland.c @@ -519,6 +519,7 @@ wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, if (!chain->images[i].busy) { /* We found a non-busy image */ *image_index = i; + chain->images[i].busy = true; return VK_SUCCESS; } } diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c index 2895d6b..81c524b 100644 --- a/src/intel/vulkan/anv_wsi_x11.c +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -516,6 +516,7 
@@ x11_acquire_next_image(struct anv_swapchain *anv_chain, /* We found a non-busy image */ xshmfence_await(chain->images[i].shm_fence); *image_index = i; + chain->images[i].busy = true; return VK_SUCCESS; } } @@ -553,6 +554,7 @@ x11_queue_present(struct anv_swapchain *anv_chain, xshmfence_reset(image->shm_fence); + ++chain->send_sbc; xcb_void_cookie_t cookie = xcb_present_pixmap(chain->conn, chain->window, @@ -786,6 +788,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->window = surface->window; chain->extent = pCreateInfo->imageExtent; chain->image_count = num_images; + chain->send_sbc = 0; chain->event_id = xcb_generate_id(chain->conn); xcb_present_select_input(chain->conn, chain->event_id, chain->window, diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 89cb51f..d1b18e0 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -75,76 +75,6 @@ gen7_emit_rs_state(struct anv_pipeline *pipeline, GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf); } -static void -gen7_emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - if (info == NULL || info->attachmentCount == 0) { - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(BLEND_STATE), 64, - .ColorBufferBlendEnable = false, - .WriteDisableAlpha = true, - .WriteDisableRed = true, - .WriteDisableGreen = true, - .WriteDisableBlue = true); - } else { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - struct GENX(BLEND_STATE) blend = { - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .ColorClampRange = 
COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - }; - - /* Our hardware applies the blend factor prior to the blend function - * regardless of what function is used. Technically, this means the - * hardware can do MORE than GL or Vulkan specify. However, it also - * means that, for MIN and MAX, we have to stomp the blend factor to - * ONE to make it a no-op. 
- */ - if (a->colorBlendOp == VK_BLEND_OP_MIN || - a->colorBlendOp == VK_BLEND_OP_MAX) { - blend.SourceBlendFactor = BLENDFACTOR_ONE; - blend.DestinationBlendFactor = BLENDFACTOR_ONE; - } - if (a->alphaBlendOp == VK_BLEND_OP_MIN || - a->alphaBlendOp == VK_BLEND_OP_MAX) { - blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE; - blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE; - } - - pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool, - GENX(BLEND_STATE_length) * 4, - 64); - GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend); - if (pipeline->device->info.has_llc) - anv_state_clflush(pipeline->blend_state); - } - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { - bsp.BlendStatePointer = pipeline->blend_state.offset; - } -} - VkResult genX(graphics_pipeline_create)( VkDevice _device, @@ -182,31 +112,13 @@ genX(graphics_pipeline_create)( emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass); - gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, - pCreateInfo->pMultisampleState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); emit_urb_setup(pipeline); - const VkPipelineRasterizationStateCreateInfo *rs_info = - pCreateInfo->pRasterizationState; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { - clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace], - clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode], - clip.ClipEnable = !(extra && extra->use_rectlist), - clip.APIMode = APIMODE_OGL, - clip.ViewportXYClipTestEnable = true, - clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable, - clip.ClipMode = CLIPMODE_NORMAL, - - clip.TriangleStripListProvokingVertexSelect = 0, - clip.LineStripListProvokingVertexSelect = 0, - clip.TriangleFanProvokingVertexSelect = 1, - - clip.MinimumPointWidth = 0.125, - clip.MaximumPointWidth = 255.875, - clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1; - } + 
emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, + pCreateInfo->pRasterizationState, extra); if (pCreateInfo->pMultisampleState && pCreateInfo->pMultisampleState->rasterizationSamples > 1) @@ -385,6 +297,7 @@ genX(graphics_pipeline_create)( wm.LineEndCapAntialiasingRegionWidth = 0; /* 0.5 pixels */ wm.LineAntialiasingRegionWidth = 1; /* 1.0 pixels */ wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; + wm.PixelShaderKillPixel = wm_prog_data->uses_kill; wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 6d70df6..cc10d3a 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -100,123 +100,6 @@ emit_rs_state(struct anv_pipeline *pipeline, } static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - uint32_t num_dwords = GENX(BLEND_STATE_length); - pipeline->blend_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GENX(BLEND_STATE) blend_state = { - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, - }; - - /* Default everything to disabled */ - for (uint32_t i = 0; i < 8; i++) { - blend_state.Entry[i].WriteDisableAlpha = true; - blend_state.Entry[i].WriteDisableRed = true; - blend_state.Entry[i].WriteDisableGreen = true; - blend_state.Entry[i].WriteDisableBlue = true; - } - - struct anv_pipeline_bind_map *map = - &pipeline->bindings[MESA_SHADER_FRAGMENT]; - - bool has_writeable_rt = false; - for (unsigned i = 0; i < map->surface_count; i++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; - - /* All color 
attachments are at the beginning of the binding table */ - if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) - break; - - /* We can have at most 8 attachments */ - assert(i < 8); - - if (binding->index >= info->attachmentCount) - continue; - - assert(binding->binding == 0); - const VkPipelineColorBlendAttachmentState *a = - &info->pAttachments[binding->index]; - - if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || - a->dstColorBlendFactor != a->dstAlphaBlendFactor || - a->colorBlendOp != a->alphaBlendOp) { - blend_state.IndependentAlphaBlendEnable = true; - } - - blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .ColorClampRange = COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - }; - - if (a->colorWriteMask != 0) - has_writeable_rt = true; - - /* Our hardware applies the blend factor prior to the blend function - * regardless of what function is used. Technically, this means the - * hardware can do MORE than GL or Vulkan specify. However, it also - * means that, for MIN and MAX, we have to stomp the blend factor to - * ONE to make it a no-op. 
- */ - if (a->colorBlendOp == VK_BLEND_OP_MIN || - a->colorBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; - } - if (a->alphaBlendOp == VK_BLEND_OP_MIN || - a->alphaBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; - } - } - - struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0]; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) { - blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable; - blend.HasWriteableRT = has_writeable_rt; - blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable; - blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor; - blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor; - blend.SourceBlendFactor = bs0->SourceBlendFactor; - blend.DestinationBlendFactor = bs0->DestinationBlendFactor; - blend.AlphaTestEnable = false; - blend.IndependentAlphaBlendEnable = - blend_state.IndependentAlphaBlendEnable; - } - - GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - if (!device->info.has_llc) - anv_state_clflush(pipeline->blend_state); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { - bsp.BlendStatePointer = pipeline->blend_state.offset; - bsp.BlendStatePointerValid = true; - } -} - -static void emit_ms_state(struct anv_pipeline *pipeline, const VkPipelineMultisampleStateCreateInfo *info) { @@ -303,29 +186,10 @@ genX(graphics_pipeline_create)( emit_urb_setup(pipeline); - const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { - clip.ClipEnable = !(extra && extra->use_rectlist); - clip.EarlyCullEnable = true; - clip.APIMode = 1; /* D3D */ - clip.ViewportXYClipTestEnable = true; - - clip.ClipMode = - 
pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? - REJECT_ALL : NORMAL; - - clip.NonPerspectiveBarycentricEnable = wm_prog_data ? - (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0; - - clip.TriangleStripListProvokingVertexSelect = 0; - clip.LineStripListProvokingVertexSelect = 0; - clip.TriangleFanProvokingVertexSelect = 1; - - clip.MinimumPointWidth = 0.125; - clip.MaximumPointWidth = 255.875; - clip.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1; - } + emit_3dstate_clip(pipeline, pCreateInfo->pViewportState, + pCreateInfo->pRasterizationState, extra); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { wm.StatisticsEnable = true; wm.LineEndCapAntialiasingRegionWidth = _05pixels; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 741d5bf..f92d856 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1448,4 +1448,17 @@ void genX(CmdCopyQueryPoolResults)( } } +#else +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + anv_finishme("Queries not yet supported on Ivy Bridge"); +} #endif diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c index 0d36e3c..a74071c 100644 --- a/src/intel/vulkan/genX_l3.c +++ b/src/intel/vulkan/genX_l3.c @@ -315,10 +315,14 @@ get_pipeline_state_l3_weights(const struct anv_pipeline *pipeline) bool needs_dc = false, needs_slm = false; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const struct brw_stage_prog_data *prog_data = pipeline->prog_data[i]; + if (!anv_pipeline_has_stage(pipeline, i)) + continue; + + const struct brw_stage_prog_data *prog_data = + anv_shader_bin_get_prog_data(pipeline->shaders[i]); needs_dc |= 
pipeline->needs_data_cache; - needs_slm |= prog_data && prog_data->total_shared; + needs_slm |= prog_data->total_shared; } return get_default_l3_weights(&pipeline->device->info, diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 5cbcfd2..7d8129d 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -63,8 +63,7 @@ genX(compute_pipeline_create)( /* When we free the pipeline, we detect stages based on the NULL status * of various prog_data pointers. Make them NULL by default. */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); pipeline->vs_simd8 = NO_KERNEL; pipeline->vs_vec4 = NO_KERNEL; @@ -76,9 +75,13 @@ genX(compute_pipeline_create)( assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); - anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, - pCreateInfo->stage.pName, - pCreateInfo->stage.pSpecializationInfo); + result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } pipeline->use_repclear = false; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 669b456..94692e4 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -291,6 +291,11 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) if (input_index < 0) continue; + if (attr == VARYING_SLOT_PNTC) { + sbe.PointSpriteTextureCoordinateEnable = 1 << input_index; + continue; + } + const int slot = fs_input_map->varying_to_slot[attr]; if (input_index >= 16) @@ -512,3 +517,177 @@ emit_ds_state(struct anv_pipeline *pipeline, 
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil); #endif } + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + const uint32_t num_dwords = GENX(BLEND_STATE_length); + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GENX(BLEND_STATE) blend_state = { +#if GEN_GEN >= 8 + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, +#else + /* Make sure it gets zeroed */ + .Entry = { { 0, }, }, +#endif + }; + + /* Default everything to disabled */ + for (uint32_t i = 0; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + uint32_t surface_count = 0; + struct anv_pipeline_bind_map *map; + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { + map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; + surface_count = map->surface_count; + } + + bool has_writeable_rt = false; + for (unsigned i = 0; i < surface_count; i++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; + + /* All color attachments are at the beginning of the binding table */ + if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) + break; + + /* We can have at most 8 attachments */ + assert(i < 8); + + if (binding->index >= info->attachmentCount) + continue; + + assert(binding->binding == 0); + const VkPipelineColorBlendAttachmentState *a = + &info->pAttachments[binding->index]; + + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { +#if GEN_GEN < 8 + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, 
+#endif + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { +#if GEN_GEN >= 8 + blend_state.IndependentAlphaBlendEnable = true; +#else + blend_state.Entry[i].IndependentAlphaBlendEnable = true; +#endif + } + + if (a->colorWriteMask != 0) + has_writeable_rt = true; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. 
+ */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + } + +#if GEN_GEN >= 8 + struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0]; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) { + blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable; + blend.HasWriteableRT = has_writeable_rt; + blend.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable; + blend.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor; + blend.DestinationAlphaBlendFactor = bs0->DestinationAlphaBlendFactor; + blend.SourceBlendFactor = bs0->SourceBlendFactor; + blend.DestinationBlendFactor = bs0->DestinationBlendFactor; + blend.AlphaTestEnable = false; + blend.IndependentAlphaBlendEnable = + blend_state.IndependentAlphaBlendEnable; + } +#else + (void)has_writeable_rt; +#endif + + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { + bsp.BlendStatePointer = pipeline->blend_state.offset; +#if GEN_GEN >= 8 + bsp.BlendStatePointerValid = true; +#endif + } +} + +static void +emit_3dstate_clip(struct anv_pipeline *pipeline, + const VkPipelineViewportStateCreateInfo *vp_info, + const VkPipelineRasterizationStateCreateInfo *rs_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); + (void) wm_prog_data; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) { + clip.ClipEnable = !(extra && extra->use_rectlist); + 
clip.EarlyCullEnable = true; + clip.APIMode = APIMODE_D3D, + clip.ViewportXYClipTestEnable = true; + + clip.ClipMode = rs_info->rasterizerDiscardEnable ? + CLIPMODE_REJECT_ALL : CLIPMODE_NORMAL; + + clip.TriangleStripListProvokingVertexSelect = 0; + clip.LineStripListProvokingVertexSelect = 0; + clip.TriangleFanProvokingVertexSelect = 1; + + clip.MinimumPointWidth = 0.125; + clip.MaximumPointWidth = 255.875; + clip.MaximumVPIndex = vp_info->viewportCount - 1; + +#if GEN_GEN == 7 + clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; + clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; + clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable; +#else + clip.NonPerspectiveBarycentricEnable = wm_prog_data ? + (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0; +#endif + } +} diff --git a/src/loader/loader.c b/src/loader/loader.c index 522fba3..56ffc5d 100644 --- a/src/loader/loader.c +++ b/src/loader/loader.c @@ -80,8 +80,11 @@ #include "xmlpool.h" #endif #endif -#ifdef HAVE_SYSFS -#include <sys/types.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> #endif #include "loader.h" diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 896f225..4c2e849 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -68,15 +68,10 @@ dri3_fence_await(xcb_connection_t *c, struct loader_dri3_buffer *buffer) static void dri3_update_num_back(struct loader_dri3_drawable *draw) { - draw->num_back = 1; - if (draw->flipping) { - if (!draw->is_pixmap && - !(draw->present_capabilities & XCB_PRESENT_CAPABILITY_ASYNC)) - draw->num_back++; - draw->num_back++; - } - if (draw->vtable->get_swap_interval(draw) == 0) - draw->num_back++; + if (draw->flipping) + draw->num_back = 3; + else + draw->num_back = 2; } void @@ -785,6 +780,7 @@ loader_dri3_open(xcb_connection_t *conn, } fd = xcb_dri3_open_reply_fds(conn, reply)[0]; + free(reply); fcntl(fd, F_SETFD, 
fcntl(fd, F_GETFD) | FD_CLOEXEC); return fd; @@ -1115,6 +1111,7 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format, xcb_sync_fence_t sync_fence; struct xshmfence *shm_fence; int fence_fd; + __DRIscreen *cur_screen; if (buffer) return buffer; @@ -1145,8 +1142,17 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, unsigned int format, if (!bp_reply) goto no_image; + /* Get the currently-bound screen or revert to using the drawable's screen if + * no contexts are currently bound. The latter case is at least necessary for + * obs-studio, when using Window Capture (Xcomposite) as a Source. + */ + cur_screen = draw->vtable->get_dri_screen(draw); + if (!cur_screen) { + cur_screen = draw->dri_screen; + } + buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format, - draw->dri_screen, draw->ext->image, + cur_screen, draw->ext->image, buffer); if (!buffer->image) goto no_image; diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h index 5b8fd1d..658e190 100644 --- a/src/loader/loader_dri3_helper.h +++ b/src/loader/loader_dri3_helper.h @@ -103,6 +103,7 @@ struct loader_dri3_vtable { void (*set_drawable_size)(struct loader_dri3_drawable *, int, int); bool (*in_current_context)(struct loader_dri3_drawable *); __DRIcontext *(*get_dri_context)(struct loader_dri3_drawable *); + __DRIscreen *(*get_dri_screen)(struct loader_dri3_drawable *); void (*flush_drawable)(struct loader_dri3_drawable *, unsigned); void (*show_fps)(struct loader_dri3_drawable *, uint64_t); }; diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am index 68a28a2..b44341d 100644 --- a/src/mapi/Makefile.am +++ b/src/mapi/Makefile.am @@ -64,6 +64,9 @@ BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h lib_LTLIBRARIES += shared-glapi/libglapi.la shared_glapi_libglapi_la_SOURCES = $(MAPI_GLAPI_FILES) shared-glapi/glapi_mapi_tmp.h +shared_glapi_libglapi_la_CFLAGS = \ + $(AM_CFLAGS) \ + $(VISIBILITY_CFLAGS) shared_glapi_libglapi_la_CPPFLAGS = \ $(AM_CPPFLAGS) 
\ -DMAPI_MODE_GLAPI \ diff --git a/src/mapi/entry_x86-64_tls.h b/src/mapi/entry_x86-64_tls.h index 38faccc..8f3fa91 100644 --- a/src/mapi/entry_x86-64_tls.h +++ b/src/mapi/entry_x86-64_tls.h @@ -25,6 +25,11 @@ * Chia-I Wu <olv@lunarg.com> */ +#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY +#define HIDDEN __attribute__((visibility("hidden"))) +#else +#define HIDDEN +#endif __asm__(".text\n" ".balign 32\n" @@ -54,8 +59,8 @@ entry_patch_public(void) { } -static char -x86_64_entry_start[]; +extern char +x86_64_entry_start[] HIDDEN; mapi_func entry_get_public(int slot) diff --git a/src/mapi/entry_x86_tls.h b/src/mapi/entry_x86_tls.h index 46d2ece..545b5a3 100644 --- a/src/mapi/entry_x86_tls.h +++ b/src/mapi/entry_x86_tls.h @@ -27,6 +27,12 @@ #include <string.h> +#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY +#define HIDDEN __attribute__((visibility("hidden"))) +#else +#define HIDDEN +#endif + __asm__(".text"); __asm__("x86_current_tls:\n\t" @@ -71,8 +77,8 @@ __asm__(".text"); extern unsigned long x86_current_tls(); -static char x86_entry_start[]; -static char x86_entry_end[]; +extern char x86_entry_start[] HIDDEN; +extern char x86_entry_end[] HIDDEN; void entry_patch_public(void) diff --git a/src/mapi/entry_x86_tsd.h b/src/mapi/entry_x86_tsd.h index ea7bacb..0c28c8f 100644 --- a/src/mapi/entry_x86_tsd.h +++ b/src/mapi/entry_x86_tsd.h @@ -25,6 +25,11 @@ * Chia-I Wu <olv@lunarg.com> */ +#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY +#define HIDDEN __attribute__((visibility("hidden"))) +#else +#define HIDDEN +#endif #define X86_ENTRY_SIZE 32 @@ -58,8 +63,8 @@ __asm__(".balign 32\n" #include <string.h> #include "u_execmem.h" -static const char x86_entry_start[]; -static const char x86_entry_end[]; +extern const char x86_entry_start[] HIDDEN; +extern const char x86_entry_end[] HIDDEN; void entry_patch_public(void) diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk index e04482b..aaa2de9 100644 --- a/src/mesa/Android.gen.mk +++ b/src/mesa/Android.gen.mk @@ -70,7 +70,7 @@ define es-gen 
$(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@ endef -$(intermediates)/main/git_sha1.h: $(wildcard $(MESA_TOP)/.git/HEAD) +$(intermediates)/main/git_sha1.h: $(wildcard $(MESA_TOP)/.git/ORIG_HEAD) @mkdir -p $(dir $@) @echo "GIT-SHA1: $(PRIVATE_MODULE) <= git" $(hide) touch $@ diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 037384a..9710c7f 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -33,11 +33,6 @@ if HAVE_OSMESA SUBDIRS += drivers/osmesa endif -if HAVE_GLX -gldir = $(includedir)/GL -gl_HEADERS = $(top_srcdir)/include/GL/*.h -endif - include Makefile.sources EXTRA_DIST = \ @@ -161,11 +156,6 @@ libmesa_sse41_la_SOURCES = \ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS) -if HAVE_GLX -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = gl.pc -endif - MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS) LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS) diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 84db5a8..bd5b3d3 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -645,11 +645,11 @@ INCLUDE_DIRS = \ -I$(top_builddir)/src \ -I$(top_srcdir)/src \ -I$(top_builddir)/src/compiler/nir \ - -I$(top_srcdir)/src/mesa \ -I$(top_builddir)/src/mesa \ - -I$(top_srcdir)/src/mesa/main \ + -I$(top_srcdir)/src/mesa \ -I$(top_builddir)/src/mesa/main \ - -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa/main \ -I$(top_builddir)/src/mapi \ + -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 434800e..8f41174 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -15,13 +15,13 @@ env.MSVC2013Compat() env.Append(CPPPATH = [ '../compiler/nir', # for generated nir_opcodes.h, etc '#/src', + Dir('../mapi'), # src/mapi build path '#/src/mapi', '#/src/glsl', + Dir('.'), # src/mesa build path '#/src/mesa', '#/src/gallium/include', '#/src/gallium/auxiliary', - 
Dir('../mapi'), # src/mapi build path - Dir('.'), # src/mesa build path ]) if env['platform'] == 'windows': @@ -116,7 +116,7 @@ if env['platform'] not in ('cygwin', 'darwin', 'windows', 'haiku'): ) # Add the dir containing the generated header (somewhere inside the # build dir) to the include path - env.Append(CPPPATH = [matypes[0].dir]) + env.Prepend(CPPPATH = [matypes[0].dir]) def write_git_sha1_h_file(filename): diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index e5a3f00..5607d5b 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -858,6 +858,7 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable) struct __DRIimageList images; unsigned int format; uint32_t buffer_mask = 0; + int ret; front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); @@ -877,12 +878,14 @@ intel_update_image_buffers(struct intel_context *intel, __DRIdrawable *drawable) if (back_rb) buffer_mask |= __DRI_IMAGE_BUFFER_BACK; - (*screen->image.loader->getBuffers) (drawable, - driGLFormatToImageFormat(format), - &drawable->dri2.stamp, - drawable->loaderPrivate, - buffer_mask, - &images); + ret = screen->image.loader->getBuffers(drawable, + driGLFormatToImageFormat(format), + &drawable->dri2.stamp, + drawable->loaderPrivate, + buffer_mask, + &images); + if (!ret) + return; if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { drawable->w = images.front->width; diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index f448551..194b412 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -48,6 +48,7 @@ i965_compiler_FILES = \ brw_nir_attribute_workarounds.c \ brw_nir_intrinsics.c \ brw_nir_opt_peephole_ffma.c \ + brw_nir_tcs_workarounds.c \ brw_packed_float.c \ brw_predicated_break.cpp \ brw_reg.h \ diff 
--git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 9590968..6be82c5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -167,7 +167,8 @@ nir_uniform_type_size(const struct glsl_type *type) } const unsigned * -brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, +brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx, + struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, struct brw_blorp_prog_data *prog_data, @@ -175,13 +176,6 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, { const struct brw_compiler *compiler = brw->intelScreen->compiler; - void *mem_ctx = ralloc_context(NULL); - - /* Calling brw_preprocess_nir and friends is destructive and, if cloning is - * enabled, may end up completely replacing the nir_shader. Therefore, we - * own it and might as well put it in our context for easy cleanup. - */ - ralloc_steal(mem_ctx, nir); nir->options = compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 7ec5875..133a8ac 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -366,7 +366,8 @@ struct brw_blorp_blit_prog_key void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key); const unsigned * -brw_blorp_compile_nir_shader(struct brw_context *brw, struct nir_shader *nir, +brw_blorp_compile_nir_shader(struct brw_context *brw, void *mem_ctx, + struct nir_shader *nir, const struct brw_wm_prog_key *wm_key, bool use_repclear, struct brw_blorp_prog_data *prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 782d285..db94f33 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1296,7 +1296,7 @@ 
blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, * of samples). */ static nir_shader * -brw_blorp_build_nir_shader(struct brw_context *brw, +brw_blorp_build_nir_shader(struct brw_context *brw, void *mem_ctx, const brw_blorp_blit_prog_key *key) { nir_ssa_def *src_pos, *dst_pos, *color; @@ -1342,7 +1342,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw, (key->dst_samples == 0)); nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); struct brw_blorp_blit_vars v; brw_blorp_blit_vars_init(&b, &v, key); @@ -1505,6 +1505,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, &params->wm_prog_kernel, &params->wm_prog_data)) return; + void *mem_ctx = ralloc_context(NULL); + const unsigned *program; unsigned program_size; struct brw_blorp_prog_data prog_data; @@ -1512,7 +1514,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, /* Try and compile with NIR first. If that fails, fall back to the old * method of building shaders manually. 
*/ - nir_shader *nir = brw_blorp_build_nir_shader(brw, prog_key); + nir_shader *nir = brw_blorp_build_nir_shader(brw, mem_ctx, prog_key); struct brw_wm_prog_key wm_key; brw_blorp_init_wm_prog_key(&wm_key); wm_key.tex.compressed_multisample_layout_mask = @@ -1520,7 +1522,7 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, wm_key.tex.msaa_16 = prog_key->tex_samples == 16; wm_key.multisample_fbo = prog_key->rt_samples > 1; - program = brw_blorp_compile_nir_shader(brw, nir, &wm_key, false, + program = brw_blorp_compile_nir_shader(brw, mem_ctx, nir, &wm_key, false, &prog_data, &program_size); brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, @@ -1528,6 +1530,8 @@ brw_blorp_get_blit_kernel(struct brw_context *brw, program, program_size, &prog_data, sizeof(prog_data), &params->wm_prog_kernel, &params->wm_prog_data); + + ralloc_free(mem_ctx); } static void diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp index 2515a04..6400218 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp @@ -64,7 +64,7 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); nir_builder b; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL); b.shader->info.name = ralloc_strdup(b.shader, "BLORP-clear"); nir_variable *u_color = nir_variable_create(b.shader, nir_var_uniform, @@ -84,7 +84,8 @@ brw_blorp_params_get_clear_kernel(struct brw_context *brw, struct brw_blorp_prog_data prog_data; unsigned program_size; const unsigned *program = - brw_blorp_compile_nir_shader(brw, b.shader, &wm_key, use_replicated_data, + brw_blorp_compile_nir_shader(brw, mem_ctx, + b.shader, &wm_key, use_replicated_data, &prog_data, &program_size); brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h index 10e9f47..7d15c28 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -220,6 +220,8 @@ struct brw_tcs_prog_key /** A bitfield of per-vertex outputs written. */ uint64_t outputs_written; + bool quads_workaround; + struct brw_sampler_prog_key_data tex; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1cb99da..2af42e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5681,7 +5681,7 @@ fs_visitor::setup_gs_payload() * have to multiply by VerticesIn to obtain the total storage requirement. */ if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in > - max_push_components) { + max_push_components || gs_prog_data->invocations > 1) { gs_prog_data->base.include_vue_handles = true; /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 11c078a..91763d3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2322,23 +2322,23 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, break; fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t)); + fs_reg m0_2 = component(m0, 2); - const fs_builder fwa_bld = bld.exec_all(); + const fs_builder chanbld = bld.exec_all().group(1, 0); /* Zero the message header */ - fwa_bld.MOV(m0, brw_imm_ud(0u)); + bld.exec_all().MOV(m0, brw_imm_ud(0u)); /* Copy "Barrier ID" from r0.2, bits 16:13 */ - fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(INTEL_MASK(16, 13))); /* Shift it up to bits 27:24. 
*/ - fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11)); + chanbld.SHL(m0_2, m0_2, brw_imm_ud(11)); /* Set the Barrier Count and the enable bit */ - fwa_bld.OR(m0_2, m0_2, - brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15))); + chanbld.OR(m0_2, m0_2, + brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15))); bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); break; @@ -4060,12 +4060,23 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, dest = get_nir_dest(instr->dest); fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); - fs_reg offset = get_nir_src(instr->src[0]); + fs_reg offset; fs_reg data1 = get_nir_src(instr->src[1]); fs_reg data2; if (op == BRW_AOP_CMPWR) data2 = get_nir_src(instr->src[2]); + /* Get the offset */ + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + } else { + offset = vgrf(glsl_type::uint_type); + bld.ADD(offset, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); + } + /* Emit the actual atomic operation operation */ fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset, diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 74c354f..6185310 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -117,6 +117,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, bool brw_nir_apply_trig_workarounds(nir_shader *nir); +void brw_nir_apply_tcs_quads_workaround(nir_shader *nir); + nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, const struct brw_device_info *devinfo, const struct brw_sampler_prog_key_data *key, diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c new file mode 100644 index 0000000..0626981 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c @@ -0,0 +1,152 @@ +/* + * Copyright © 2016 Intel 
Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "compiler/nir/nir_builder.h" +#include "brw_nir.h" + +/** + * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8). 
+ * + * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the + * definition of the patch header layouts): + * + * "HW Bug: The Tessellation stage will incorrectly add domain points + * along patch edges under the following conditions, which may result + * in conformance failures and/or cracking artifacts: + * + * * QUAD domain + * * INTEGER partitioning + * * All three TessFactors in a given U or V direction (e.g., V + * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0 + * * All three TessFactors in the other direction are > 1.0 and all + * round up to the same integer value (e.g, U direction: + * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4) + * + * The suggested workaround (to be implemented as part of the postamble + * to the HS shader in the HS kernel) is: + * + * if ( + * (TF[UEQ0] > 1.0) || + * (TF[VEQ0] > 1.0) || + * (TF[UEQ1] > 1.0) || + * (TF[VEQ1] > 1.0) || + * (TF[INSIDE_U] > 1.0) || + * (TF[INSIDE_V] > 1.0) ) + * { + * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U]; + * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V]; + * }" + * + * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes + * that the above workaround fails to fix certain GL/ES CTS tests which + * have inside tessellation factors of -1.0. This can be explained by + * a quote from the ARB_tessellation_shader specification: + * + * "If "equal_spacing" is used, the floating-point tessellation level is + * first clamped to the range [1,<max>], where <max> is implementation- + * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)." + * + * In other words, the actual inner tessellation factor used is + * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped + * value against 1.0. To accomplish this, we change the comparison from + * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0). 
+ */ + +static inline nir_ssa_def * +load_output(nir_builder *b, int num_components, int offset) +{ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL); + load->num_components = num_components; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_intrinsic_set_base(load, offset); + + nir_builder_instr_insert(b, &load->instr); + + return &load->dest.ssa; +} + +static inline void +store_output(nir_builder *b, nir_ssa_def *value, int offset, unsigned comps) +{ + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); + store->num_components = comps; + nir_intrinsic_set_write_mask(store, (1u << comps) - 1); + store->src[0] = nir_src_for_ssa(value); + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_builder_instr_insert(b, &store->instr); +} + +static void +emit_quads_workaround(nir_builder *b, nir_block *block) +{ + /* We're going to insert a new if-statement in a predecessor of the end + * block. This would normally create a new block (after the if) which + * would then become the predecessor of the end block, causing our set + * walking to get screwed up. To avoid this, just emit a constant at + * the end of our current block, and insert the if before that. 
+ */ + b->cursor = nir_after_block_before_jump(block); + b->cursor = nir_before_instr(nir_imm_int(b, 0)->parent_instr); + + nir_ssa_def *inner = load_output(b, 2, 0); + nir_ssa_def *outer = load_output(b, 4, 1); + + nir_ssa_def *any_greater_than_1 = + nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)), + nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner))); + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(any_greater_than_1); + nir_builder_cf_insert(b, &if_stmt->cf_node); + + /* Fill out the new then-block */ + b->cursor = nir_after_cf_list(&if_stmt->then_list); + + store_output(b, nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner), + nir_imm_float(b, 2.0f), inner), 0, 2); +} + +void +brw_nir_apply_tcs_quads_workaround(nir_shader *nir) +{ + assert(nir->stage == MESA_SHADER_TESS_CTRL); + + nir_foreach_function(func, nir) { + if (!func->impl) + continue; + + nir_builder b; + nir_builder_init(&b, func->impl); + + struct set_entry *entry; + set_foreach(func->impl->end_block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + emit_quads_workaround(&b, pred); + } + + nir_metadata_preserve(func->impl, 0); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c index a91c6e2..a42a322 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c +++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c @@ -686,12 +686,12 @@ stop_oa_counters(struct brw_context *brw) * The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot, * including the required PIPE_CONTROL flushes. * - * Sandybridge is the worst case scenario: brw_emit_mi_flush - * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush - * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add - * the 3 DWords for MI_REPORT_PERF_COUNT itself. 
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush expands to four + * PIPE_CONTROLs which are 5 DWords each. We have to flush before and after + * MI_REPORT_PERF_COUNT, so multiply by two. Finally, add the 3 DWords for + * MI_REPORT_PERF_COUNT itself. */ -#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3) +#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (4 * 5) + 3) /** * Emit an MI_REPORT_PERF_COUNT command packet. diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index 4672efd..d51cf1b 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -96,10 +96,38 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) { + if (brw->gen >= 6 && + (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) && + (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) { + /* A pipe control command with flush and invalidate bits set + * simultaneously is an inherently racy operation on Gen6+ if the + * contents of the flushed caches were intended to become visible from + * any of the invalidated caches. Split it in two PIPE_CONTROLs, the + * first one should stall the pipeline to make sure that the flushed R/W + * caches are coherent with memory once the specified R/O caches are + * invalidated. On pre-Gen6 hardware the (implicit) R/O cache + * invalidation seems to happen at the bottom of the pipeline together + * with any write cache flush, so this shouldn't be a concern. 
+ */ + brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) | + PIPE_CONTROL_CS_STALL); + flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL); + } + if (brw->gen >= 8) { if (brw->gen == 8) gen8_add_cs_stall_workaround_bits(&flags); + if (brw->gen == 9 && + (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { + /* Hardware workaround: SKL + * + * Emit Pipe Control with all bits set to zero before emitting + * a Pipe Control with VF Cache Invalidate set. + */ + brw_emit_pipe_control_flush(brw, 0); + } + BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); OUT_BATCH(flags); @@ -311,15 +339,6 @@ brw_emit_mi_flush(struct brw_context *brw) } else { int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; if (brw->gen >= 6) { - if (brw->gen == 9) { - /* Hardware workaround: SKL - * - * Emit Pipe Control with all bits set to zero before emitting - * a Pipe Control with VF Cache Invalidate set. - */ - brw_emit_pipe_control_flush(brw, 0); - } - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 8a5dd7e..6b7fde2 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -153,6 +153,8 @@ brw_tcs_debug_recompile(struct brw_context *brw, key->patch_outputs_written); found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode, key->tes_primitive_mode); + found |= key_debug(brw, "quads and equal_spacing workaround", + old_key->quads_workaround, key->quads_workaround); found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); if (!found) { @@ -346,6 +348,9 @@ brw_upload_tcs_prog(struct brw_context *brw, * based on the domain the DS is expecting to tessellate. 
*/ key.tes_primitive_mode = tep->program.PrimitiveMode; + key.quads_workaround = brw->gen < 9 && + tep->program.PrimitiveMode == GL_QUADS && + tep->program.Spacing == GL_EQUAL; if (tcp) { key.program_string_id = tcp->id; @@ -383,6 +388,8 @@ brw_tcs_precompile(struct gl_context *ctx, struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog; struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp); + const struct gl_shader *tes = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; memset(&key, 0, sizeof(key)); @@ -393,9 +400,14 @@ brw_tcs_precompile(struct gl_context *ctx, if (brw->gen < 8) key.input_vertices = shader_prog->TessCtrl.VerticesOut; - key.tes_primitive_mode = - shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ? - shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES; + if (tes) { + key.tes_primitive_mode = shader_prog->TessEval.PrimitiveMode; + key.quads_workaround = brw->gen < 9 && + shader_prog->TessEval.PrimitiveMode == GL_QUADS && + shader_prog->TessEval.Spacing == GL_EQUAL; + } else { + key.tes_primitive_mode = GL_TRIANGLES; + } key.outputs_written = prog->OutputsWritten; key.patch_outputs_written = prog->PatchOutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 162b481..a7398a7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -354,95 +354,97 @@ vec4_visitor::opt_vector_float() { bool progress = false; - int last_reg = -1, last_reg_offset = -1; - enum brw_reg_file last_reg_file = BAD_FILE; + foreach_block(block, cfg) { + int last_reg = -1, last_reg_offset = -1; + enum brw_reg_file last_reg_file = BAD_FILE; + + uint8_t imm[4] = { 0 }; + int inst_count = 0; + vec4_instruction *imm_inst[4]; + unsigned writemask = 0; + enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F; + + foreach_inst_in_block_safe(vec4_instruction, inst, block) { + int vf = -1; + enum brw_reg_type need_type; + + /* Look for unconditional MOVs from an 
immediate with a partial + * writemask. Skip type-conversion MOVs other than integer 0, + * where the type doesn't matter. See if the immediate can be + * represented as a VF. + */ + if (inst->opcode == BRW_OPCODE_MOV && + inst->src[0].file == IMM && + inst->predicate == BRW_PREDICATE_NONE && + inst->dst.writemask != WRITEMASK_XYZW && + (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) { + + vf = brw_float_to_vf(inst->src[0].d); + need_type = BRW_REGISTER_TYPE_D; + + if (vf == -1) { + vf = brw_float_to_vf(inst->src[0].f); + need_type = BRW_REGISTER_TYPE_F; + } + } else { + last_reg = -1; + } - uint8_t imm[4] = { 0 }; - int inst_count = 0; - vec4_instruction *imm_inst[4]; - unsigned writemask = 0; - enum brw_reg_type dest_type = BRW_REGISTER_TYPE_F; + /* If this wasn't a MOV, or the destination register doesn't match, + * or we have to switch destination types, then this breaks our + * sequence. Combine anything we've accumulated so far. + */ + if (last_reg != inst->dst.nr || + last_reg_offset != inst->dst.reg_offset || + last_reg_file != inst->dst.file || + (vf > 0 && dest_type != need_type)) { + + if (inst_count > 1) { + unsigned vf; + memcpy(&vf, imm, sizeof(vf)); + vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf)); + mov->dst.type = dest_type; + mov->dst.writemask = writemask; + inst->insert_before(block, mov); + + for (int i = 0; i < inst_count; i++) { + imm_inst[i]->remove(block); + } - foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { - int vf = -1; - enum brw_reg_type need_type; + progress = true; + } - /* Look for unconditional MOVs from an immediate with a partial - * writemask. Skip type-conversion MOVs other than integer 0, - * where the type doesn't matter. See if the immediate can be - * represented as a VF. 
- */ - if (inst->opcode == BRW_OPCODE_MOV && - inst->src[0].file == IMM && - inst->predicate == BRW_PREDICATE_NONE && - inst->dst.writemask != WRITEMASK_XYZW && - (inst->src[0].type == inst->dst.type || inst->src[0].d == 0)) { - - vf = brw_float_to_vf(inst->src[0].d); - need_type = BRW_REGISTER_TYPE_D; - - if (vf == -1) { - vf = brw_float_to_vf(inst->src[0].f); - need_type = BRW_REGISTER_TYPE_F; - } - } else { - last_reg = -1; - } + inst_count = 0; + last_reg = -1; + writemask = 0; + dest_type = BRW_REGISTER_TYPE_F; - /* If this wasn't a MOV, or the destination register doesn't match, - * or we have to switch destination types, then this breaks our - * sequence. Combine anything we've accumulated so far. - */ - if (last_reg != inst->dst.nr || - last_reg_offset != inst->dst.reg_offset || - last_reg_file != inst->dst.file || - (vf > 0 && dest_type != need_type)) { - - if (inst_count > 1) { - unsigned vf; - memcpy(&vf, imm, sizeof(vf)); - vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf)); - mov->dst.type = dest_type; - mov->dst.writemask = writemask; - inst->insert_before(block, mov); - - for (int i = 0; i < inst_count; i++) { - imm_inst[i]->remove(block); + for (int i = 0; i < 4; i++) { + imm[i] = 0; } - - progress = true; } - inst_count = 0; - last_reg = -1; - writemask = 0; - dest_type = BRW_REGISTER_TYPE_F; - - for (int i = 0; i < 4; i++) { - imm[i] = 0; + /* Record this instruction's value (if it was representable). 
*/ + if (vf != -1) { + if ((inst->dst.writemask & WRITEMASK_X) != 0) + imm[0] = vf; + if ((inst->dst.writemask & WRITEMASK_Y) != 0) + imm[1] = vf; + if ((inst->dst.writemask & WRITEMASK_Z) != 0) + imm[2] = vf; + if ((inst->dst.writemask & WRITEMASK_W) != 0) + imm[3] = vf; + + writemask |= inst->dst.writemask; + imm_inst[inst_count++] = inst; + + last_reg = inst->dst.nr; + last_reg_offset = inst->dst.reg_offset; + last_reg_file = inst->dst.file; + if (vf > 0) + dest_type = need_type; } } - - /* Record this instruction's value (if it was representable). */ - if (vf != -1) { - if ((inst->dst.writemask & WRITEMASK_X) != 0) - imm[0] = vf; - if ((inst->dst.writemask & WRITEMASK_Y) != 0) - imm[1] = vf; - if ((inst->dst.writemask & WRITEMASK_Z) != 0) - imm[2] = vf; - if ((inst->dst.writemask & WRITEMASK_W) != 0) - imm[3] = vf; - - writemask |= inst->dst.writemask; - imm_inst[inst_count++] = inst; - - last_reg = inst->dst.nr; - last_reg_offset = inst->dst.reg_offset; - last_reg_file = inst->dst.file; - if (vf > 0) - dest_type = need_type; - } } if (progress) @@ -1109,7 +1111,7 @@ vec4_visitor::opt_register_coalesce() /* Can't coalesce this GRF if someone else was going to * read it later. */ - if (var_range_end(var_from_reg(alloc, inst->src[0]), 4) > ip) + if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip) continue; /* We need to check interference with the final destination between this diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 0c1f0c3..10898a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). 
*/ if (src->file == VGRF) { - if (var_range_end(var_from_reg(alloc, *src), 4) < ip) { + if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) { entry->remove(); ralloc_free(entry); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 927438f..26a910c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -59,7 +59,10 @@ vec4_gs_visitor::make_reg_for_system_value(int location) switch (location) { case SYSTEM_VALUE_INVOCATION_ID: this->current_annotation = "initialize gl_InvocationID"; - emit(GS_OPCODE_GET_INSTANCE_ID, *reg); + if (gs_prog_data->invocations > 1) + emit(GS_OPCODE_GET_INSTANCE_ID, *reg); + else + emit(MOV(*reg, brw_imm_ud(0))); break; default: unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index f61c612..5440dba 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -451,6 +451,9 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); + if (key->quads_workaround) + brw_nir_apply_tcs_quads_workaround(nir); + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); if (is_scalar) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 609285e..61ada53 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1443,10 +1443,12 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) /* _NEW_PROGRAM */ struct gl_shader_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + /* BRW_NEW_CS_PROG_DATA */ + const struct brw_cs_prog_data *cs_prog_data = 
brw->cs.prog_data; - if (prog && brw->cs.prog_data->uses_num_work_groups) { + if (prog && cs_prog_data->uses_num_work_groups) { const unsigned surf_idx = - brw->cs.prog_data->binding_table.work_groups_start; + cs_prog_data->binding_table.work_groups_start; uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; drm_intel_bo *bo; uint32_t bo_offset; @@ -1475,6 +1477,7 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) const struct brw_tracked_state brw_cs_work_groups_surface = { .dirty = { .brw = BRW_NEW_BLORP | + BRW_NEW_CS_PROG_DATA | BRW_NEW_CS_WORK_GROUPS }, .emit = brw_upload_cs_work_groups_surface, diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 26de633..64ccdb6 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -50,6 +50,7 @@ upload_clip_state(struct brw_context *brw) dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } + /* BRW_NEW_VS_PROG_DATA */ dw1 |= brw->vs.prog_data->base.cull_distance_mask; if (brw->gen >= 7) @@ -224,6 +225,7 @@ const struct brw_tracked_state gen7_clip_state = { BRW_NEW_CONTEXT | BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_VS_PROG_DATA | BRW_NEW_META_IN_PROGRESS | BRW_NEW_PRIMITIVE | BRW_NEW_RASTERIZER_DISCARD | diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 5427fa5..b245226 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -283,7 +283,7 @@ gen7_upload_cs_push_constants(struct brw_context *brw) (struct brw_compute_program *) brw->compute_program; if (cp) { - /* CACHE_NEW_CS_PROG */ + /* BRW_NEW_CS_PROG_DATA */ struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data; brw_upload_cs_push_constants(brw, &cp->program.Base, cs_prog_data, @@ -297,6 +297,7 @@ const struct brw_tracked_state gen7_cs_push_constants = { .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_COMPUTE_PROGRAM 
| + BRW_NEW_CS_PROG_DATA | BRW_NEW_PUSH_CONSTANT_ALLOCATION, }, .emit = gen7_upload_cs_push_constants, diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c index 6f01abb..3b79b55 100644 --- a/src/mesa/drivers/dri/i965/gen8_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ds_state.c @@ -69,6 +69,7 @@ gen8_upload_ds_state(struct brw_context *brw) GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) | (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ? GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0)); + /* _NEW_TRANSFORM */ OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled, GEN8_DS_USER_CLIP_DISTANCE) | SET_FIELD(vue_prog_data->cull_distance_mask, @@ -106,7 +107,7 @@ gen8_upload_ds_state(struct brw_context *brw) const struct brw_tracked_state gen8_ds_state = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | BRW_NEW_TESS_PROGRAMS | diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8a904fe..f916d99 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -124,6 +124,7 @@ const struct brw_tracked_state gen8_ps_extra = { .mesa = _NEW_BUFFERS | _NEW_COLOR, .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA, }, .emit = upload_ps_extra, @@ -283,7 +284,6 @@ const struct brw_tracked_state gen8_ps_state = { .mesa = _NEW_MULTISAMPLE, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | - BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA, }, .emit = upload_ps_state, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index aa1dc38..67e8e8f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -21,13 +21,13 @@ extern "C" { * - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes) * - Disabling OA counters on Gen6+ (3 DWords = 12 bytes) * - Ending MI_REPORT_PERF_COUNT on 
Gen5+, plus associated PIPE_CONTROLs: - * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB, - * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes + * - Two sets of PIPE_CONTROLs, which become 4 PIPE_CONTROLs each on SNB, + * which are 5 DWords each ==> 2 * 4 * 5 * 4 = 160 bytes * - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes. * On Ironlake, it's 6 DWords, but we have some slack due to the lack of * Sandybridge PIPE_CONTROL madness. - * - CC_STATE workaround on HSW (12 * 4 = 48 bytes) - * - 5 dwords for initial mi_flush + * - CC_STATE workaround on HSW (17 * 4 = 68 bytes) + * - 10 dwords for initial mi_flush * - 2 dwords for CC state setup * - 5 dwords for the required pipe control at the end * - Restoring L3 configuration: (24 dwords = 96 bytes) @@ -35,7 +35,7 @@ extern "C" { * - 7 dwords for L3 configuration set-up. * - 5 dwords for L3 atomic set-up (on HSW). */ -#define BATCH_RESERVED 248 +#define BATCH_RESERVED 308 struct intel_batchbuffer; diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 939f9a0..8a0d2ad 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -374,6 +374,19 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, if (!irb->mt) return; + /* Adjust the miptree's upper-left coordinate. + * + * FIXME: Adjusting the miptree's layout outside of + * intel_miptree_create_layout() is fragile. Plumb the adjustment through + * intel_miptree_create_layout() and brw_tex_layout(). 
+ */ + irb->mt->level[0].level_x = image->tile_x; + irb->mt->level[0].level_y = image->tile_y; + irb->mt->level[0].slice[0].x_offset = image->tile_x; + irb->mt->level[0].slice[0].y_offset = image->tile_y; + irb->mt->total_width += image->tile_x; + irb->mt->total_height += image->tile_y; + rb->InternalFormat = image->internal_format; rb->Width = image->width; rb->Height = image->height; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index b6265dc..e74a2dc 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -366,25 +366,8 @@ intel_miptree_create_layout(struct brw_context *brw, _mesa_get_format_name(format), first_level, last_level, depth0, mt); - if (target == GL_TEXTURE_1D_ARRAY) { - /* For a 1D Array texture the OpenGL API will treat the height0 - * parameter as the number of array slices. For Intel hardware, we treat - * the 1D array as a 2D Array with a height of 1. - * - * So, when we first come through this path to create a 1D Array - * texture, height0 stores the number of slices, and depth0 is 1. In - * this case, we want to swap height0 and depth0. - * - * Since some miptrees will be created based on the base miptree, we may - * come through this path and see height0 as 1 and depth0 being the - * number of slices. In this case we don't need to do the swap. - */ - assert(height0 == 1 || depth0 == 1); - if (height0 > 1) { - depth0 = height0; - height0 = 1; - } - } + if (target == GL_TEXTURE_1D_ARRAY) + assert(height0 == 1); mt->target = target; mt->format = format; @@ -1050,6 +1033,7 @@ intel_get_image_dims(struct gl_texture_image *image, * as a 2D Array with a height of 1. So, here we want to swap image * height and depth. 
*/ + assert(image->Depth == 1); *width = image->Width; *height = 1; *depth = image->Height; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index a486d6e..cacd7e2 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -110,22 +110,6 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, if (ctx->_ImageTransferState) return false; - /* This renderbuffer can come from a texture. In this case, we impose - * some of the same restrictions we have for textures and adjust for - * miplevels. - */ - if (rb->TexImage) { - if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D && - rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE) - return false; - - int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel; - - /* Adjust x and y offset based on miplevel */ - xoffset += irb->mt->level[level].level_x; - yoffset += irb->mt->level[level].level_y; - } - /* It is possible that the renderbuffer (or underlying texture) is * multisampled. 
Since ReadPixels from a multisampled buffer requires a * multisample resolve, we can't handle this here @@ -169,6 +153,9 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, return false; } + xoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].x_offset; + yoffset += irb->mt->level[irb->mt_level].slice[irb->mt_layer].y_offset; + dst_pitch = _mesa_image_row_stride(pack, width, format, type); /* For a window-system renderbuffer, the buffer is actually flipped @@ -201,7 +188,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp, - bo->virtual, + bo->virtual + irb->mt->offset, dst_pitch, irb->mt->pitch, brw->has_swizzling, irb->mt->tiling, diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 95365fe..7a82be4 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -134,6 +134,15 @@ #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) +#define PIPE_CONTROL_CACHE_FLUSH_BITS \ + (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \ + PIPE_CONTROL_RENDER_TARGET_FLUSH) + +#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \ + (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \ + PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ + PIPE_CONTROL_INSTRUCTION_INVALIDATE) + /** @} */ #define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22)) diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c index 3e359a5..39c9636 100644 --- a/src/mesa/drivers/dri/i965/intel_syncobj.c +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -49,6 +49,7 @@ struct brw_fence { /** The fence waits for completion of this batch. 
*/ drm_intel_bo *batch_bo; + mtx_t mutex; bool signalled; }; @@ -58,10 +59,20 @@ struct intel_gl_sync_object { }; static void +brw_fence_init(struct brw_context *brw, struct brw_fence *fence) +{ + fence->brw = brw; + fence->batch_bo = NULL; + mtx_init(&fence->mutex, mtx_plain); +} + +static void brw_fence_finish(struct brw_fence *fence) { if (fence->batch_bo) drm_intel_bo_unreference(fence->batch_bo); + + mtx_destroy(&fence->mutex); } static void @@ -77,7 +88,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) } static bool -brw_fence_has_completed(struct brw_fence *fence) +brw_fence_has_completed_locked(struct brw_fence *fence) { if (fence->signalled) return true; @@ -92,13 +103,21 @@ brw_fence_has_completed(struct brw_fence *fence) return false; } -/** - * Return true if the function successfully signals or has already signalled. - * (This matches the behavior expected from __DRI2fence::client_wait_sync). - */ static bool -brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, - uint64_t timeout) +brw_fence_has_completed(struct brw_fence *fence) +{ + bool ret; + + mtx_lock(&fence->mutex); + ret = brw_fence_has_completed_locked(fence); + mtx_unlock(&fence->mutex); + + return ret; +} + +static bool +brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence, + uint64_t timeout) { if (fence->signalled) return true; @@ -123,6 +142,23 @@ brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, return true; } +/** + * Return true if the function successfully signals or has already signalled. + * (This matches the behavior expected from __DRI2fence::client_wait_sync). 
+ */ +static bool +brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence, + uint64_t timeout) +{ + bool ret; + + mtx_lock(&fence->mutex); + ret = brw_fence_client_wait_locked(brw, fence, timeout); + mtx_unlock(&fence->mutex); + + return ret; +} + static void brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence) { @@ -161,6 +197,7 @@ intel_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, struct brw_context *brw = brw_context(ctx); struct intel_gl_sync_object *sync = (struct intel_gl_sync_object *)s; + brw_fence_init(brw, &sync->fence); brw_fence_insert(brw, &sync->fence); } @@ -215,7 +252,7 @@ intel_dri_create_fence(__DRIcontext *ctx) if (!fence) return NULL; - fence->brw = brw; + brw_fence_init(brw, fence); brw_fence_insert(brw, fence); return fence; @@ -244,6 +281,12 @@ intel_dri_server_wait_sync(__DRIcontext *ctx, void *driver_fence, unsigned flags { struct brw_fence *fence = driver_fence; + /* We might be called here with a NULL fence as a result of WaitSyncKHR + * on a EGL_KHR_reusable_sync fence. Nothing to do here in such case. 
+ */ + if (!fence) + return; + brw_fence_server_wait(fence->brw, fence); } diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index cac33ac..a1364b9 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -140,6 +140,8 @@ intel_alloc_texture_storage(struct gl_context *ctx, !intel_miptree_match_image(intel_texobj->mt, first_image) || intel_texobj->mt->last_level != levels - 1) { intel_miptree_release(&intel_texobj->mt); + + intel_get_image_dims(first_image, &width, &height, &depth); intel_texobj->mt = intel_miptree_create(brw, texobj->Target, first_image->TexFormat, 0, levels - 1, diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c index 492ecdc..2c5c2db 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c @@ -67,5 +67,5 @@ nv20_emit_frag(struct gl_context *ctx, int emit) PUSH_DATA (push, in >> 32); BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1); - PUSH_DATA (push, n); + PUSH_DATA (push, MAX2(1, n)); } diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 2d4bb70..6e006f8 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -484,14 +484,14 @@ swrast_map_renderbuffer(struct gl_context *ctx, xrb->map_mode = mode; xrb->map_x = x; - xrb->map_y = y; + xrb->map_y = rb->Height - y - h; xrb->map_w = w; xrb->map_h = h; stride = w * cpp; xrb->Base.Buffer = malloc(h * stride); - sPriv->swrast_loader->getImage(dPriv, x, rb->Height - y - h, w, h, + sPriv->swrast_loader->getImage(dPriv, x, xrb->map_y, w, h, (char *) xrb->Base.Buffer, dPriv->loaderPrivate); diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index a28c583..d8815af 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -378,17 +378,48 @@ draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, /* complicated 
error checking... */ for (output = 0; output < n; output++) { - /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL 3.0 + destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]); + + /* From the OpenGL 3.0 specification, page 258: + * "Each buffer listed in bufs must be one of the values from tables + * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated. + */ + if (destMask[output] == BAD_MASK) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_enum_to_string(buffers[output])); + return; + } + + /* From the OpenGL 4.0 specification, page 256: + * "For both the default framebuffer and framebuffer objects, the + * constants FRONT, BACK, LEFT, RIGHT, and FRONT_AND_BACK are not + * valid in the bufs array passed to DrawBuffers, and will result in + * the error INVALID_ENUM. This restriction is because these + * constants may themselves refer to multiple buffers, as shown in + * table 4.4." + * Previous versions of the OpenGL specification say INVALID_OPERATION, + * but the Khronos conformance tests expect INVALID_ENUM. + */ + if (_mesa_bitcount(destMask[output]) > 1) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", + caller, _mesa_enum_to_string(buffers[output])); + return; + } + + /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL ES 3.0 * specification says: * - * "Each buffer listed in bufs must be BACK, NONE, or one of the values - * from table 4.3 (NONE, COLOR_ATTACHMENTi)" + * "If the GL is bound to a draw framebuffer object, the ith buffer + * listed in bufs must be COLOR_ATTACHMENTi or NONE . Specifying a + * buffer out of order, BACK , or COLOR_ATTACHMENTm where m is greater + * than or equal to the value of MAX_- COLOR_ATTACHMENTS , will + * generate the error INVALID_OPERATION . 
*/ - if (_mesa_is_gles3(ctx) && buffers[output] != GL_NONE && - buffers[output] != GL_BACK && + if (_mesa_is_gles3(ctx) && _mesa_is_user_fbo(fb) && + buffers[output] != GL_NONE && (buffers[output] < GL_COLOR_ATTACHMENT0 || buffers[output] >= GL_COLOR_ATTACHMENT0 + ctx->Const.MaxColorAttachments)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffers(buffer)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)"); return; } @@ -412,34 +443,6 @@ draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, return; } - destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]); - - /* From the OpenGL 3.0 specification, page 258: - * "Each buffer listed in bufs must be one of the values from tables - * 4.5 or 4.6. Otherwise, an INVALID_ENUM error is generated. - */ - if (destMask[output] == BAD_MASK) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", - caller, _mesa_enum_to_string(buffers[output])); - return; - } - - /* From the OpenGL 4.0 specification, page 256: - * "For both the default framebuffer and framebuffer objects, the - * constants FRONT, BACK, LEFT, RIGHT, and FRONT_AND_BACK are not - * valid in the bufs array passed to DrawBuffers, and will result in - * the error INVALID_ENUM. This restriction is because these - * constants may themselves refer to multiple buffers, as shown in - * table 4.4." - * Previous versions of the OpenGL specification say INVALID_OPERATION, - * but the Khronos conformance tests expect INVALID_ENUM. 
- */ - if (_mesa_bitcount(destMask[output]) > 1) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", - caller, _mesa_enum_to_string(buffers[output])); - return; - } - /* From the OpenGL 3.0 specification, page 259: * "If the GL is bound to the default framebuffer and DrawBuffers is * supplied with a constant (other than NONE) that does not indicate diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index bf47c1c..68da639 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -303,9 +303,21 @@ _mesa_get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, switch (attachment) { case GL_FRONT_LEFT: - return &fb->Attachment[BUFFER_FRONT_LEFT]; + /* Front buffers can be allocated on the first use, but + * glGetFramebufferAttachmentParameteriv must work even if that + * allocation hasn't happened yet. In such case, use the back buffer, + * which should be the same. + */ + if (fb->Attachment[BUFFER_FRONT_LEFT].Type == GL_NONE) + return &fb->Attachment[BUFFER_BACK_LEFT]; + else + return &fb->Attachment[BUFFER_FRONT_LEFT]; case GL_FRONT_RIGHT: - return &fb->Attachment[BUFFER_FRONT_RIGHT]; + /* Same as above. 
*/ + if (fb->Attachment[BUFFER_FRONT_RIGHT].Type == GL_NONE) + return &fb->Attachment[BUFFER_BACK_RIGHT]; + else + return &fb->Attachment[BUFFER_FRONT_RIGHT]; case GL_BACK_LEFT: return &fb->Attachment[BUFFER_BACK_LEFT]; case GL_BACK_RIGHT: diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index d72bc71..18dffc3 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -293,10 +293,9 @@ struct ureg { GLuint file:4; GLint idx:9; /* relative addressing may be negative */ /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ - GLuint abs:1; GLuint negate:1; GLuint swz:12; - GLuint pad:5; + GLuint pad:6; }; @@ -325,7 +324,6 @@ static const struct ureg undef = { 0, 0, 0, - 0, 0 }; @@ -344,7 +342,6 @@ static struct ureg make_ureg(GLuint file, GLint idx) struct ureg reg; reg.file = file; reg.idx = idx; - reg.abs = 0; reg.negate = 0; reg.swz = SWIZZLE_NOOP; reg.pad = 0; @@ -352,15 +349,6 @@ static struct ureg make_ureg(GLuint file, GLint idx) } - -static struct ureg absolute( struct ureg reg ) -{ - reg.abs = 1; - reg.negate = 0; - return reg; -} - - static struct ureg negate( struct ureg reg ) { reg.negate ^= 1; @@ -961,7 +949,8 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); - emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W)); + emit_op1(p, OPCODE_ABS, spot, 0, spot); + emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); emit_op2(p, OPCODE_MUL, att, 0, slt, spot); release_temp(p, spot); diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 215c14f..e9727ea 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -387,13 +387,13 @@ _is_target_supported(struct gl_context *ctx, GLenum target) * "if a particular type of <target> is not supported by the * implementation the "unsupported" 
answer should be given. * This is not an error." + * + * For OpenGL ES, queries can only be used with GL_RENDERBUFFER or MS. */ switch(target){ + case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - break; - - case GL_TEXTURE_1D: if (!_mesa_is_desktop_gl(ctx)) return false; break; @@ -404,12 +404,12 @@ _is_target_supported(struct gl_context *ctx, GLenum target) break; case GL_TEXTURE_2D_ARRAY: - if (!(_mesa_has_EXT_texture_array(ctx) || _mesa_is_gles3(ctx))) + if (!_mesa_has_EXT_texture_array(ctx)) return false; break; case GL_TEXTURE_CUBE_MAP: - if (!_mesa_has_ARB_texture_cube_map(ctx)) + if (ctx->API != API_OPENGL_CORE && !_mesa_has_ARB_texture_cube_map(ctx)) return false; break; @@ -419,7 +419,7 @@ _is_target_supported(struct gl_context *ctx, GLenum target) break; case GL_TEXTURE_RECTANGLE: - if (!_mesa_has_NV_texture_rectangle(ctx)) + if (!_mesa_has_ARB_texture_rectangle(ctx)) return false; break; @@ -962,7 +962,8 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, switch (pname) { case GL_INTERNALFORMAT_DEPTH_SIZE: - if (!_mesa_has_ARB_depth_texture(ctx) && + if (ctx->API != API_OPENGL_CORE && + !_mesa_has_ARB_depth_texture(ctx) && target != GL_RENDERBUFFER && target != GL_TEXTURE_BUFFER) goto end; diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index d917220..2afe7be 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -85,10 +85,15 @@ _mesa_is_valid_generate_texture_mipmap_internalformat(struct gl_context *ctx, * not specified with an unsized internal format from table 8.3 or a * sized internal format that is both color-renderable and * texture-filterable according to table 8.10." + * + * GL_EXT_texture_format_BGRA8888 adds a GL_BGRA_EXT unsized internal + * format, and includes it in a very similar looking table. So we + * include it here as well. 
*/ return internalformat == GL_RGBA || internalformat == GL_RGB || internalformat == GL_LUMINANCE_ALPHA || internalformat == GL_LUMINANCE || internalformat == GL_ALPHA || + internalformat == GL_BGRA_EXT || (_mesa_is_es3_color_renderable(internalformat) && _mesa_is_es3_texture_filterable(internalformat)); } @@ -144,6 +149,11 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx, return; } + if (srcImage->Width == 0 || srcImage->Height == 0) { + _mesa_unlock_texture(ctx, texObj); + return; + } + if (target == GL_TEXTURE_CUBE_MAP) { GLuint face; for (face = 0; face < 6; face++) { diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 9f70749..7623b93 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -411,6 +411,14 @@ static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = { static const int extra_ARB_gpu_shader5_or_OES_sample_variables[] = { EXT(ARB_gpu_shader5), EXT(OES_sample_variables), + EXTRA_END +}; + +static const int extra_KHR_robustness_or_GL[] = { + EXT(KHR_robustness), + EXTRA_API_GL, + EXTRA_API_GL_CORE, + EXTRA_END }; EXTRA_EXT(ARB_texture_cube_map); diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index bfcbfd6..ea3649a 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -338,6 +338,9 @@ descriptor=[ # blend_func_extended [ "MAX_DUAL_SOURCE_DRAW_BUFFERS", "CONTEXT_INT(Const.MaxDualSourceDrawBuffers), extra_ARB_blend_func_extended" ], + +# GL_ARB_robustness / GL_KHR_robustness + [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), extra_KHR_robustness_or_GL" ], ]}, # GLES3 is not a typo. 
@@ -842,9 +845,6 @@ descriptor=[ # GL 3.2 [ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ], -# GL_ARB_robustness - [ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ], - # GL_ARB_timer_query [ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ], diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 24ce7b0..6df09bb 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -907,6 +907,29 @@ _mesa_is_astc_format(GLenum internalFormat) } /** + * Test if the given format is an ETC2 format. + */ +GLboolean +_mesa_is_etc2_format(GLenum internalFormat) +{ + switch (internalFormat) { + case GL_COMPRESSED_RGB8_ETC2: + case GL_COMPRESSED_SRGB8_ETC2: + case GL_COMPRESSED_RGBA8_ETC2_EAC: + case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC: + case GL_COMPRESSED_R11_EAC: + case GL_COMPRESSED_RG11_EAC: + case GL_COMPRESSED_SIGNED_R11_EAC: + case GL_COMPRESSED_SIGNED_RG11_EAC: + case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: + case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: + return true; + default: + return false; + } +} + +/** * Test if the given format is an integer (non-normalized) format. 
*/ GLboolean @@ -2495,7 +2518,6 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat) case GL_RGBA8I_EXT: case GL_RGBA16I_EXT: case GL_RGBA32I_EXT: - case GL_RGB10_A2UI: return GL_RGBA; case GL_RGB8UI_EXT: case GL_RGB16UI_EXT: @@ -2507,6 +2529,13 @@ _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat) } } + if (ctx->Extensions.ARB_texture_rgb10_a2ui) { + switch (internalFormat) { + case GL_RGB10_A2UI: + return GL_RGBA; + } + } + if (ctx->Extensions.EXT_texture_integer) { switch (internalFormat) { case GL_ALPHA8UI_EXT: diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h index c73f464..474ede2 100644 --- a/src/mesa/main/glformats.h +++ b/src/mesa/main/glformats.h @@ -61,6 +61,9 @@ extern GLboolean _mesa_is_astc_format(GLenum internalFormat); extern GLboolean +_mesa_is_etc2_format(GLenum internalFormat); + +extern GLboolean _mesa_is_type_unsigned(GLenum type); extern GLboolean diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 5956ce4..35ce0f2 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -1385,13 +1385,24 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg, static bool validate_io(struct gl_shader_program *producer, - struct gl_shader_program *consumer) + struct gl_shader_program *consumer, + gl_shader_stage producer_stage, + gl_shader_stage consumer_stage) { if (producer == consumer) return true; + const bool nonarray_stage_to_array_stage = + producer_stage != MESA_SHADER_TESS_CTRL && + (consumer_stage == MESA_SHADER_GEOMETRY || + consumer_stage == MESA_SHADER_TESS_CTRL || + consumer_stage == MESA_SHADER_TESS_EVAL); + bool valid = true; + void *name_buffer = NULL; + size_t name_buffer_size = 0; + gl_shader_variable const **outputs = (gl_shader_variable const **) calloc(producer->NumProgramResourceList, sizeof(gl_shader_variable *)); @@ -1463,11 +1474,52 @@ validate_io(struct gl_shader_program *producer, } } } else { + char 
*consumer_name = consumer_var->name; + + if (nonarray_stage_to_array_stage && + consumer_var->interface_type != NULL && + consumer_var->interface_type->is_array() && + !is_gl_identifier(consumer_var->name)) { + const size_t name_len = strlen(consumer_var->name); + + if (name_len >= name_buffer_size) { + free(name_buffer); + + name_buffer_size = name_len + 1; + name_buffer = malloc(name_buffer_size); + if (name_buffer == NULL) { + valid = false; + goto out; + } + } + + consumer_name = (char *) name_buffer; + + char *s = strchr(consumer_var->name, '['); + if (s == NULL) { + valid = false; + goto out; + } + + char *t = strchr(s, ']'); + if (t == NULL) { + valid = false; + goto out; + } + + assert(t[1] == '.' || t[1] == '['); + + const ptrdiff_t base_name_len = s - consumer_var->name; + + memcpy(consumer_name, consumer_var->name, base_name_len); + strcpy(consumer_name + base_name_len, t + 1); + } + for (unsigned j = 0; j < num_outputs; j++) { const gl_shader_variable *const var = outputs[j]; if (!var->explicit_location && - strcmp(consumer_var->name, var->name) == 0) { + strcmp(consumer_name, var->name) == 0) { producer_var = var; match_index = j; break; @@ -1529,25 +1581,53 @@ validate_io(struct gl_shader_program *producer, * Note that location mismatches are detected by the loops above that * find the producer variable that goes with the consumer variable. 
*/ - if (producer_var->type != consumer_var->type || - producer_var->interpolation != consumer_var->interpolation || - producer_var->precision != consumer_var->precision) { + if (nonarray_stage_to_array_stage) { + if (!consumer_var->type->is_array() || + consumer_var->type->fields.array != producer_var->type) { + valid = false; + goto out; + } + + if (consumer_var->interface_type != NULL) { + if (!consumer_var->interface_type->is_array() || + consumer_var->interface_type->fields.array != producer_var->interface_type) { + valid = false; + goto out; + } + } else if (producer_var->interface_type != NULL) { + valid = false; + goto out; + } + } else { + if (producer_var->type != consumer_var->type) { + valid = false; + goto out; + } + + if (producer_var->interface_type != consumer_var->interface_type) { + valid = false; + goto out; + } + } + + if (producer_var->interpolation != consumer_var->interpolation) { valid = false; goto out; } - if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) { + if (producer_var->precision != consumer_var->precision) { valid = false; goto out; } - if (producer_var->interface_type != consumer_var->interface_type) { + if (producer_var->outermost_struct_type != consumer_var->outermost_struct_type) { valid = false; goto out; } } out: + free(name_buffer); free(outputs); return valid && num_outputs == 0; } @@ -1579,7 +1659,9 @@ _mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline) if (shProg[idx]->_LinkedShaders[idx]->Stage == MESA_SHADER_COMPUTE) break; - if (!validate_io(shProg[prev], shProg[idx])) + if (!validate_io(shProg[prev], shProg[idx], + shProg[prev]->_LinkedShaders[prev]->Stage, + shProg[idx]->_LinkedShaders[idx]->Stage)) return false; prev = idx; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index fc3cc6b..3dde03f 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -502,13 +502,15 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint 
dimensions, */ if (format == rgba_format) { rgba = dest; - } else if (rgba == NULL) { /* Allocate the RGBA buffer only once */ + } else { need_convert = true; - rgba = malloc(height * rgba_stride); - if (!rgba) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()"); - ctx->Driver.UnmapTextureImage(ctx, texImage, img); - return; + if (rgba == NULL) { /* Allocate the RGBA buffer only once */ + rgba = malloc(height * rgba_stride); + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()"); + ctx->Driver.UnmapTextureImage(ctx, texImage, img); + return; + } } } diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 58b7f27..7b13a28 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1300,6 +1300,7 @@ bool _mesa_format_no_online_compression(const struct gl_context *ctx, GLenum format) { return _mesa_is_astc_format(format) || + _mesa_is_etc2_format(format) || compressedteximage_only_format(ctx, format); } @@ -2587,10 +2588,16 @@ check_rtt_cb(GLuint key, void *data, void *userData) att->Texture == texObj && att->TextureLevel == level && att->CubeMapFace == face) { - _mesa_update_texture_renderbuffer(ctx, ctx->DrawBuffer, att); + _mesa_update_texture_renderbuffer(ctx, fb, att); assert(att->Renderbuffer->TexImage); /* Mark fb status as indeterminate to force re-validation */ fb->_Status = 0; + + /* Make sure that the revalidation actually happens if this is + * being done to currently-bound buffers. 
+ */ + if (fb == ctx->DrawBuffer || fb == ctx->ReadBuffer) + ctx->NewState |= _NEW_BUFFERS; } } } diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index f4a0760..72ed869 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -179,9 +179,7 @@ clear_texture_fields(struct gl_context *ctx, return; } - _mesa_init_teximage_fields(ctx, texImage, - 0, 0, 0, 0, /* w, h, d, border */ - GL_NONE, MESA_FORMAT_NONE); + _mesa_clear_texture_image(ctx, texImage); } } } diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index 0847184..758d8b4 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -386,6 +386,7 @@ static void init_velement(struct pipe_vertex_element *velement, } static void init_velement_lowered(struct st_context *st, + const struct st_vertex_program *vp, struct pipe_vertex_element *velements, int src_offset, int format, int instance_divisor, int vbo_index, @@ -396,23 +397,33 @@ static void init_velement_lowered(struct st_context *st, if (doubles) { int lower_format; - if (nr_components == 1) + if (nr_components < 2) lower_format = PIPE_FORMAT_R32G32_UINT; - else if (nr_components >= 2) + else lower_format = PIPE_FORMAT_R32G32B32A32_UINT; init_velement(&velements[idx], src_offset, lower_format, instance_divisor, vbo_index); idx++; - if (nr_components > 2) { - if (nr_components == 3) - lower_format = PIPE_FORMAT_R32G32_UINT; - else if (nr_components >= 4) - lower_format = PIPE_FORMAT_R32G32B32A32_UINT; + if (idx < vp->num_inputs && + vp->index_to_input[idx] == ST_DOUBLE_ATTRIB_PLACEHOLDER) { + if (nr_components >= 3) { + if (nr_components == 3) + lower_format = PIPE_FORMAT_R32G32_UINT; + else + lower_format = PIPE_FORMAT_R32G32B32A32_UINT; + + init_velement(&velements[idx], src_offset + 4 * sizeof(float), + lower_format, instance_divisor, vbo_index); + } else { + /* The values here are undefined. Fill in some conservative + * dummy values. 
+ */ + init_velement(&velements[idx], src_offset, PIPE_FORMAT_R32G32_UINT, + instance_divisor, vbo_index); + } - init_velement(&velements[idx], src_offset + 4 * sizeof(float), - lower_format, instance_divisor, vbo_index); idx++; } } else { @@ -435,10 +446,9 @@ setup_interleaved_attribs(struct st_context *st, const struct st_vp_variant *vpv, const struct gl_client_array **arrays, struct pipe_vertex_buffer *vbuffer, - struct pipe_vertex_element velements[], - unsigned *num_velements) + struct pipe_vertex_element velements[]) { - GLuint attr, attr_idx; + GLuint attr; const GLubyte *low_addr = NULL; GLboolean usingVBO; /* all arrays in a VBO? */ struct gl_buffer_object *bufobj; @@ -481,15 +491,13 @@ setup_interleaved_attribs(struct st_context *st, /* are the arrays in user space? */ usingVBO = _mesa_is_bufferobj(bufobj); - attr_idx = 0; - for (attr = 0; attr < vpv->num_inputs; attr++) { + for (attr = 0; attr < vpv->num_inputs;) { const struct gl_client_array *array; unsigned src_offset; unsigned src_format; array = get_client_array(vp, arrays, attr); - if (!array) - continue; + assert(array); src_offset = (unsigned) (array->Ptr - low_addr); assert(array->_ElementSize == @@ -501,13 +509,11 @@ setup_interleaved_attribs(struct st_context *st, array->Normalized, array->Integer); - init_velement_lowered(st, velements, src_offset, src_format, + init_velement_lowered(st, vp, velements, src_offset, src_format, array->InstanceDivisor, 0, - array->Size, array->Doubles, &attr_idx); + array->Size, array->Doubles, &attr); } - *num_velements = attr_idx; - /* * Return the vbuffer info and setup user-space attrib info, if needed. 
*/ @@ -554,25 +560,25 @@ setup_non_interleaved_attribs(struct st_context *st, const struct gl_client_array **arrays, struct pipe_vertex_buffer vbuffer[], struct pipe_vertex_element velements[], - unsigned *num_velements) + unsigned *num_vbuffers) { struct gl_context *ctx = st->ctx; - GLuint attr, attr_idx = 0; + GLuint attr; - for (attr = 0; attr < vpv->num_inputs; attr++) { + *num_vbuffers = 0; + + for (attr = 0; attr < vpv->num_inputs;) { const GLuint mesaAttr = vp->index_to_input[attr]; const struct gl_client_array *array; struct gl_buffer_object *bufobj; GLsizei stride; unsigned src_format; + unsigned bufidx; array = get_client_array(vp, arrays, attr); - if (!array) { - vbuffer[attr].buffer = NULL; - vbuffer[attr].user_buffer = NULL; - vbuffer[attr].buffer_offset = 0; - continue; - } + assert(array); + + bufidx = (*num_vbuffers)++; stride = array->StrideB; bufobj = array->BufferObj; @@ -590,9 +596,9 @@ setup_non_interleaved_attribs(struct st_context *st, return FALSE; /* out-of-memory error probably */ } - vbuffer[attr].buffer = stobj->buffer; - vbuffer[attr].user_buffer = NULL; - vbuffer[attr].buffer_offset = pointer_to_offset(array->Ptr); + vbuffer[bufidx].buffer = stobj->buffer; + vbuffer[bufidx].user_buffer = NULL; + vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr); } else { /* wrap user data */ @@ -609,13 +615,13 @@ setup_non_interleaved_attribs(struct st_context *st, assert(ptr); - vbuffer[attr].buffer = NULL; - vbuffer[attr].user_buffer = ptr; - vbuffer[attr].buffer_offset = 0; + vbuffer[bufidx].buffer = NULL; + vbuffer[bufidx].user_buffer = ptr; + vbuffer[bufidx].buffer_offset = 0; } /* common-case setup */ - vbuffer[attr].stride = stride; /* in bytes */ + vbuffer[bufidx].stride = stride; /* in bytes */ src_format = st_pipe_vertex_format(array->Type, array->Size, @@ -623,13 +629,11 @@ setup_non_interleaved_attribs(struct st_context *st, array->Normalized, array->Integer); - init_velement_lowered(st, velements, 0, src_format, - 
array->InstanceDivisor, attr, - array->Size, array->Doubles, &attr_idx); - + init_velement_lowered(st, vp, velements, 0, src_format, + array->InstanceDivisor, bufidx, + array->Size, array->Doubles, &attr); } - *num_velements = attr_idx; return TRUE; } @@ -641,7 +645,7 @@ static void update_array(struct st_context *st) const struct st_vp_variant *vpv; struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - unsigned num_vbuffers, num_velements; + unsigned num_vbuffers; st->vertex_array_out_of_memory = FALSE; @@ -659,23 +663,21 @@ static void update_array(struct st_context *st) * Setup the vbuffer[] and velements[] arrays. */ if (is_interleaved_arrays(vp, vpv, arrays)) { - if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) { + if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements)) { st->vertex_array_out_of_memory = TRUE; return; } num_vbuffers = 1; - if (num_velements == 0) + if (vpv->num_inputs == 0) num_vbuffers = 0; } else { if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer, - velements, &num_velements)) { + velements, &num_vbuffers)) { st->vertex_array_out_of_memory = TRUE; return; } - - num_vbuffers = vpv->num_inputs; } cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer); @@ -685,7 +687,7 @@ static void update_array(struct st_context *st) st->last_num_vbuffers - num_vbuffers, NULL); } st->last_num_vbuffers = num_vbuffers; - cso_set_vertex_elements(st->cso_context, num_velements, velements); + cso_set_vertex_elements(st->cso_context, vpv->num_inputs, velements); } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 4b7ad77..3d409a6 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -304,12 +304,10 @@ st_create_texture_sampler_view_from_stobj(struct st_context *st, templ.target = gl_target_to_pipe(stObj->base.Target); } 
- if (swizzle != SWIZZLE_NOOP) { - templ.swizzle_r = GET_SWZ(swizzle, 0); - templ.swizzle_g = GET_SWZ(swizzle, 1); - templ.swizzle_b = GET_SWZ(swizzle, 2); - templ.swizzle_a = GET_SWZ(swizzle, 3); - } + templ.swizzle_r = GET_SWZ(swizzle, 0); + templ.swizzle_g = GET_SWZ(swizzle, 1); + templ.swizzle_b = GET_SWZ(swizzle, 2); + templ.swizzle_a = GET_SWZ(swizzle, 3); return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ); } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 362cef4..1acec7c 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -313,11 +313,13 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) static inline GLboolean is_scissor_enabled(struct gl_context *ctx, struct gl_renderbuffer *rb) { + const struct gl_scissor_rect *scissor = &ctx->Scissor.ScissorArray[0]; + return (ctx->Scissor.EnableFlags & 1) && - (ctx->Scissor.ScissorArray[0].X > 0 || - ctx->Scissor.ScissorArray[0].Y > 0 || - (unsigned) ctx->Scissor.ScissorArray[0].Width < rb->Width || - (unsigned) ctx->Scissor.ScissorArray[0].Height < rb->Height); + (scissor->X > 0 || + scissor->Y > 0 || + scissor->X + scissor->Width < rb->Width || + scissor->Y + scissor->Height < rb->Height); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 3db5749..c013d3b 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -203,8 +203,19 @@ st_draw_vbo(struct gl_context *ctx, /* The VBO module handles restart for the non-indexed GLDrawArrays * so we only set these fields for indexed drawing: */ - info.primitive_restart = ctx->Array._PrimitiveRestart; - info.restart_index = _mesa_primitive_restart_index(ctx, ib->type); + if (ctx->Array._PrimitiveRestart) { + info.restart_index = _mesa_primitive_restart_index(ctx, ib->type); + + /* Enable primitive restart only when the restart index can have an + * effect. 
This is required for correctness in radeonsi VI support, + * though other hardware may also benefit from taking a faster, + * non-restart path when possible. + */ + if ((ibuffer.index_size >= 4) || + (ibuffer.index_size >= 2 && info.restart_index <= 0xffff) || + (info.restart_index <= 0xff)) + info.primitive_restart = true; + } } else { /* Transform feedback drawing is always non-indexed. */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 9a280fc..5f76241 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -37,6 +37,7 @@ #include "main/enums.h" #include "main/formats.h" #include "main/glformats.h" +#include "main/texcompress.h" #include "main/texgetimage.h" #include "main/teximage.h" #include "main/texstore.h" @@ -2282,6 +2283,12 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, } if (pFormat == PIPE_FORMAT_NONE) { + /* lie about using etc1/etc2 natively if we do decoding tricks */ + mFormat = _mesa_glenum_to_compressed_format(internalFormat); + if ((mFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1) || + (_mesa_is_format_etc2(mFormat) && !st->has_etc2)) + return mFormat; + /* no luck at all */ return MESA_FORMAT_NONE; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aa443a5..ee117c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -255,6 +255,7 @@ public: ir_instruction *ir; GLboolean cond_update; bool saturate; + bool is_64bit_expanded; st_src_reg sampler; /**< sampler register */ int sampler_base; int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */ @@ -515,7 +516,8 @@ public: unsigned *array_size, unsigned *base, unsigned *index, - st_src_reg *reladdr); + st_src_reg *reladdr, + bool opaque); void calc_deref_offsets(ir_dereference *head, ir_dereference *tail, unsigned *array_elements, @@ -523,6 +525,7 @@ public: unsigned 
*index, st_src_reg *indirect, unsigned *location); + st_src_reg canonicalize_gather_offset(st_src_reg offset); bool try_emit_mad(ir_expression *ir, int mul_operand); @@ -670,6 +673,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->src[3] = src3; + inst->is_64bit_expanded = false; inst->ir = ir; inst->dead_mask = 0; /* default to float, for paths where this is not initialized @@ -792,6 +796,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, dinst->prev = NULL; } this->instructions.push_tail(dinst); + dinst->is_64bit_expanded = true; /* modify the destination if we are splitting */ for (j = 0; j < 2; j++) { @@ -1136,7 +1141,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_double(double val) uval[0].u = *(uint32_t *)&val; uval[1].u = *(((uint32_t *)&val) + 1); src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle); - + src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); return src; } @@ -1958,12 +1963,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_bitcast_f2i: - result_src = op[0]; - result_src.type = GLSL_TYPE_INT; - break; case ir_unop_bitcast_f2u: - result_src = op[0]; - result_src.type = GLSL_TYPE_UINT; + /* Make sure we don't propagate the negate modifier to integer opcodes. */ + if (op[0].negate) + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + else + result_src = op[0]; + result_src.type = ir->operation == ir_unop_bitcast_f2i ? 
GLSL_TYPE_INT : + GLSL_TYPE_UINT; break; case ir_unop_bitcast_i2f: case ir_unop_bitcast_u2f: @@ -2792,6 +2799,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type * assert(type->is_scalar() || type->is_vector()); + l->type = type->base_type; r->type = type->base_type; if (cond) { st_src_reg l_src = st_src_reg(*l); @@ -2903,6 +2911,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && type_size(ir->lhs->type) == 1 && l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { /* To avoid emitting an extra MOV when assigning an expression to a @@ -3144,7 +3153,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) st_src_reg offset; unsigned array_size = 0, base = 0, index = 0; - get_deref_offsets(deref, &array_size, &base, &index, &offset); + get_deref_offsets(deref, &array_size, &base, &index, &offset, false); if (offset.file != PROGRAM_UNDEFINED) { emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), @@ -3451,7 +3460,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); get_deref_offsets(img, &sampler_array_size, &sampler_base, - (unsigned int *)&image.index, &reladdr); + (unsigned int *)&image.index, &reladdr, true); if (reladdr.file != PROGRAM_UNDEFINED) { emit_arl(ir, sampler_reladdr, reladdr); image.reladdr = ralloc(mem_ctx, st_src_reg); @@ -3811,7 +3820,8 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, unsigned *array_size, unsigned *base, unsigned *index, - st_src_reg *reladdr) + st_src_reg *reladdr, + bool opaque) { GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); unsigned location = 0; @@ -3836,12 +3846,27 @@ 
glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, *array_size = 1; } - if (location != 0xffffffff) { + if (opaque) { + assert(location != 0xffffffff); *base += this->shader_program->UniformStorage[location].opaque[shader].index; *index += this->shader_program->UniformStorage[location].opaque[shader].index; } } +st_src_reg +glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) +{ + if (offset.reladdr || offset.reladdr2) { + st_src_reg tmp = get_temp(glsl_type::ivec2_type); + st_dst_reg tmp_dst = st_dst_reg(tmp); + tmp_dst.writemask = WRITEMASK_XY; + emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); + return tmp; + } + + return offset; +} + void glsl_to_tgsi_visitor::visit(ir_texture *ir) { @@ -3967,9 +3992,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) offset[i].index += i * type_size(elt_type); offset[i].type = elt_type->base_type; offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); + offset[i] = canonicalize_gather_offset(offset[i]); } } else { - offset[0] = this->result; + offset[0] = canonicalize_gather_offset(this->result); } } break; @@ -4075,7 +4101,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, - &sampler_index, &reladdr); + &sampler_index, &reladdr, true); if (reladdr.file != PROGRAM_UNDEFINED) emit_arl(ir, sampler_reladdr, reladdr); @@ -5526,60 +5552,24 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) static struct tgsi_texture_offset translate_tex_offset(struct st_translate *t, - const st_src_reg *in_offset, int idx) + const st_src_reg *in_offset) { struct tgsi_texture_offset offset; - struct ureg_src imm_src; - struct ureg_dst dst; - int array; + struct ureg_src src = translate_src(t, in_offset); - switch (in_offset->file) { - case PROGRAM_IMMEDIATE: - assert(in_offset->index >= 0 && in_offset->index < t->num_immediates); - imm_src = t->immediates[in_offset->index]; + offset.File = src.File; + offset.Index = src.Index; + 
offset.SwizzleX = src.SwizzleX; + offset.SwizzleY = src.SwizzleY; + offset.SwizzleZ = src.SwizzleZ; + offset.Padding = 0; - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = imm_src.SwizzleX; - offset.SwizzleY = imm_src.SwizzleY; - offset.SwizzleZ = imm_src.SwizzleZ; - offset.Padding = 0; - break; - case PROGRAM_INPUT: - imm_src = t->inputs[t->inputMapping[in_offset->index]]; - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - case PROGRAM_TEMPORARY: - imm_src = ureg_src(t->temps[in_offset->index]); - offset.File = imm_src.File; - offset.Index = imm_src.Index; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - case PROGRAM_ARRAY: - array = in_offset->index >> 16; - - assert(array >= 0); - assert(array < (int)t->num_temp_arrays); + assert(!src.Indirect); + assert(!src.DimIndirect); + assert(!src.Dimension); + assert(!src.Absolute); /* those shouldn't be used with integers anyway */ + assert(!src.Negate); - dst = t->arrays[array]; - offset.File = dst.File; - offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000; - offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); - offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); - offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); - offset.Padding = 0; - break; - default: - break; - } return offset; } @@ -5643,7 +5633,7 @@ compile_tgsi_instruction(struct st_translate *t, ureg_src_indirect(src[num_src], ureg_src(t->address[2])); num_src++; for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { - texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i); + texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); } tex_target = 
st_translate_texture_target(inst->tex_target, inst->tex_shadow); @@ -6045,7 +6035,11 @@ st_translate_program( inputSemanticName[i], inputSemanticIndex[i], interpMode[i], 0, interpLocation[i], array_id, array_size); - i += array_size - 1; + + GLuint base_attr = inputSlotToAttr[i]; + while (i + 1 < numInputs && + inputSlotToAttr[i + 1] < base_attr + array_size) + ++i; } else { t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg, diff --git a/src/mesa/state_tracker/st_vdpau.c b/src/mesa/state_tracker/st_vdpau.c index dffa52f..4f599dd 100644 --- a/src/mesa/state_tracker/st_vdpau.c +++ b/src/mesa/state_tracker/st_vdpau.c @@ -65,6 +65,7 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface, struct pipe_video_buffer *buffer; struct pipe_sampler_view **samplers; + struct pipe_resource *res = NULL; getProcAddr = (void *)ctx->vdpGetProcAddress; if (getProcAddr(device, VDP_FUNC_ID_VIDEO_SURFACE_GALLIUM, (void**)&f)) @@ -82,7 +83,8 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface, if (!sv) return NULL; - return sv->texture; + pipe_resource_reference(&res, sv->texture); + return res; } static struct pipe_resource * @@ -90,13 +92,15 @@ st_vdpau_output_surface_gallium(struct gl_context *ctx, const void *vdpSurface) { int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); uint32_t device = (uintptr_t)ctx->vdpDevice; + struct pipe_resource *res = NULL; VdpOutputSurfaceGallium *f; getProcAddr = (void *)ctx->vdpGetProcAddress; if (getProcAddr(device, VDP_FUNC_ID_OUTPUT_SURFACE_GALLIUM, (void**)&f)) return NULL; - return f((uintptr_t)vdpSurface); + pipe_resource_reference(&res, f((uintptr_t)vdpSurface)); + return res; } static struct pipe_resource * @@ -208,6 +212,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, /* do we have different screen objects ? 
*/ if (res->screen != st->pipe->screen) { _mesa_error(ctx, GL_INVALID_OPERATION, "VDPAUMapSurfacesNV"); + pipe_resource_reference(&res, NULL); return; } @@ -241,6 +246,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, stObj->surface_format = res->format; _mesa_dirty_texobj(ctx, texObj); + pipe_resource_reference(&res, NULL); } static void diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 87ed7f7..96ed84f 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -814,6 +814,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, prim[0].basevertex = basevertex; prim[0].num_instances = numInstances; prim[0].base_instance = baseInstance; + prim[0].draw_id = 0; /* Need to give special consideration to rendering a range of * indices starting somewhere above zero. Typically the diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 97a1dfd..fafdf1d 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -1167,8 +1167,8 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count) * then emitting an indexed prim at runtime. 
*/ static void GLAPIENTRY -_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid * indices) +_save_OBE_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices, GLint basevertex) { GET_CURRENT_CONTEXT(ctx); struct vbo_save_context *save = &vbo_context(ctx)->save; @@ -1205,15 +1205,15 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type, switch (type) { case GL_UNSIGNED_BYTE: for (i = 0; i < count; i++) - CALL_ArrayElement(GET_DISPATCH(), (((GLubyte *) indices)[i])); + CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLubyte *) indices)[i])); break; case GL_UNSIGNED_SHORT: for (i = 0; i < count; i++) - CALL_ArrayElement(GET_DISPATCH(), (((GLushort *) indices)[i])); + CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLushort *) indices)[i])); break; case GL_UNSIGNED_INT: for (i = 0; i < count; i++) - CALL_ArrayElement(GET_DISPATCH(), (((GLuint *) indices)[i])); + CALL_ArrayElement(GET_DISPATCH(), (basevertex + ((GLuint *) indices)[i])); break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glDrawElements(type)"); @@ -1225,6 +1225,13 @@ _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type, _ae_unmap_vbos(ctx); } +static void GLAPIENTRY +_save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices) +{ + _save_OBE_DrawElementsBaseVertex(mode, count, type, indices, 0); +} + static void GLAPIENTRY _save_OBE_DrawRangeElements(GLenum mode, GLuint start, GLuint end, @@ -1462,6 +1469,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, { SET_DrawArrays(exec, _save_OBE_DrawArrays); SET_DrawElements(exec, _save_OBE_DrawElements); + SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex); SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements); SET_MultiDrawElementsEXT(exec, _save_OBE_MultiDrawElements); SET_MultiDrawElementsBaseVertex(exec, _save_OBE_MultiDrawElementsBaseVertex); diff --git a/src/mesa/vbo/vbo_split_copy.c 
b/src/mesa/vbo/vbo_split_copy.c index cb27ef9..1c3474c 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -243,7 +243,7 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) static GLuint elt(struct copy_context *copy, GLuint elt_idx) { - GLuint elt = copy->srcelt[elt_idx]; + GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex; GLuint slot = elt & (ELT_TABLE_SIZE-1); /* printf("elt %d\n", elt); */ |