diff options
52 files changed, 632 insertions, 224 deletions
diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore index 194252b..27f5d6d 100644 --- a/bin/.cherry-ignore +++ b/bin/.cherry-ignore @@ -1,2 +1,5 @@ # Commit was picked with -x 907ace57986733add2aebfa9dd7c83c67efed70e mapi: automake: set VISIBILITY_CFLAGS for shared glapi + +# Commit was reverted shortly after it landed in master +a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM diff --git a/docs/relnotes/13.0.1.html b/docs/relnotes/13.0.1.html index d317adb..0ce9fc1 100644 --- a/docs/relnotes/13.0.1.html +++ b/docs/relnotes/13.0.1.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported. <h2>SHA256 checksums</h2> <pre> -TBD +7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5 mesa-13.0.1.tar.gz +71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731 mesa-13.0.1.tar.xz </pre> diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 1566afd..ffcaf33 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)") CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)") CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3") CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3") +CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherrytrail)") +CHIPSET(0x22B1, chv, "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */ +CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") +CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1") @@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)") CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)") +CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)") +CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)") +CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") +CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics 505 (Broxton)") +CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)") CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") @@ -144,22 +153,15 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2") -CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)") CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2") CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2") CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2") -CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)") CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") -CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherrytrail)") -CHIPSET(0x22B1, chv, "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */ -CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") -CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") -CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") -CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") +CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)") +CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)") diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index f235cc2..31d7b6e 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1683,7 +1683,7 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, - ctx->i32zero, ""); + LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], @@ -3299,17 +3299,25 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) } if (instr->op == nir_texop_texture_samples) { - LLVMValueRef res, samples; + LLVMValueRef res, samples, is_msaa; res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, ""); samples = LLVMBuildExtractElement(ctx->builder, res, LLVMConstInt(ctx->i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->builder, samples, + LLVMConstInt(ctx->i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->builder, is_msaa, + LLVMConstInt(ctx->i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->i32, 0xe, false), ""); + samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, false), ""); samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, false), ""); samples = LLVMBuildShl(ctx->builder, ctx->i32one, samples, ""); - + samples = LLVMBuildSelect(ctx->builder, is_msaa, samples, + ctx->i32one, ""); result = samples; goto write_result; } @@ -3408,7 +3416,10 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) address[count++] = sample_index; } else if(instr->op == nir_texop_txs) { count = 0; - address[count++] = lod; + if (lod) + address[count++] = lod; + else + address[count++] = ctx->i32zero; } for (chan = 0; chan < count; chan++) { diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 690c739..9517e7a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2291,6 +2291,7 @@ void radv_CmdPipelineBarrier( break; case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: case VK_ACCESS_TRANSFER_READ_BIT: + case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2; default: break; diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 76d5fa1..fe786b3 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format, case VK_FORMAT_D16_UNORM: return V_008F14_IMG_DATA_FORMAT_16; case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_X8_D24_UNORM_PACK32: return V_008F14_IMG_DATA_FORMAT_8_24; case VK_FORMAT_S8_UINT: return V_008F14_IMG_DATA_FORMAT_8; diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 710eda1..3099d83 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device, if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) { const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; - const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; - - switch (vk_format) { - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle); - break; - default: - vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); - } + vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); } else { vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); } @@ -775,8 +765,13 @@ radv_image_view_init(struct radv_image_view *iview, iview->vk_format = pCreateInfo->format; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) + if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { is_stencil = true; + iview->vk_format = vk_format_stencil_only(iview->vk_format); + } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { + iview->vk_format = vk_format_depth_only(iview->vk_format); + } + iview->extent = (VkExtent3D) { .width = radv_minify(image->extent.width , range->baseMipLevel), .height = radv_minify(image->extent.height, range->baseMipLevel), @@ -794,7 +789,7 @@ radv_image_view_init(struct radv_image_view *iview, si_make_texture_descriptor(device, image, false, iview->type, - pCreateInfo->format, + iview->vk_format, &pCreateInfo->components, 0, radv_get_levelCount(image, range) - 1, range->baseArrayLayer, diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index a946bd4..1f1ab1c 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR( RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); const VkAllocationCallbacks *alloc; + if (!_swapchain) + return; + if (pAllocator) alloc = pAllocator; else diff --git a/src/compiler/glsl/cache.c b/src/compiler/glsl/cache.c index 64a34f0..db934e5 100644 --- a/src/compiler/glsl/cache.c +++ b/src/compiler/glsl/cache.c @@ -612,19 +612,18 @@ cache_put(struct program_cache *cache, p_atomic_add(cache->size, size); + done: + if (fd_final != -1) + close(fd_final); /* This close finally releases the flock, (now that the final dile * has been renamed into place and the size has been added). */ - close(fd); - fd = -1; - - done: + if (fd != -1) + close(fd); if (filename_tmp) ralloc_free(filename_tmp); if (filename) ralloc_free(filename); - if (fd != -1) - close(fd); } void * diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 4fd1448..7656325 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -176,7 +176,7 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value); * (such as the <HASH> and <DEFINE> start conditions in the lexer). */ %token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS %token PASTE -%type <ival> INTEGER operator SPACE integer_constant +%type <ival> INTEGER operator SPACE integer_constant version_constant %type <expression_value> expression %type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA %type <string_list> identifier_list @@ -424,14 +424,14 @@ control_line_success: | HASH_TOKEN ENDIF { _glcpp_parser_skip_stack_pop (parser, & @1); } NEWLINE -| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE { - if (parser->version != 0) { +| HASH_TOKEN VERSION_TOKEN version_constant NEWLINE { + if (parser->version_set) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, NULL, true); } -| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE { - if (parser->version != 0) { +| HASH_TOKEN VERSION_TOKEN version_constant IDENTIFIER NEWLINE { + if (parser->version_set) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, $4, true); @@ -470,6 +470,17 @@ integer_constant: $$ = $1; } +version_constant: + INTEGER_STRING { + /* Both octal and hexadecimal constants begin with 0. */ + if ($1[0] == '0' && $1[1] != '\0') { + glcpp_error(&@1, parser, "invalid #version \"%s\" (not a decimal constant)", $1); + $$ = 0; + } else { + $$ = strtoll($1, NULL, 10); + } + } + expression: integer_constant { $$.value = $1; @@ -1376,6 +1387,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api parser->state = state; parser->api = api; parser->version = 0; + parser->version_set = false; parser->has_new_line_number = 0; parser->new_line_number = 1; @@ -2318,10 +2330,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio const char *es_identifier, bool explicitly_set) { - if (parser->version != 0) + if (parser->version_set) return; parser->version = version; + parser->version_set = true; add_builtin_define (parser, "__VERSION__", version); diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index cab4374..fcee812 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -207,6 +207,15 @@ struct glcpp_parser { void *state; gl_api api; unsigned version; + + /** + * Has the #version been set? + * + * A separate flag is used because any possible sentinel value in + * \c ::version could also be set by a #version line. + */ + bool version_set; + bool has_new_line_number; int new_line_number; bool has_new_source_number; diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index d5e5d4c..450faeb 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -253,6 +253,10 @@ HASH ^{SPC}#{SPC} yylval->n = strtol(yytext, NULL, 10); return INTCONSTANT; } +<PP>0 { + yylval->n = 0; + return INTCONSTANT; + } <PP>\n { BEGIN 0; yylineno++; yycolumn = 0; return EOL; } <PP>. { return yytext[0]; } diff --git a/src/compiler/glsl/lower_output_reads.cpp b/src/compiler/glsl/lower_output_reads.cpp index 732f4d3..8a375ac 100644 --- a/src/compiler/glsl/lower_output_reads.cpp +++ b/src/compiler/glsl/lower_output_reads.cpp @@ -157,7 +157,6 @@ ir_visitor_status output_read_remover::visit_leave(ir_emit_vertex *ir) { hash_table_call_foreach(replacements, emit_return_copy, ir); - _mesa_hash_table_clear(replacements, NULL); return visit_continue; } diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 634058c..b66ceb2 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -805,8 +805,12 @@ vtn_get_builtin_location(struct vtn_builder *b, set_mode_system_value(mode); break; case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; + if (*mode == nir_var_shader_out) { + *location = VARYING_SLOT_PRIMITIVE_ID; + } else { + *location = SYSTEM_VALUE_PRIMITIVE_ID; + set_mode_system_value(mode); + } break; case SpvBuiltInInvocationId: *location = SYSTEM_VALUE_INVOCATION_ID; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e785106..a35187c 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -42,7 +42,8 @@ struct si_compute { struct si_shader shader; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; - bool use_code_object_v2; + unsigned use_code_object_v2 : 1; + unsigned variable_group_size : 1; }; struct dispatch_packet { @@ -147,7 +148,11 @@ static void *si_create_compute_state( S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | S_00B84C_LDS_SIZE(shader->config.lds_size); + program->variable_group_size = + sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; + FREE(sel.tokens); + program->shader.selector = NULL; } else { const struct pipe_llvm_program_header *header; const char *code; @@ -607,14 +612,12 @@ static void si_setup_tgsi_grid(struct si_context *sctx, } } else { struct si_compute *program = sctx->cs_shader_state.program; - bool variable_group_size = - program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; - radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3); + radeon_set_sh_reg_seq(cs, grid_size_reg, program->variable_group_size ? 6 : 3); radeon_emit(cs, info->grid[0]); radeon_emit(cs, info->grid[1]); radeon_emit(cs, info->grid[2]); - if (variable_group_size) { + if (program->variable_group_size) { radeon_emit(cs, info->block[0]); radeon_emit(cs, info->block[1]); radeon_emit(cs, info->block[2]); diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 313630a..c164eba 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -156,6 +156,12 @@ struct vc4_compiled_shader { bool disable_early_z; + /* Set if the compile failed, likely due to register allocation + * failure. In this case, we have no shader to run and should not try + * to do any draws. + */ + bool failed; + uint8_t num_inputs; /* Byte offsets for the start of the vertex attributes 0-7, and the @@ -449,7 +455,7 @@ void vc4_flush_jobs_reading_resource(struct vc4_context *vc4, void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c); -void vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); bool vc4_rt_format_supported(enum pipe_format f); bool vc4_rt_format_is_565(enum pipe_format f); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 61c5842..c5afc0c 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -307,7 +307,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } vc4_start_draw(vc4); - vc4_update_compiled_shaders(vc4, info->mode); + if (!vc4_update_compiled_shaders(vc4, info->mode)) { + debug_warn_once("shader compile failed, skipping draw call.\n"); + return; + } vc4_emit_state(pctx); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0145488..05e2021 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -451,6 +451,15 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg u0 = qir_uniform_f(c, 0.0f); struct qreg u1 = qir_uniform_f(c, 1.0f); if (c->key->tex[unit].compare_mode) { + /* From the GL_ARB_shadow spec: + * + * "Let Dt (D subscript t) be the depth texture + * value, in the range [0, 1]. Let R be the + * interpolated texture coordinate clamped to the + * range [0, 1]." + */ + compare = qir_SAT(c, compare); + switch (c->key->tex[unit].compare_func) { case PIPE_FUNC_NEVER: depth_output = qir_uniform_f(c, 0.0f); @@ -2437,9 +2446,15 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } } - copy_uniform_state_to_shader(shader, c); - shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, - c->qpu_inst_count * sizeof(uint64_t)); + shader->failed = c->failed; + if (c->failed) { + shader->failed = true; + } else { + copy_uniform_state_to_shader(shader, c); + shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, + c->qpu_inst_count * + sizeof(uint64_t)); + } /* Copy the compiler UBO range state to the compiled shader, dropping * out arrays that were never referenced by an indirect load. @@ -2642,11 +2657,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode) } } -void +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode) { vc4_update_compiled_fs(vc4, prim_mode); vc4_update_compiled_vs(vc4, prim_mode); + + return !(vc4->prog.cs->failed || + vc4->prog.vs->failed || + vc4->prog.fs->failed); } static uint32_t diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4d41c42..c76aeb2 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -523,6 +523,7 @@ struct vc4_compile { uint32_t program_id; uint32_t variant_id; + bool failed; }; /* Special nir_load_input intrinsic index for loading the current TLB diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 4d371c0..eedee55 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -565,10 +565,13 @@ vc4_generate_code_block(struct vc4_compile *c, void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { - struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); struct qblock *start_block = list_first_entry(&c->blocks, struct qblock, link); + struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); + if (!temp_registers) + return; + switch (c->stage) { case QSTAGE_VERT: case QSTAGE_COORD: diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index fc44764..ab343ee 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -247,7 +247,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_ROT_MUL: assert(inst->src[0].file == QFILE_TEMP); - class_bits[inst->src[0].index] &= ~CLASS_BIT_R0_R3; + class_bits[inst->src[0].index] &= CLASS_BIT_R0_R3; break; default: @@ -323,7 +323,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) if (!ok) { fprintf(stderr, "Failed to register allocate:\n"); qir_dump(c); - abort(); + c->failed = true; + return NULL; } for (uint32_t i = 0; i < c->num_temps; i++) { diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 51c4117..88f4428 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -245,7 +245,7 @@ struct dri_extension_match { static struct dri_extension_match dri_core_extensions[] = { { __DRI2_FLUSH, 1, offsetof(struct gbm_dri_device, flush) }, { __DRI_IMAGE, 1, offsetof(struct gbm_dri_device, image) }, - { __DRI2_FENCE, 2, offsetof(struct gbm_dri_device, fence), 1 }, + { __DRI2_FENCE, 1, offsetof(struct gbm_dri_device, fence), 1 }, { NULL, 0, 0 } }; diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c index 1dc1769..51fdf34 100644 --- a/src/intel/common/gen_device_info.c +++ b/src/intel/common/gen_device_info.c @@ -391,6 +391,7 @@ static const struct gen_device_info gen_device_info_bxt = { .urb = { .size = 192, .min_vs_entries = 34, + .min_ds_entries = 34, .max_vs_entries = 704, .max_tcs_entries = 256, .max_tes_entries = 416, @@ -413,6 +414,7 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = { .urb = { .size = 128, .min_vs_entries = 34, + .min_ds_entries = 34, .max_vs_entries = 352, .max_tcs_entries = 128, .max_tes_entries = 208, @@ -473,6 +475,52 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = { .num_slices = 3, }; +static const struct gen_device_info gen_device_info_glk = { + GEN9_FEATURES, + .is_broxton = 1, + .gt = 1, + .has_llc = false, + + .num_slices = 1, + .max_vs_threads = 112, + .max_tcs_threads = 112, + .max_tes_threads = 112, + .max_gs_threads = 112, + .max_cs_threads = 6 * 6, + .urb = { + .size = 192, + .min_vs_entries = 34, + .min_ds_entries = 34, + .max_vs_entries = 704, + .max_tcs_entries = 256, + .max_tes_entries = 416, + .max_gs_entries = 256, + } +}; + +static const struct gen_device_info gen_device_info_glk_2x6 = { + GEN9_FEATURES, + .is_broxton = 1, + .gt = 1, + .has_llc = false, + + .num_slices = 1, + .max_vs_threads = 56, /* XXX: guess */ + .max_tcs_threads = 56, /* XXX: guess */ + .max_tes_threads = 56, + .max_gs_threads = 56, + .max_cs_threads = 6 * 6, + .urb = { + .size = 128, + .min_vs_entries = 34, + .min_ds_entries = 34, + .max_vs_entries = 352, + .max_tcs_entries = 128, + .max_tes_entries = 208, + .max_gs_entries = 128, + } +}; + bool gen_get_device_info(int devid, struct gen_device_info *devinfo) { diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 7831c5e..32463b1 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -339,7 +339,7 @@ isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, if (width) *width = isl_align(*width, 2) * px_size_sa.width; if (height) - *height = isl_align(*height, 2) * px_size_sa.width; + *height = isl_align(*height, 2) * px_size_sa.height; } static enum isl_array_pitch_span diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 87f242c..d417469 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -787,7 +787,7 @@ void anv_CmdClearColorImage( unsigned base_layer = pRanges[r].baseArrayLayer; unsigned layer_count = pRanges[r].layerCount; - for (unsigned i = 0; i < pRanges[r].levelCount; i++) { + for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) { const unsigned level = pRanges[r].baseMipLevel + i; const unsigned level_width = anv_minify(image->extent.width, level); const unsigned level_height = anv_minify(image->extent.height, level); @@ -847,7 +847,7 @@ void anv_CmdClearDepthStencilImage( unsigned base_layer = pRanges[r].baseArrayLayer; unsigned layer_count = pRanges[r].layerCount; - for (unsigned i = 0; i < pRanges[r].levelCount; i++) { + for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) { const unsigned level = pRanges[r].baseMipLevel + i; const unsigned level_width = anv_minify(image->extent.width, level); const unsigned level_height = anv_minify(image->extent.height, level); @@ -1141,15 +1141,6 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; - /* FINISHME(perf): Skip clears for resolve attachments. - * - * From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a resolve - * attachment, then the loadOp is effectively ignored as the resolve is - * guaranteed to overwrite all pixels in the render area. - */ - if (!subpass->has_resolve) return; @@ -1163,6 +1154,17 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) if (dst_att == VK_ATTACHMENT_UNUSED) continue; + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + struct anv_image_view *src_iview = fb->attachments[src_att]; struct anv_image_view *dst_iview = fb->attachments[dst_att]; diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 7ff7dba..44ae67d 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -318,6 +318,9 @@ void anv_FreeCommandBuffers( for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + if (!cmd_buffer) + continue; + anv_cmd_buffer_destroy(cmd_buffer); } } @@ -796,6 +799,9 @@ void anv_DestroyCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + if (!pool) + return; + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { anv_cmd_buffer_destroy(cmd_buffer); diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 7d5a78d..17a1c8e 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -200,6 +200,9 @@ void anv_DestroyDescriptorSetLayout( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + if (!set_layout) + return; + vk_free2(&device->alloc, pAllocator, set_layout); } @@ -282,6 +285,9 @@ void anv_DestroyPipelineLayout( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + if (!pipeline_layout) + return; + vk_free2(&device->alloc, pAllocator, pipeline_layout); } @@ -355,6 +361,9 @@ void anv_DestroyDescriptorPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool); + if (!pool) + return; + anv_state_stream_finish(&pool->surface_state_stream); vk_free2(&device->alloc, pAllocator, pool); } @@ -546,6 +555,9 @@ VkResult anv_FreeDescriptorSets( for (uint32_t i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + if (!set) + continue; + anv_descriptor_set_destroy(device, pool, set); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e83887c..125df22 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -896,6 +896,12 @@ VkResult anv_CreateDevice( pthread_mutex_init(&device->mutex, NULL); + pthread_condattr_t condattr; + pthread_condattr_init(&condattr); + pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC); + pthread_cond_init(&device->queue_submit, NULL); + pthread_condattr_destroy(&condattr); + anv_bo_pool_init(&device->batch_bo_pool, device); anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); @@ -1001,10 +1007,11 @@ VkResult anv_EnumerateInstanceExtensionProperties( return VK_SUCCESS; } - assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions)); + typed_memcpy(pProperties, global_extensions, *pPropertyCount); - *pPropertyCount = ARRAY_SIZE(global_extensions); - memcpy(pProperties, global_extensions, sizeof(global_extensions)); + if (*pPropertyCount < ARRAY_SIZE(global_extensions)) + return VK_INCOMPLETE; return VK_SUCCESS; } @@ -1020,10 +1027,11 @@ VkResult anv_EnumerateDeviceExtensionProperties( return VK_SUCCESS; } - assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions)); + typed_memcpy(pProperties, device_extensions, *pPropertyCount); - *pPropertyCount = ARRAY_SIZE(device_extensions); - memcpy(pProperties, device_extensions, sizeof(device_extensions)); + if (*pPropertyCount < ARRAY_SIZE(device_extensions)) + return VK_INCOMPLETE; return VK_SUCCESS; } @@ -1141,6 +1149,11 @@ VkResult anv_QueueSubmit( result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); if (result != VK_SUCCESS) goto out; + + /* Update the fence and wake up any waiters */ + assert(fence->state == ANV_FENCE_STATE_RESET); + fence->state = ANV_FENCE_STATE_SUBMITTED; + pthread_cond_broadcast(&device->queue_submit); } out: @@ -1518,7 +1531,11 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; - fence->ready = false; + if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { + fence->state = ANV_FENCE_STATE_SIGNALED; + } else { + fence->state = ANV_FENCE_STATE_RESET; + } *pFence = anv_fence_to_handle(fence); @@ -1533,6 +1550,9 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); + if (!fence) + return; + assert(fence->bo.map == fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); } @@ -1544,7 +1564,7 @@ VkResult anv_ResetFences( { for (uint32_t i = 0; i < fenceCount; i++) { ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->ready = false; + fence->state = ANV_FENCE_STATE_RESET; } return VK_SUCCESS; @@ -1559,26 +1579,41 @@ VkResult anv_GetFenceStatus( int64_t t = 0; int ret; - if (fence->ready) - return VK_SUCCESS; + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* If it hasn't even been sent off to the GPU yet, it's not ready */ + return VK_NOT_READY; - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == 0) { - fence->ready = true; + case ANV_FENCE_STATE_SIGNALED: + /* It's been signaled, return success */ return VK_SUCCESS; - } - return VK_NOT_READY; + case ANV_FENCE_STATE_SUBMITTED: + /* It's been submitted to the GPU but we don't know if it's done yet. */ + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->state = ANV_FENCE_STATE_SIGNALED; + return VK_SUCCESS; + } else { + return VK_NOT_READY; + } + default: + unreachable("Invalid fence status"); + } } +#define NSEC_PER_SEC 1000000000 +#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1) + VkResult anv_WaitForFences( VkDevice _device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, - uint64_t timeout) + uint64_t _timeout) { ANV_FROM_HANDLE(anv_device, device, _device); + int ret; /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed * to block indefinitely timeouts <= 0. Unfortunately, this was broken @@ -1587,22 +1622,107 @@ VkResult anv_WaitForFences( * best we can do is to clamp the timeout to INT64_MAX. This limits the * maximum timeout from 584 years to 292 years - likely not a big deal. */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - int64_t t = timeout; + int64_t timeout = MIN2(_timeout, INT64_MAX); + + uint32_t pending_fences = fenceCount; + while (pending_fences) { + pending_fences = 0; + bool signaled_fences = false; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* This fence hasn't been submitted yet, we'll catch it the next + * time around. Yes, this may mean we dead-loop but, short of + * lots of locking and a condition variable, there's not much that + * we can do about that. + */ + pending_fences++; + continue; + + case ANV_FENCE_STATE_SIGNALED: + /* This fence is not pending. If waitAll isn't set, we can return + * early. Otherwise, we have to keep going. + */ + if (!waitAll) + return VK_SUCCESS; + continue; + + case ANV_FENCE_STATE_SUBMITTED: + /* These are the fences we really care about. Go ahead and wait + * on it until we hit a timeout. + */ + ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout); + if (ret == -1 && errno == ETIME) { + return VK_TIMEOUT; + } else if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m"); + } else { + fence->state = ANV_FENCE_STATE_SIGNALED; + signaled_fences = true; + if (!waitAll) + return VK_SUCCESS; + continue; + } + } + } - /* FIXME: handle !waitAll */ + if (pending_fences && !signaled_fences) { + /* If we've hit this then someone decided to vkWaitForFences before + * they've actually submitted any of them to a queue. This is a + * fairly pessimal case, so it's ok to lock here and use a standard + * pthreads condition variable. + */ + pthread_mutex_lock(&device->mutex); + + /* It's possible that some of the fences have changed state since the + * last time we checked. Now that we have the lock, check for + * pending fences again and don't wait if it's changed. + */ + uint32_t now_pending_fences = 0; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + if (fence->state == ANV_FENCE_STATE_RESET) + now_pending_fences++; + } + assert(now_pending_fences <= pending_fences); + + if (now_pending_fences == pending_fences) { + struct timespec before; + clock_gettime(CLOCK_MONOTONIC, &before); + + uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC; + uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) + + (timeout / NSEC_PER_SEC); + abs_nsec %= NSEC_PER_SEC; + + /* Avoid roll-over in tv_sec on 32-bit systems if the user + * provided timeout is UINT64_MAX + */ + struct timespec abstime; + abstime.tv_nsec = abs_nsec; + abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec)); + + ret = pthread_cond_timedwait(&device->queue_submit, + &device->mutex, &abstime); + assert(ret != EINVAL); + + struct timespec after; + clock_gettime(CLOCK_MONOTONIC, &after); + uint64_t time_elapsed = + ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) - + ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec); + + if (time_elapsed >= timeout) { + pthread_mutex_unlock(&device->mutex); + return VK_TIMEOUT; + } + + timeout -= time_elapsed; + } - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) { - return VK_TIMEOUT; - } else if (ret == -1) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem wait failed: %m"); + pthread_mutex_unlock(&device->mutex); } } @@ -1672,6 +1792,9 @@ void anv_DestroyEvent( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_event, event, _event); + if (!event) + return; + anv_state_pool_free(&device->dynamic_state_pool, event->state); } @@ -1764,6 +1887,9 @@ void anv_DestroyBuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + if (!buffer) + return; + vk_free2(&device->alloc, pAllocator, buffer); } @@ -1791,6 +1917,9 @@ void anv_DestroySampler( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + if (!sampler) + return; + vk_free2(&device->alloc, pAllocator, sampler); } @@ -1835,5 +1964,8 @@ void anv_DestroyFramebuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + if (!fb) + return; + vk_free2(&device->alloc, pAllocator, fb); } diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index f691554..875a7d3 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -463,6 +463,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( uint32_t maxArraySize; VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + if (anv_formats[format].isl_format == ISL_FORMAT_UNSUPPORTED) + goto unsupported; + anv_physical_device_get_format_properties(physical_device, format, &format_props); diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index d4ed325..9f843b3 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -42,6 +42,8 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer); + void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index b7c2e99..4a4d87e 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -275,8 +275,12 @@ anv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, _image); + + if (!image) + return; - vk_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); + vk_free2(&device->alloc, pAllocator, image); } VkResult anv_BindImageMemory( @@ -565,6 +569,9 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); + if (!iview) + return; + if (iview->color_rt_surface_state.alloc_size > 0) { anv_state_pool_free(&device->surface_state_pool, iview->color_rt_surface_state); @@ -655,6 +662,9 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + if (!view) + return; + if (view->surface_state.alloc_size > 0) anv_state_pool_free(&device->surface_state_pool, view->surface_state); diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 1c50e2b..c356e84 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -55,7 +55,7 @@ VkResult anv_CreateDmaBufImageINTEL( goto fail; } - uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + uint64_t size = (uint64_t)pCreateInfo->strideInBytes * pCreateInfo->extent.height; anv_bo_init(&mem->bo, gem_handle, size); diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index 6eaa5c8..1f35a42 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -146,6 +146,9 @@ void anv_DestroyRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + if (!pass) + return; + vk_free2(&device->alloc, pAllocator, pass->subpass_attachments); vk_free2(&device->alloc, pAllocator, pass); } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 4b8020a..e543c98 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -75,6 +75,9 @@ void anv_DestroyShaderModule( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_shader_module, module, _module); + if (!module) + return; + vk_free2(&device->alloc, pAllocator, module); } @@ -189,6 +192,9 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + if (!pipeline) + return; + anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index ff6e651..ddd51db 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -454,6 +454,9 @@ void anv_DestroyPipelineCache( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + if (!cache) + return; + anv_pipeline_cache_finish(cache); vk_free2(&device->alloc, pAllocator, cache); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 31b4766..06cdc0a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -577,6 +577,7 @@ struct anv_device { uint32_t default_mocs; pthread_mutex_t mutex; + pthread_cond_t queue_submit; }; void anv_device_get_cache_uuid(void *uuid); @@ -1251,11 +1252,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +enum anv_fence_state { + /** Indicates that this is a new (or newly reset fence) */ + ANV_FENCE_STATE_RESET, + + /** Indicates that this fence has been submitted to the GPU but is still + * (as far as we know) in use by the GPU. + */ + ANV_FENCE_STATE_SUBMITTED, + + ANV_FENCE_STATE_SIGNALED, +}; + struct anv_fence { struct anv_bo bo; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; - bool ready; + enum anv_fence_state state; }; struct anv_event { diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c index 4afdaaf..293257b 100644 --- a/src/intel/vulkan/anv_query.c +++ b/src/intel/vulkan/anv_query.c @@ -87,6 +87,9 @@ void anv_DestroyQueryPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + if (!pool) + return; + anv_gem_munmap(pool->bo.map, pool->bo.size); anv_gem_close(device, pool->bo.gem_handle); vk_free2(&device->alloc, pAllocator, pool); diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index 064581d..c504658 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -76,6 +76,9 @@ void anv_DestroySurfaceKHR( ANV_FROM_HANDLE(anv_instance, instance, _instance); ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + if (!surface) + return; + vk_free2(&instance->alloc, pAllocator, surface); } @@ -294,6 +297,9 @@ void anv_DestroySwapchainKHR( ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); const VkAllocationCallbacks *alloc; + if (!swapchain) + return; + if (pAllocator) alloc = pAllocator; else @@ -323,13 +329,20 @@ VkResult anv_AcquireNextImageKHR( VkSwapchainKHR _swapchain, uint64_t timeout, VkSemaphore semaphore, - VkFence fence, + VkFence _fence, uint32_t* pImageIndex) { ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + VkResult result = swapchain->acquire_next_image(swapchain, timeout, + semaphore, pImageIndex); - return swapchain->acquire_next_image(swapchain, timeout, semaphore, - pImageIndex); + /* Thanks to implicit sync, the image is ready immediately. */ + if (fence) + fence->state = ANV_FENCE_STATE_SIGNALED; + + return result; } VkResult anv_QueuePresentKHR( diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 0548a5e..f1dfe7b 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -513,6 +513,25 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer, } } +/* Set of stage bits for which are pipelined, i.e. they get queued by the + * command streamer for later execution. + */ +#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ + (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ + VK_PIPELINE_STAGE_TRANSFER_BIT | \ + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) + void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent _event, @@ -522,6 +541,11 @@ void genX(CmdSetEvent)( ANV_FROM_HANDLE(anv_event, event, _event); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + pc.DestinationAddressType = DAT_PPGTT, pc.PostSyncOperation = WriteImmediateData, pc.Address = (struct anv_address) { @@ -541,6 +565,11 @@ void genX(CmdResetEvent)( ANV_FROM_HANDLE(anv_event, event, _event); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = (struct anv_address) { diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 185aff6..cd9780d 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -164,6 +164,8 @@ genX(blorp_exec)(struct blorp_batch *batch, genX(flush_pipeline_select_3d)(cmd_buffer); + genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); + blorp_exec(batch, params); cmd_buffer->state.vb_dirty = ~0; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2bc7e74..f1b5387 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1343,12 +1343,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; - result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; + if (result != VK_SUCCESS) { + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. + */ + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + + result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces); + assert(result == VK_SUCCESS); + } + result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers); + assert(result == VK_SUCCESS); + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); @@ -1408,8 +1418,20 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(flush_pipeline_select_gpgpu)(cmd_buffer); - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) { + /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE: + * + * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless + * the only bits that are changed are scoreboard related: Scoreboard + * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For + * these scoreboard related states, a MEDIA_STATE_FLUSH is + * sufficient." + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { @@ -1661,6 +1683,35 @@ genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) } } +void +genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer) +{ + if (GEN_GEN >= 8) + return; + + /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER: + * + * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any + * combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, + * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first + * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit + * set), followed by a pipelined depth cache flush (PIPE_CONTROL with + * Depth Flush Bit set, followed by another pipelined depth stall + * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise + * guarantee that the pipeline from WM onwards is already flushed (e.g., + * via a preceding MI_FLUSH)." + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthStallEnable = true; + } + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthCacheFlushEnable = true; + } + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthStallEnable = true; + } +} + static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { @@ -1677,6 +1728,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ + genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); + /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) { diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index e7dcf47..c4493d4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -231,7 +231,7 @@ brw_upload_cs_prog(struct brw_context *brw) &brw->cs.base.prog_data)) { bool success = brw_codegen_cs_prog(brw, - ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE], + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE], cp, &key); (void) success; assert(success); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4baadc9..e4102c6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1984,7 +1984,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, } else { for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), - fs_reg(ATTR, imm_offset + i, dst.type)); + fs_reg(ATTR, imm_offset + i + first_component, dst.type)); } } return; diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 0eed8f9..b649072 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -213,7 +213,7 @@ static void upload_default_color(struct brw_context *brw, const struct gl_sampler_object *sampler, mesa_format format, GLenum base_format, - bool is_integer_format, + bool is_integer_format, bool is_stencil_sampling, uint32_t *sdc_offset) { union gl_color_union color; @@ -277,7 +277,7 @@ upload_default_color(struct brw_context *brw, uint32_t *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, 4 * 4, 64, sdc_offset); memcpy(sdc, color.ui, 4 * 4); - } else if (brw->is_haswell && is_integer_format) { + } else if (brw->is_haswell && (is_integer_format || is_stencil_sampling)) { /* Haswell's integer border color support is completely insane: * SAMPLER_BORDER_COLOR_STATE is 20 DWords. The first four are * for float colors. The next 12 DWords are MBZ and only exist to @@ -291,10 +291,9 @@ upload_default_color(struct brw_context *brw, memset(sdc, 0, 20 * 4); sdc = &sdc[16]; + bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling; const int bits_per_channel = - _mesa_get_format_bits(format, - format == MESA_FORMAT_S_UINT8 ? - GL_STENCIL_BITS : GL_RED_BITS); + _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS); /* From the Haswell PRM, "Command Reference: Structures", Page 36: * "If any color channel is missing from the surface format, @@ -389,12 +388,13 @@ upload_default_color(struct brw_context *brw, * Sets the sampler state for a single unit based off of the sampler key * entry. */ -void +static void brw_update_sampler_state(struct brw_context *brw, GLenum target, bool tex_cube_map_seamless, GLfloat tex_unit_lod_bias, mesa_format format, GLenum base_format, bool is_integer_format, + bool is_stencil_sampling, const struct gl_sampler_object *sampler, uint32_t *sampler_state, uint32_t batch_offset_for_sampler_state) @@ -516,8 +516,8 @@ brw_update_sampler_state(struct brw_context *brw, if (wrap_mode_needs_border_color(wrap_s) || wrap_mode_needs_border_color(wrap_t) || wrap_mode_needs_border_color(wrap_r)) { - upload_default_color(brw, sampler, - format, base_format, is_integer_format, + upload_default_color(brw, sampler, format, base_format, + is_integer_format, is_stencil_sampling, &border_color_offset); } @@ -555,7 +555,7 @@ update_sampler_state(struct brw_context *brw, brw_update_sampler_state(brw, texObj->Target, ctx->Texture.CubeMapSeamless, texUnit->LodBias, firstImage->TexFormat, firstImage->_BaseFormat, - texObj->_IsIntegerFormat, + texObj->_IsIntegerFormat, texObj->StencilSampling, sampler, sampler_state, batch_offset_for_sampler_state); } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b42b9af..b8aa97b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -337,15 +337,6 @@ void brw_emit_sampler_state(struct brw_context *brw, bool non_normalized_coordinates, uint32_t border_color_offset); -void brw_update_sampler_state(struct brw_context *brw, - GLenum target, bool tex_cube_map_seamless, - GLfloat tex_unit_lod_bias, - mesa_format format, GLenum base_format, - bool is_integer_format, - const struct gl_sampler_object *sampler, - uint32_t *sampler_state, - uint32_t batch_offset_for_sampler_state); - /* gen6_wm_state.c */ void gen6_upload_wm_state(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 59c7d21..b0ee289 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -626,7 +626,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); prog_data->include_primitive_id = - (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; + (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) || + (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)); prog_data->invocations = shader->info.gs.invocations; diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c index 911b6fa..53bd5e3 100644 --- a/src/mesa/main/arbprogram.c +++ b/src/mesa/main/arbprogram.c @@ -41,11 +41,6 @@ #include "program/program.h" #include "program/prog_print.h" -#ifdef _MSC_VER -#include <stdlib.h> -#define PATH_MAX _MAX_PATH -#endif - /** * Bind a program (make it current) * \note Called from the GL API dispatcher by both glBindProgramNV @@ -388,12 +383,12 @@ _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, const char *capture_path = _mesa_get_shader_capture_path(); if (capture_path != NULL) { FILE *file; - char filename[PATH_MAX]; const char *shader_type = target == GL_FRAGMENT_PROGRAM_ARB ? "fragment" : "vertex"; + char *filename = + ralloc_asprintf(NULL, "%s/%cp-%u.shader_test", + capture_path, shader_type[0], base->Id); - _mesa_snprintf(filename, sizeof(filename), "%s/%cp-%u.shader_test", - capture_path, shader_type[0], base->Id); file = fopen(filename, "w"); if (file) { fprintf(file, @@ -403,6 +398,7 @@ _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, } else { _mesa_warning(ctx, "Failed to open %s", filename); } + ralloc_free(filename); } } diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index 7d8a5fd..670438a 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -59,7 +59,6 @@ struct _mesa_HashTable { struct hash_table *ht; GLuint MaxKey; /**< highest key inserted so far */ mtx_t Mutex; /**< mutual exclusion lock */ - mtx_t WalkMutex; /**< for _mesa_HashWalk() */ GLboolean InDeleteAll; /**< Debug check */ /** Value that would be in the table for DELETED_KEY_VALUE. */ void *deleted_key_data; @@ -129,8 +128,11 @@ _mesa_NewHashTable(void) } _mesa_hash_table_set_deleted_key(table->ht, uint_key(DELETED_KEY_VALUE)); - mtx_init(&table->Mutex, mtx_plain); - mtx_init(&table->WalkMutex, mtx_plain); + /* + * Needs to be recursive, since the callback in _mesa_HashWalk() + * is allowed to call _mesa_HashRemove(). + */ + mtx_init(&table->Mutex, mtx_recursive); } else { _mesa_error_no_memory(__func__); @@ -161,7 +163,6 @@ _mesa_DeleteHashTable(struct _mesa_HashTable *table) _mesa_hash_table_destroy(table->ht, NULL); mtx_destroy(&table->Mutex); - mtx_destroy(&table->WalkMutex); free(table); } @@ -401,11 +402,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, /** * Walk over all entries in a hash table, calling callback function for each. - * Note: we use a separate mutex in this function to avoid a recursive - * locking deadlock (in case the callback calls _mesa_HashRemove()) and to - * prevent multiple threads/contexts from getting tangled up. - * A lock-less version of this function could be used when the table will - * not be modified. * \param table the hash table to walk * \param callback the callback function * \param userData arbitrary pointer to pass along to the callback @@ -422,13 +418,13 @@ _mesa_HashWalk(const struct _mesa_HashTable *table, assert(table); assert(callback); - mtx_lock(&table2->WalkMutex); + mtx_lock(&table2->Mutex); hash_table_foreach(table->ht, entry) { callback((uintptr_t)entry->key, entry->data, userData); } if (table->deleted_key_data) callback(DELETED_KEY_VALUE, table->deleted_key_data, userData); - mtx_unlock(&table2->WalkMutex); + mtx_unlock(&table2->Mutex); } static void diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 8229840..310b745 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -645,7 +645,8 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) *params = pipe->ActiveProgram ? pipe->ActiveProgram->Name : 0; return; case GL_INFO_LOG_LENGTH: - *params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0; + *params = (pipe->InfoLog && pipe->InfoLog[0] != '\0') ? + strlen(pipe->InfoLog) + 1 : 0; return; case GL_VALIDATE_STATUS: *params = pipe->Validated; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index c40bb2d..15f324b 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -60,11 +60,6 @@ #include "util/hash_table.h" #include "util/mesa-sha1.h" -#ifdef _MSC_VER -#include <stdlib.h> -#define PATH_MAX _MAX_PATH -#endif - /** * Return mask of GLSL_x flags by examining the MESA_GLSL env var. */ @@ -112,13 +107,6 @@ _mesa_get_shader_capture_path(void) if (!read_env_var) { path = getenv("MESA_SHADER_CAPTURE_PATH"); read_env_var = true; - if (path && - strlen(path) > PATH_MAX - strlen("/fp-4294967295.shader_test")) { - GET_CURRENT_CONTEXT(ctx); - _mesa_warning(ctx, "MESA_SHADER_CAPTURE_PATH too long; ignoring " - "request to capture shaders"); - path = NULL; - } } return path; @@ -654,7 +642,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = shProg->Validated; return; case GL_INFO_LOG_LENGTH: - *params = shProg->InfoLog ? strlen(shProg->InfoLog) + 1 : 0; + *params = (shProg->InfoLog && shProg->InfoLog[0] != '\0') ? + strlen(shProg->InfoLog) + 1 : 0; return; case GL_ATTACHED_SHADERS: *params = shProg->NumShaders; @@ -902,7 +891,8 @@ get_shaderiv(struct gl_context *ctx, GLuint name, GLenum pname, GLint *params) *params = shader->CompileStatus; break; case GL_INFO_LOG_LENGTH: - *params = shader->InfoLog ? strlen(shader->InfoLog) + 1 : 0; + *params = (shader->InfoLog && shader->InfoLog[0] != '\0') ? + strlen(shader->InfoLog) + 1 : 0; break; case GL_SHADER_SOURCE_LENGTH: *params = shader->Source ? strlen((char *) shader->Source) + 1 : 0; @@ -1101,11 +1091,8 @@ _mesa_link_program(struct gl_context *ctx, struct gl_shader_program *shProg) const char *capture_path = _mesa_get_shader_capture_path(); if (shProg->Name != 0 && shProg->Name != ~0 && capture_path != NULL) { FILE *file; - char filename[PATH_MAX]; - - _mesa_snprintf(filename, sizeof(filename), "%s/%u.shader_test", - capture_path, shProg->Name); - + char *filename = ralloc_asprintf(NULL, "%s/%u.shader_test", + capture_path, shProg->Name); file = fopen(filename, "w"); if (file) { fprintf(file, "[require]\nGLSL%s >= %u.%02u\n", @@ -1124,6 +1111,8 @@ _mesa_link_program(struct gl_context *ctx, struct gl_shader_program *shProg) } else { _mesa_warning(ctx, "Failed to open %s", filename); } + + ralloc_free(filename); } if (shProg->LinkStatus == GL_FALSE && @@ -1618,9 +1607,9 @@ generate_sha1(const char *source, char sha_str[64]) * * <path>/<stage prefix>_<CHECKSUM>.glsl */ -static void +static char * construct_name(const gl_shader_stage stage, const char *source, - const char *path, char *name, unsigned length) + const char *path) { char sha[64]; static const char *types[] = { @@ -1628,8 +1617,7 @@ construct_name(const gl_shader_stage stage, const char *source, }; generate_sha1(source, sha); - _mesa_snprintf(name, length, "%s/%s_%s.glsl", path, types[stage], - sha); + return ralloc_asprintf(NULL, "%s/%s_%s.glsl", path, types[stage], sha); } /** @@ -1638,7 +1626,6 @@ construct_name(const gl_shader_stage stage, const char *source, static void dump_shader(const gl_shader_stage stage, const char *source) { - char name[PATH_MAX]; static bool path_exists = true; char *dump_path; FILE *f; @@ -1652,7 +1639,7 @@ dump_shader(const gl_shader_stage stage, const char *source) return; } - construct_name(stage, source, dump_path, name, PATH_MAX); + char *name = construct_name(stage, source, dump_path); f = fopen(name, "w"); if (f) { @@ -1663,6 +1650,7 @@ dump_shader(const gl_shader_stage stage, const char *source) _mesa_warning(ctx, "could not open %s for dumping shader (%s)", name, strerror(errno)); } + ralloc_free(name); } /** @@ -1672,7 +1660,6 @@ dump_shader(const gl_shader_stage stage, const char *source) static GLcharARB * read_shader(const gl_shader_stage stage, const char *source) { - char name[PATH_MAX]; char *read_path; static bool path_exists = true; int len, shader_size = 0; @@ -1688,9 +1675,9 @@ read_shader(const gl_shader_stage stage, const char *source) return NULL; } - construct_name(stage, source, read_path, name, PATH_MAX); - + char *name = construct_name(stage, source, read_path); f = fopen(name, "r"); + ralloc_free(name); if (!f) return NULL; diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 196ee28..a8130ce 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -30,6 +30,7 @@ #include <unistd.h> #include <errno.h> #include <string.h> +#include <pthread.h> #include "wsi_common_wayland.h" @@ -41,8 +42,6 @@ memcpy((dest), (src), (count) * sizeof(*(src))); \ }) -#define MIN_NUM_IMAGES 2 - struct wsi_wayland; struct wsi_wl_display { @@ -321,6 +320,8 @@ wsi_wl_get_display(struct wsi_device *wsi_device, pthread_mutex_unlock(&wsi->mutex); struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + if (!display) + return NULL; pthread_mutex_lock(&wsi->mutex); @@ -366,8 +367,16 @@ static VkResult wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, VkSurfaceCapabilitiesKHR* caps) { - caps->minImageCount = MIN_NUM_IMAGES; - caps->maxImageCount = 4; + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + caps->minImageCount = 4; + /* There is no real maximum */ + caps->maxImageCount = 0; + caps->currentExtent = (VkExtent2D) { -1, -1 }; caps->minImageExtent = (VkExtent2D) { 1, 1 }; caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; @@ -397,6 +406,8 @@ wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wl_display *display = wsi_wl_get_display(wsi_device, surface->display); + if (!display) + return VK_ERROR_OUT_OF_HOST_MEMORY; uint32_t count = u_vector_length(&display->formats); @@ -487,19 +498,25 @@ wsi_wl_swapchain_get_images(struct wsi_swapchain *wsi_chain, uint32_t *pCount, VkImage *pSwapchainImages) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + uint32_t ret_count; + VkResult result; if (pSwapchainImages == NULL) { *pCount = chain->image_count; return VK_SUCCESS; } - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = chain->images[i].image; + result = VK_SUCCESS; + ret_count = chain->image_count; + if (chain->image_count > *pCount) { + ret_count = *pCount; + result = VK_INCOMPLETE; + } - *pCount = chain->image_count; + for (uint32_t i = 0; i < ret_count; i++) + pSwapchainImages[i] = chain->images[i].image; - return VK_SUCCESS; + return result; } static VkResult @@ -685,17 +702,6 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, int num_images = pCreateInfo->minImageCount; - assert(num_images >= MIN_NUM_IMAGES); - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = vk_alloc(pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -826,6 +832,10 @@ wsi_wl_finish_wsi(struct wsi_device *wsi_device, (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; if (wsi) { + struct hash_entry *entry; + hash_table_foreach(wsi->displays, entry) + wsi_wl_display_destroy(wsi, entry->data); + _mesa_hash_table_destroy(wsi->displays, NULL); pthread_mutex_destroy(&wsi->mutex); diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 2280651..09718eb 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -371,8 +371,16 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the X server + * 4) One to render to + */ caps->minImageCount = 2; - caps->maxImageCount = 4; + /* There is no real maximum */ + caps->maxImageCount = 0; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->maxImageArrayLayers = 1; @@ -396,11 +404,11 @@ x11_surface_get_formats(VkIcdSurfaceBase *surface, return VK_SUCCESS; } - assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + *pSurfaceFormatCount = MIN2(*pSurfaceFormatCount, ARRAY_SIZE(formats)); typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); - *pSurfaceFormatCount = ARRAY_SIZE(formats); - return VK_SUCCESS; + return *pSurfaceFormatCount < ARRAY_SIZE(formats) ? + VK_INCOMPLETE : VK_SUCCESS; } static VkResult @@ -413,11 +421,11 @@ x11_surface_get_present_modes(VkIcdSurfaceBase *surface, return VK_SUCCESS; } - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + *pPresentModeCount = MIN2(*pPresentModeCount, ARRAY_SIZE(present_modes)); typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; + return *pPresentModeCount < ARRAY_SIZE(present_modes) ? + VK_INCOMPLETE : VK_SUCCESS; } VkResult wsi_create_xcb_surface(const VkAllocationCallbacks *pAllocator, @@ -490,19 +498,25 @@ x11_get_images(struct wsi_swapchain *anv_chain, uint32_t* pCount, VkImage *pSwapchainImages) { struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + uint32_t ret_count; + VkResult result; if (pSwapchainImages == NULL) { *pCount = chain->image_count; return VK_SUCCESS; } - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = chain->images[i].image; + result = VK_SUCCESS; + ret_count = chain->image_count; + if (chain->image_count > *pCount) { + ret_count = *pCount; + result = VK_INCOMPLETE; + } - *pCount = chain->image_count; + for (uint32_t i = 0; i < ret_count; i++) + pSwapchainImages[i] = chain->images[i].image; - return VK_SUCCESS; + return result; } static VkResult @@ -737,16 +751,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - int num_images = pCreateInfo->minImageCount; - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); + const unsigned num_images = pCreateInfo->minImageCount; size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = vk_alloc(pAllocator, size, 8, |