diff --git a/src/overlay.cpp b/src/overlay.cpp index 9a4bbfad..2f5ce940 100644 --- a/src/overlay.cpp +++ b/src/overlay.cpp @@ -71,7 +71,6 @@ struct instance_data { VkInstance instance; struct overlay_params params; - bool pipeline_statistics_enabled; bool first_line_printed; @@ -103,9 +102,6 @@ struct device_data { struct queue_data *graphic_queue; std::vector queues; - - /* For a single frame */ - struct frame_stat frame_stats; }; /* Mapped from VkCommandBuffer */ @@ -116,10 +112,6 @@ struct command_buffer_data { VkCommandBufferLevel level; VkCommandBuffer cmd_buffer; - VkQueryPool timestamp_query_pool; - uint32_t query_index; - - struct frame_stat stats; struct queue_data *queue_data; }; @@ -131,11 +123,6 @@ struct queue_data { VkQueue queue; VkQueueFlags flags; uint32_t family_index; - uint64_t timestamp_mask; - - VkFence queries_fence; - - std::list running_command_buffer; }; struct overlay_draw { @@ -211,20 +198,6 @@ struct swapchain_data { struct frame_stat accumulated_stats; }; -static const VkQueryPipelineStatisticFlags overlay_query_flags = - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT | - VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT | - VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT | - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT | - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT | - VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT | - VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT | - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT | - VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT | - VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT; -#define OVERLAY_QUERY_COUNT (11) - // single global lock, for simplicity std::mutex global_lock; typedef std::lock_guard scoped_lock; @@ -290,38 +263,6 @@ static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo * return NULL; } -static struct VkBaseOutStructure * -clone_chain(const struct VkBaseInStructure *chain) -{ - struct VkBaseOutStructure *head = NULL, *tail = NULL; - - vk_foreach_struct_const(item, chain) { - size_t item_size = vk_structure_type_size(item); - struct VkBaseOutStructure *new_item = - (struct VkBaseOutStructure *)malloc(item_size);; - - memcpy(new_item, item, item_size); - - if (!head) - head = new_item; - if (tail) - tail->pNext = new_item; - tail = new_item; - } - - return head; -} - -static void -free_chain(struct VkBaseOutStructure *chain) -{ - while (chain) { - void *node = chain; - chain = chain->pNext; - free(node); - } -} - /**/ static struct instance_data *new_instance_data(VkInstance instance) @@ -381,19 +322,9 @@ static struct queue_data *new_queue_data(VkQueue queue, data->device = device_data; data->queue = queue; data->flags = family_props->queueFlags; - data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1; data->family_index = family_index; map_object(HKEY(data->queue), data); - /* Fence synchronizing access to queries on that queue. */ - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; - VK_CHECK(device_data->vtable.CreateFence(device_data->device, - &fence_info, - NULL, - &data->queries_fence)); - if (data->flags & VK_QUEUE_GRAPHICS_BIT) device_data->graphic_queue = data; @@ -402,8 +333,6 @@ static struct queue_data *new_queue_data(VkQueue queue, static void destroy_queue(struct queue_data *data) { - struct device_data *device_data = data->device; - device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL); unmap_object(HKEY(data->queue)); delete data; } @@ -458,16 +387,12 @@ static void destroy_device_data(struct device_data *data) /**/ static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer, VkCommandBufferLevel level, - VkQueryPool timestamp_query_pool, - uint32_t query_index, struct device_data *device_data) { struct command_buffer_data *data = new command_buffer_data(); data->device = device_data; data->cmd_buffer = cmd_buffer; data->level = level; - data->timestamp_query_pool = timestamp_query_pool; - data->query_index = query_index; map_object(HKEY(data->cmd_buffer), data); return data; } @@ -475,8 +400,6 @@ static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_b static void destroy_command_buffer_data(struct command_buffer_data *data) { unmap_object(HKEY(data->cmd_buffer)); - if (data->queue_data) - data->queue_data->running_command_buffer.remove(data); delete data; } @@ -922,7 +845,7 @@ void update_hud_info(struct swapchain_stats& sw_stats, struct overlay_params& pa fps = 1000000.0f * sw_stats.n_frames_since_update / elapsed; if (sw_stats.last_present_time) { - sw_stats.frames_stats[f_idx].stats[OVERLAY_PARAM_ENABLED_frame_timing] = + sw_stats.frames_stats[f_idx].stats[OVERLAY_PLOTS_frame_timing] = now - sw_stats.last_present_time; } @@ -972,9 +895,6 @@ void update_hud_info(struct swapchain_stats& sw_stats, struct overlay_params& pa sw_stats.last_fps_update = now; } - // memset(&device_data->frame_stats, 0, sizeof(device_data->frame_stats)); - // memset(&data->frame_stats, 0, sizeof(device_data->frame_stats)); - sw_stats.last_present_time = now; sw_stats.n_frames++; sw_stats.n_frames_since_update++; @@ -992,15 +912,6 @@ static void snapshot_swapchain_frame(struct swapchain_data *data) // control_client_check(device_data); // process_control_socket(instance_data); // } - - - // not currently used - // memset(&data->sw_stats.frames_stats[f_idx], 0, sizeof(data->sw_stats.frames_stats[f_idx])); - // for (int s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { - // data->sw_stats.frames_stats[f_idx].stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; - // data->accumulated_stats.stats[s] += device_data->frame_stats.stats[s] + data->frame_stats.stats[s]; - // } - } static float get_time_stat(void *_data, int _idx) @@ -1262,29 +1173,19 @@ void render_imgui(swapchain_stats& data, struct overlay_params& params, ImVec2& ImGui::PushFont(data.font1); ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(params.engine_color), "%s", "Frametime"); ImGui::PopFont(); - } - for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) { - if (!params.enabled[s] || - s == OVERLAY_PARAM_ENABLED_fps || - s == OVERLAY_PARAM_ENABLED_frame) - continue; char hash[40]; - snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]); - data.stat_selector = (enum overlay_param_enabled) s; + snprintf(hash, sizeof(hash), "##%s", overlay_param_names[OVERLAY_PARAM_ENABLED_frame_timing]); + data.stat_selector = OVERLAY_PLOTS_frame_timing; data.time_dividor = 1000.0f; - if (s == OVERLAY_PARAM_ENABLED_gpu_timing) - data.time_dividor = 1000000.0f; ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0f, 0.0f, 0.0f, 0.0f)); - if (s == OVERLAY_PARAM_ENABLED_frame_timing) { - double min_time = 0.0f; - double max_time = 50.0f; - ImGui::PlotLines(hash, get_time_stat, &data, - ARRAY_SIZE(data.frames_stats), 0, - NULL, min_time, max_time, - ImVec2(ImGui::GetContentRegionAvailWidth() - params.font_size * 2.2, 50)); - } + double min_time = 0.0f; + double max_time = 50.0f; + ImGui::PlotLines(hash, get_time_stat, &data, + ARRAY_SIZE(data.frames_stats), 0, + NULL, min_time, max_time, + ImVec2(ImGui::GetContentRegionAvailWidth() - params.font_size * 2.2, 50)); ImGui::PopStyleColor(); } if (params.enabled[OVERLAY_PARAM_ENABLED_frame_timing]){ @@ -2235,42 +2136,6 @@ static VkResult overlay_QueuePresentKHR( const VkPresentInfoKHR* pPresentInfo) { struct queue_data *queue_data = FIND(struct queue_data, queue); - struct device_data *device_data = queue_data->device; - - device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_frame]++; - - if (!queue_data->running_command_buffer.empty()) { - /* Before getting the query results, make sure the operations have - * completed. - */ - VK_CHECK(device_data->vtable.ResetFences(device_data->device, - 1, &queue_data->queries_fence)); - VK_CHECK(device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence)); - VK_CHECK(device_data->vtable.WaitForFences(device_data->device, - 1, &queue_data->queries_fence, - VK_FALSE, UINT64_MAX)); - - /* Now get the results. */ - while (!queue_data->running_command_buffer.empty()) { - auto cmd_buffer_data = queue_data->running_command_buffer.front(); - queue_data->running_command_buffer.pop_front(); - - if (cmd_buffer_data->timestamp_query_pool) { - uint64_t gpu_timestamps[2] = { 0 }; - VK_CHECK(device_data->vtable.GetQueryPoolResults(device_data->device, - cmd_buffer_data->timestamp_query_pool, - cmd_buffer_data->query_index * 2, 2, - 2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t), - VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT)); - - gpu_timestamps[0] &= queue_data->timestamp_mask; - gpu_timestamps[1] &= queue_data->timestamp_mask; - device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] += - (gpu_timestamps[1] - gpu_timestamps[0]) * - device_data->properties.limits.timestampPeriod; - } - } - } /* Otherwise we need to add our overlay drawing semaphore to the list of * semaphores to wait on. If we don't do that the presented picture might @@ -2305,10 +2170,7 @@ static VkResult overlay_QueuePresentKHR( present_info.waitSemaphoreCount = 1; } - uint64_t ts0 = os_time_get(); VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info); - uint64_t ts1 = os_time_get(); - swapchain_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_present_timing] += ts1 - ts0; if (pPresentInfo->pResults) pPresentInfo->pResults[i] = chain_result; if (chain_result != VK_SUCCESS && result == VK_SUCCESS) @@ -2332,59 +2194,9 @@ static VkResult overlay_BeginCommandBuffer( FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; - memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); - - /* We don't record any query in secondary command buffers, just make sure - * we have the right inheritance. - */ - if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { - VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *) - clone_chain((const struct VkBaseInStructure *)pBeginInfo); - VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *) - vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO); - VkCommandBufferInheritanceInfo inhe_info = { - VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, - NULL, - VK_NULL_HANDLE, - 0, - VK_NULL_HANDLE, - VK_FALSE, - 0, - overlay_query_flags, - }; - - if (parent_inhe_info) - parent_inhe_info->pipelineStatistics = overlay_query_flags; - else { - inhe_info.pNext = begin_info->pNext; - begin_info->pNext = &inhe_info; - } - - VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); - - if (!parent_inhe_info) - begin_info->pNext = inhe_info.pNext; - - free_chain((struct VkBaseOutStructure *)begin_info); - - return result; - } - /* Otherwise record a begin query as first command. */ VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo); - if (result == VK_SUCCESS) { - if (cmd_buffer_data->timestamp_query_pool) { - device_data->vtable.CmdResetQueryPool(commandBuffer, - cmd_buffer_data->timestamp_query_pool, - cmd_buffer_data->query_index * 2, 2); - device_data->vtable.CmdWriteTimestamp(commandBuffer, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - cmd_buffer_data->timestamp_query_pool, - cmd_buffer_data->query_index * 2); - } - } - return result; } @@ -2395,13 +2207,6 @@ static VkResult overlay_EndCommandBuffer( FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; - if (cmd_buffer_data->timestamp_query_pool) { - device_data->vtable.CmdWriteTimestamp(commandBuffer, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - cmd_buffer_data->timestamp_query_pool, - cmd_buffer_data->query_index * 2 + 1); - } - return device_data->vtable.EndCommandBuffer(commandBuffer); } @@ -2413,8 +2218,6 @@ static VkResult overlay_ResetCommandBuffer( FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; - memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats)); - return device_data->vtable.ResetCommandBuffer(commandBuffer, flags); } @@ -2427,15 +2230,6 @@ static void overlay_CmdExecuteCommands( FIND(struct command_buffer_data, commandBuffer); struct device_data *device_data = cmd_buffer_data->device; - /* Add the stats of the executed command buffers to the primary one. */ - for (uint32_t c = 0; c < commandBufferCount; c++) { - struct command_buffer_data *sec_cmd_buffer_data = - FIND(struct command_buffer_data, pCommandBuffers[c]); - - for (uint32_t s = 0; s < OVERLAY_PARAM_ENABLED_MAX; s++) - cmd_buffer_data->stats.stats[s] += sec_cmd_buffer_data->stats.stats[s]; - } - device_data->vtable.CmdExecuteCommands(commandBuffer, commandBufferCount, pCommandBuffers); } @@ -2450,29 +2244,11 @@ static VkResult overlay_AllocateCommandBuffers( if (result != VK_SUCCESS) return result; - VkQueryPool timestamp_query_pool = VK_NULL_HANDLE; - if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) { - VkQueryPoolCreateInfo pool_info = { - VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - NULL, - 0, - VK_QUERY_TYPE_TIMESTAMP, - pAllocateInfo->commandBufferCount * 2, - 0, - }; - VK_CHECK(device_data->vtable.CreateQueryPool(device_data->device, &pool_info, - NULL, ×tamp_query_pool)); - } - for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) { new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, - timestamp_query_pool, - i, device_data); + device_data); } - if (timestamp_query_pool) - map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount); - return result; } @@ -2491,14 +2267,6 @@ static void overlay_FreeCommandBuffers( if (!cmd_buffer_data) continue; - uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool)); - if (count == 1) { - unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool)); - device_data->vtable.DestroyQueryPool(device_data->device, - cmd_buffer_data->timestamp_query_pool, NULL); - } else if (count != 0) { - map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1)); - } destroy_command_buffer_data(cmd_buffer_data); } @@ -2515,32 +2283,6 @@ static VkResult overlay_QueueSubmit( struct queue_data *queue_data = FIND(struct queue_data, queue); struct device_data *device_data = queue_data->device; - for (uint32_t s = 0; s < submitCount; s++) { - for (uint32_t c = 0; c < pSubmits[s].commandBufferCount; c++) { - struct command_buffer_data *cmd_buffer_data = - FIND(struct command_buffer_data, pSubmits[s].pCommandBuffers[c]); - - /* Merge the submitted command buffer stats into the device. */ - for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++) - device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st]; - - /* Attach the command buffer to the queue so we remember to read its - * pipeline statistics & timestamps at QueuePresent(). - */ - if (!cmd_buffer_data->timestamp_query_pool) - continue; - - auto& q = queue_data->running_command_buffer; - if (std::find(q.begin(), q.end(), cmd_buffer_data) == q.end()) { - cmd_buffer_data->queue_data = queue_data; - q.push_back(cmd_buffer_data); - } else { - fprintf(stderr, "Command buffer submitted multiple times before present.\n" - "This could lead to invalid data.\n"); - } - } - } - return device_data->vtable.QueueSubmit(queue, submitCount, pSubmits, fence); } @@ -2571,10 +2313,6 @@ static VkResult overlay_CreateDevice( if (pCreateInfo->pEnabledFeatures) device_features = *(pCreateInfo->pEnabledFeatures); - if (instance_data->pipeline_statistics_enabled) { - device_features.inheritedQueries = true; - device_features.pipelineStatisticsQuery = true; - } device_info.pEnabledFeatures = &device_features; diff --git a/src/overlay.h b/src/overlay.h index 3aa6c14b..0045efd4 100644 --- a/src/overlay.h +++ b/src/overlay.h @@ -5,12 +5,12 @@ #include "iostats.h" struct frame_stat { - uint64_t stats[OVERLAY_PARAM_ENABLED_MAX]; + uint64_t stats[OVERLAY_PLOTS_MAX]; }; struct swapchain_stats { uint64_t n_frames; - enum overlay_param_enabled stat_selector; + enum overlay_plots stat_selector; double time_dividor; struct frame_stat stats_min, stats_max; struct frame_stat frames_stats[200]; diff --git a/src/overlay_params.cpp b/src/overlay_params.cpp index ec861e3e..cc1434f0 100644 --- a/src/overlay_params.cpp +++ b/src/overlay_params.cpp @@ -397,8 +397,6 @@ parse_overlay_config(struct overlay_params *params, if (env && read_cfg) parse_overlay_env(params, env); - // Command buffer gets reused and timestamps cause hangs for some reason, force off for now - params->enabled[OVERLAY_PARAM_ENABLED_gpu_timing] = false; // Convert from 0xRRGGBB to ImGui's format std::array colors = { ¶ms->crosshair_color, diff --git a/src/overlay_params.h b/src/overlay_params.h index b1e75e8c..d9c08564 100644 --- a/src/overlay_params.h +++ b/src/overlay_params.h @@ -44,10 +44,7 @@ extern "C" { #define OVERLAY_PARAMS \ OVERLAY_PARAM_BOOL(fps) \ - OVERLAY_PARAM_BOOL(frame) \ OVERLAY_PARAM_BOOL(frame_timing) \ - OVERLAY_PARAM_BOOL(present_timing) \ - OVERLAY_PARAM_BOOL(gpu_timing) \ OVERLAY_PARAM_BOOL(core_load) \ OVERLAY_PARAM_BOOL(cpu_temp) \ OVERLAY_PARAM_BOOL(gpu_temp) \ @@ -107,6 +104,11 @@ enum overlay_param_position { LAYER_POSITION_BOTTOM_RIGHT, }; +enum overlay_plots { + OVERLAY_PLOTS_frame_timing, + OVERLAY_PLOTS_MAX, +}; + enum overlay_param_enabled { #define OVERLAY_PARAM_BOOL(name) OVERLAY_PARAM_ENABLED_##name, #define OVERLAY_PARAM_CUSTOM(name)