KRDevice now creates transfer queues for each GPU. Implemented an algorithm to keep the graphics, compute, and transfer queue families independent when possible.
This commit is contained in:
@@ -42,6 +42,8 @@ KRDevice::KRDevice(KRContext& context, const VkPhysicalDevice& device)
|
||||
, m_graphicsQueue(VK_NULL_HANDLE)
|
||||
, m_computeFamilyQueueIndex(0)
|
||||
, m_computeQueue(VK_NULL_HANDLE)
|
||||
, m_transferFamilyQueueIndex(0)
|
||||
, m_transferQueue(VK_NULL_HANDLE)
|
||||
, m_graphicsCommandPool(VK_NULL_HANDLE)
|
||||
, m_computeCommandPool(VK_NULL_HANDLE)
|
||||
, m_allocator(VK_NULL_HANDLE)
|
||||
@@ -76,7 +78,7 @@ void KRDevice::destroy()
|
||||
m_graphicsStagingBufferAllocation = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
if (m_logicalDevice != VK_NULL_HANDLE) {
|
||||
if (m_graphicsCommandPool != VK_NULL_HANDLE) {
|
||||
vkDestroyCommandPool(m_logicalDevice, m_graphicsCommandPool, nullptr);
|
||||
m_graphicsCommandPool = VK_NULL_HANDLE;
|
||||
}
|
||||
@@ -86,6 +88,11 @@ void KRDevice::destroy()
|
||||
m_computeCommandPool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
if (m_transferCommandPool != VK_NULL_HANDLE) {
|
||||
vkDestroyCommandPool(m_logicalDevice, m_transferCommandPool, nullptr);
|
||||
m_transferCommandPool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
if (m_logicalDevice != VK_NULL_HANDLE) {
|
||||
vkDestroyDevice(m_logicalDevice, nullptr);
|
||||
m_logicalDevice = VK_NULL_HANDLE;
|
||||
@@ -110,15 +117,80 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
|
||||
uint32_t graphicsFamilyQueue = -1;
|
||||
uint32_t computeFamilyQueue = -1;
|
||||
uint32_t i = 0;
|
||||
for (const auto& queueFamily : queueFamilies) {
|
||||
if (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
|
||||
graphicsFamilyQueue = i;
|
||||
uint32_t transferFamilyQueue = -1;
|
||||
|
||||
// First, select the transfer queue
|
||||
for (int i = 0; i < queueFamilies.size(); i++) {
|
||||
const VkQueueFamilyProperties& queueFamily = queueFamilies[i];
|
||||
if ((queueFamily.queueFlags & VK_QUEUE_TRANSFER_BIT) == 0) {
|
||||
// This queue does not support transfers. Skip it.
|
||||
continue;
|
||||
}
|
||||
if (queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT) {
|
||||
if (transferFamilyQueue == -1) {
|
||||
// If we don't already have a transfer queue, take anything that supports VK_QUEUE_TRANSFER_BIT
|
||||
transferFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
VkQueueFlags priorFlags = queueFamilies[transferFamilyQueue].queueFlags;
|
||||
if ((priorFlags & VK_QUEUE_GRAPHICS_BIT) > (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
|
||||
// This is a better queue, as it is specifically for transfers and not graphics
|
||||
transferFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
if ((priorFlags & VK_QUEUE_COMPUTE_BIT) > (queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT)) {
|
||||
// This is a better queue, as it is specifically for transfers and not compute
|
||||
transferFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Second, select the compute queue
|
||||
for (int i = 0; i < queueFamilies.size(); i++) {
|
||||
const VkQueueFamilyProperties& queueFamily = queueFamilies[i];
|
||||
if ((queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT) == 0) {
|
||||
// This queue does not support compute. Skip it.
|
||||
continue;
|
||||
}
|
||||
if (computeFamilyQueue == -1) {
|
||||
// If we don't already have a compute queue, take anything that supports VK_QUEUE_COMPUTE_BIT
|
||||
computeFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
if (computeFamilyQueue == transferFamilyQueue) {
|
||||
// Avoid sharing the compute queue with asset streaming (the transfer queue)
|
||||
computeFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
VkQueueFlags priorFlags = queueFamilies[computeFamilyQueue].queueFlags;
|
||||
if ((priorFlags & VK_QUEUE_GRAPHICS_BIT) > (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
|
||||
// This is a better queue, as it is specifically for compute and not graphics
|
||||
computeFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < queueFamilies.size(); i++) {
|
||||
const VkQueueFamilyProperties& queueFamily = queueFamilies[i];
|
||||
if ((queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) {
|
||||
// This queue does not support graphics. Skip it.
|
||||
continue;
|
||||
}
|
||||
if (graphicsFamilyQueue == -1) {
|
||||
// If we don't already have a graphics queue, take anything that supports VK_QUEUE_GRAPHICS_BIT
|
||||
graphicsFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
if (graphicsFamilyQueue == transferFamilyQueue) {
|
||||
// Avoid sharing the graphics queue with asset streaming (the transfer queue)
|
||||
graphicsFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
if (graphicsFamilyQueue == computeFamilyQueue) {
|
||||
// Avoid sharing a graphics queue with compute
|
||||
graphicsFamilyQueue = i;
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (graphicsFamilyQueue == -1) {
|
||||
// No graphics queue family, not suitable
|
||||
@@ -130,8 +202,14 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (transferFamilyQueue == -1) {
|
||||
// No transfer queue family, not suitable
|
||||
return false;
|
||||
}
|
||||
|
||||
m_graphicsFamilyQueueIndex = graphicsFamilyQueue;
|
||||
m_computeFamilyQueueIndex = computeFamilyQueue;
|
||||
m_transferFamilyQueueIndex = transferFamilyQueue;
|
||||
|
||||
uint32_t extensionCount;
|
||||
vkEnumerateDeviceExtensionProperties(m_device, nullptr, &extensionCount, nullptr);
|
||||
@@ -151,23 +229,32 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
|
||||
// ----
|
||||
|
||||
VkDeviceQueueCreateInfo queueCreateInfo[2]{};
|
||||
VkDeviceQueueCreateInfo queueCreateInfo[3]{};
|
||||
int queueCount = 1;
|
||||
float queuePriority = 1.0f;
|
||||
queueCreateInfo[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queueCreateInfo[0].queueFamilyIndex = m_graphicsFamilyQueueIndex;
|
||||
queueCreateInfo[0].queueCount = 1;
|
||||
queueCreateInfo[0].pQueuePriorities = &queuePriority;
|
||||
if (m_graphicsFamilyQueueIndex != m_computeFamilyQueueIndex) {
|
||||
queueCount++;
|
||||
queueCreateInfo[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queueCreateInfo[1].queueFamilyIndex = m_computeFamilyQueueIndex;
|
||||
queueCreateInfo[1].queueCount = 1;
|
||||
queueCreateInfo[1].pQueuePriorities = &queuePriority;
|
||||
}
|
||||
if (m_transferFamilyQueueIndex != m_graphicsFamilyQueueIndex && m_transferFamilyQueueIndex != m_computeFamilyQueueIndex) {
|
||||
queueCount++;
|
||||
queueCreateInfo[2].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
queueCreateInfo[2].queueFamilyIndex = m_transferFamilyQueueIndex;
|
||||
queueCreateInfo[2].queueCount = 1;
|
||||
queueCreateInfo[2].pQueuePriorities = &queuePriority;
|
||||
}
|
||||
|
||||
VkDeviceCreateInfo deviceCreateInfo{};
|
||||
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
deviceCreateInfo.pQueueCreateInfos = queueCreateInfo;
|
||||
deviceCreateInfo.queueCreateInfoCount = m_graphicsFamilyQueueIndex == m_computeFamilyQueueIndex ? 1 : 2;
|
||||
deviceCreateInfo.queueCreateInfoCount = queueCount;
|
||||
VkPhysicalDeviceFeatures deviceFeatures{};
|
||||
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
|
||||
deviceCreateInfo.enabledExtensionCount = static_cast<uint32_t>(deviceExtensions.size());
|
||||
@@ -178,6 +265,7 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
}
|
||||
vkGetDeviceQueue(m_logicalDevice, m_graphicsFamilyQueueIndex, 0, &m_graphicsQueue);
|
||||
vkGetDeviceQueue(m_logicalDevice, m_computeFamilyQueueIndex, 0, &m_computeQueue);
|
||||
vkGetDeviceQueue(m_logicalDevice, m_transferFamilyQueueIndex, 0, &m_transferQueue);
|
||||
|
||||
VkCommandPoolCreateInfo poolInfo{};
|
||||
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
@@ -197,12 +285,22 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
return false;
|
||||
}
|
||||
|
||||
poolInfo.queueFamilyIndex = m_transferFamilyQueueIndex;
|
||||
if (vkCreateCommandPool(m_logicalDevice, &poolInfo, nullptr, &m_transferCommandPool) != VK_SUCCESS) {
|
||||
destroy();
|
||||
// TODO - Log a warning...
|
||||
return false;
|
||||
}
|
||||
|
||||
const int kMaxGraphicsCommandBuffers = 10; // TODO - This needs to be dynamic?
|
||||
m_graphicsCommandBuffers.resize(kMaxGraphicsCommandBuffers);
|
||||
|
||||
const int kMaxComputeCommandBuffers = 4; // TODO - This needs to be dynamic?
|
||||
m_computeCommandBuffers.resize(kMaxComputeCommandBuffers);
|
||||
|
||||
const int kMaxTransferCommandBuffers = 4; // TODO - This needs to be dynamic?
|
||||
m_transferCommandBuffers.resize(kMaxTransferCommandBuffers);
|
||||
|
||||
VkCommandBufferAllocateInfo allocInfo{};
|
||||
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
allocInfo.commandPool = m_graphicsCommandPool;
|
||||
@@ -223,6 +321,14 @@ bool KRDevice::initialize(const std::vector<const char*>& deviceExtensions)
|
||||
return false;
|
||||
}
|
||||
|
||||
allocInfo.commandPool = m_transferCommandPool;
|
||||
allocInfo.commandBufferCount = (uint32_t)m_transferCommandBuffers.size();
|
||||
if (vkAllocateCommandBuffers(m_logicalDevice, &allocInfo, m_transferCommandBuffers.data()) != VK_SUCCESS) {
|
||||
destroy();
|
||||
// TODO - Log a warning
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create Vulkan Memory Allocator instance for this device
|
||||
|
||||
// We are dynamically linking Vulkan, so we need to give VMA some hints
|
||||
|
||||
@@ -65,10 +65,14 @@ public:
|
||||
VkQueue m_graphicsQueue;
|
||||
uint32_t m_computeFamilyQueueIndex;
|
||||
VkQueue m_computeQueue;
|
||||
uint32_t m_transferFamilyQueueIndex;
|
||||
VkQueue m_transferQueue;
|
||||
VkCommandPool m_graphicsCommandPool;
|
||||
VkCommandPool m_computeCommandPool;
|
||||
VkCommandPool m_transferCommandPool;
|
||||
std::vector<VkCommandBuffer> m_graphicsCommandBuffers;
|
||||
std::vector<VkCommandBuffer> m_computeCommandBuffers;
|
||||
std::vector<VkCommandBuffer> m_transferCommandBuffers;
|
||||
VmaAllocator m_allocator;
|
||||
|
||||
// Staging buffer for uploading with the transfer queue
|
||||
|
||||
Reference in New Issue
Block a user