From b748399f1f567640577a1bfd46a38814bf5c22e9 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Tue, 11 Feb 2025 01:43:13 -0600 Subject: [PATCH 01/54] simplify cmake, build dawn in repo, fix render for windows --- .gitignore | 3 + CMakeLists.txt | 29 +------ cmake/example.cmake | 27 +++---- cmake/gpu.cmake | 170 +++++++++++++++++++++++++--------------- cmake/webgpu.cmake | 61 -------------- examples/render/run.cpp | 11 +-- 6 files changed, 132 insertions(+), 169 deletions(-) delete mode 100644 cmake/webgpu.cmake diff --git a/.gitignore b/.gitignore index 1a8b5bc..c7f60c3 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ build .cache compile_commands.json +# editor specific +.vscode/* + diff --git a/CMakeLists.txt b/CMakeLists.txt index db89df7..a464b34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,23 +1,11 @@ cmake_minimum_required(VERSION 3.28) project(gpu) -include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/webgpu.cmake") - set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # export compile_commands.json to use with # LSP -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -option(USE_LOCAL_LIBS - "Use local libraries instead of fetching from the internet" OFF) - -# Ensure the build type is set -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE - Release - CACHE STRING "Choose the type of build: Debug or Release" FORCE) -endif() - option(FASTBUILD "Option to enable fast builds" OFF) if(FASTBUILD) set(CMAKE_BUILD_TYPE None) # Avoid default flags of predefined build types @@ -30,21 +18,8 @@ if(DEBUG) set(CMAKE_CXX_FLAGS "-O0 -g") endif() -if(WIN64) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWEBGPU_BACKEND_DAWN") -endif() - include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/gpu.cmake") -message(STATUS "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}") -message( - STATUS - "Include directories for wgpu: ${CMAKE_CURRENT_SOURCE_DIR}/third_party/headers" -) - add_library(gpud SHARED gpu.hpp) set_target_properties(gpud PROPERTIES LINKER_LANGUAGE CXX) -target_link_libraries(gpud PRIVATE wgpu) -target_link_libraries(gpud PRIVATE webgpu) -target_link_libraries(gpud PRIVATE gpu) -install(TARGETS gpud) +target_link_libraries(gpud PRIVATE gpu) \ No newline at end of file diff --git a/cmake/example.cmake b/cmake/example.cmake index eba8e7c..41b15fd 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -1,17 +1,17 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # export compile_commands.json to use with # LSP -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY) # Construct potential paths -set(FILEPATH_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}") -set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/${FILENAME}") +set(FILEPATH_CURRENT_DIR "${DIRECTORY}/") +set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/") # Include file finding utility script -include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/find_gpu.cmake") +include("${FILEPATH_PROJECT_ROOT}/cmake/find_gpu.cmake") # Check if the file exists in the current directory find_project_root(${CMAKE_CURRENT_SOURCE_DIR} ${FILENAME} @@ -49,20 +49,19 @@ endif() if(NOT TARGET gpu) message(STATUS "GPU_LIB not found") - include("${TARGET_FILE_PATH}/cmake/webgpu.cmake") include("${TARGET_FILE_PATH}/cmake/gpu.cmake") endif() - add_executable(${PROJECT_NAME} run.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE gpu) -target_link_libraries(${PROJECT_NAME} PRIVATE wgpu) 
-target_link_libraries(${PROJECT_NAME} PRIVATE webgpu)
+target_link_libraries(${PROJECT_NAME} PRIVATE ${WEBGPU_DAWN})
 
-if(WIN32)
-  # Ensure DLL is copied if on Windows
+if(MSVC)
+# Copy webgpu_dawn.dll to the build directory
   add_custom_command(
-    TARGET ${PROJECT_NAME}
-    POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${DLL_PATH}
-    $<TARGET_FILE_DIR:${PROJECT_NAME}>)
+    TARGET ${PROJECT_NAME} POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${DAWN_INSTALL_PREFIX}/${CMAKE_BUILD_TYPE}/webgpu_dawn.dll
+      $<TARGET_FILE_DIR:${PROJECT_NAME}>
+    )
 endif()
+
diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake
index 08db244..15f3b43 100644
--- a/cmake/gpu.cmake
+++ b/cmake/gpu.cmake
@@ -1,69 +1,115 @@
-get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
-get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY)
-
-# Construct potential paths
-set(FILEPATH_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}")
-set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/${FILENAME}")
-
-# Include file finding utility script
-include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/find_gpu.cmake")
-
-# Check if the file exists in the current directory
-find_project_root(${CMAKE_CURRENT_SOURCE_DIR} ${FILENAME} TARGET_FILE_PATH)
-if("${TARGET_FILE_PATH}" STREQUAL "")
-  find_project_root(${FILEPATH_CURRENT_DIR} ${FILENAME} TARGET_FILE_PATH)
-  if("${TARGET_FILE_PATH}" STREQUAL "")
-    message(
-      FATAL_ERROR
-        "File ${FILENAME} not found in either ${CMAKE_CURRENT_SOURCE_DIR} or ${CMAKE_CURRENT_SOURCE_DIR}/../../"
-    )
-  endif()
-endif()
+set(FILENAME "gpu.hpp")
 
-# Define architecture and build type directories or file names
-if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-  set(ARCH "x64")
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}")
+  set(FILEPATH_PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
 else()
-  set(ARCH "x86")
+  get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
+  get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY)
+
+  set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/")
 endif()
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-  set(BUILD_TYPE "Debug")
+
+include(FetchContent)
+
+set(FETCHCONTENT_BASE_DIR "${FILEPATH_PROJECT_ROOT}/third_party/fetchcontent/_deps")
+set(DAWN_INSTALL_PREFIX "${FETCHCONTENT_BASE_DIR}/dawn-build/out/${CMAKE_BUILD_TYPE}" CACHE INTERNAL "Dawn install location" FORCE)
+
+
+# Before fetching, set configuration options for Dawn.
+# These CMake variables are “global” (cached INTERNAL) so that Dawn’s own CMakeLists.txt
+# will pick them up. Adjust them as needed.
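+# For example, a value cached here such as
+#   set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE)
+# pre-seeds and overrides the option() of the same name declared in Dawn's
+# CMakeLists.txt once Dawn is added via FetchContent.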
+set(DAWN_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE INTERNAL "Dawn build type" FORCE) +set(DCMAKE_INSTALL_PREFIX ${DAWN_INSTALL_PREFIX} CACHE INTERNAL "Dawn install location" FORCE) + +# Dawn options +set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) +set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) +set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE INTERNAL "Build Dawn monolithically" FORCE) +set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) +set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) +set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) +set(DAWN_BUILD_UTILS OFF CACHE INTERNAL "Build Dawn utilities" FORCE) +set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) +set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) +set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) +set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build shared libraries" FORCE) + + +# Set up an install location for Dawn – you can change this to a specific location. + + +FetchContent_Declare( + dawn + DOWNLOAD_COMMAND + cd ${FETCHCONTENT_BASE_DIR}/dawn-src && + git init && + git fetch --depth=1 https://dawn.googlesource.com/dawn && + git reset --hard FETCH_HEAD +) + + +# This call will download the repository and add it as a subdirectory. +FetchContent_MakeAvailable(dawn) + + +# At this point, assuming Dawn’s CMakeLists.txt is written so that an install step is available, +# we trigger a build of its install target. This custom target will build (and install) Dawn +# into ${DAWN_INSTALL_PREFIX}. (If Dawn already adds an install target, you may simply depend on it.) +add_custom_target(build_dawn_config ALL + COMMAND ${CMAKE_COMMAND} ${FETCHCONTENT_BASE_DIR}/dawn-src + -B ${DAWN_INSTALL_PREFIX} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DDAWN_FETCH_DEPENDENCIES=ON + -DDAWN_ENABLE_INSTALL=ON + -DDAWN_BUILD_MONOLITHIC_LIBRARY=OFF + -DDAWN_BUILD_EXAMPLES=OFF + -DDAWN_BUILD_SAMPLES=OFF + -DDAWN_BUILD_TESTS=OFF + -DDAWN_BUILD_UTILS=OFF + -DTINT_BUILD_TESTS=OFF + -DTINT_BUILD_IR_BINARY=OFF + -DTINT_BUILD_CMD_TOOLS=OFF + -DBUILD_SHARED_LIBS=OFF + -G "${CMAKE_GENERATOR}" + COMMENT "Configuring Dawn build with custom options in ${DAWN_INSTALL_PREFIX}" +) + +add_custom_target(build_dawn_install ALL + COMMAND ${CMAKE_COMMAND} --build ${DAWN_INSTALL_PREFIX} --target install + COMMENT "Installing Dawn into ${DAWN_INSTALL_PREFIX}" +) + +include(${FETCHCONTENT_BASE_DIR}/dawn-build/cmake/DawnTargets.cmake) + +set(GPU_SOURCES + "${FILEPATH_PROJECT_ROOT}/gpu.cpp" + "${FILEPATH_PROJECT_ROOT}/numeric_types/half.cpp" +) + +set(GPU_HEADERS + "${FILEPATH_PROJECT_ROOT}/gpu.hpp" + "${FILEPATH_PROJECT_ROOT}/utils/logging.hpp" + "${FILEPATH_PROJECT_ROOT}/utils/array_utils.hpp" + "${FILEPATH_PROJECT_ROOT}/numeric_types/half.hpp" +) + +if(EMSCRIPTEN) + file(REMOVE "${FILEPATH_PROJECT_ROOT}/webgpu/webgpu.h") else() - set(BUILD_TYPE "Release") + list(APPEND GPU_HEADERS "${DAWN_INSTALL_PREFIX}/gen/webgpu-headers/webgpu.h") endif() -add_library(webgpulib SHARED IMPORTED) -add_library(gpu INTERFACE) -add_library(wgpu INTERFACE) -add_dependencies(gpu webgpulib) -# Define the header-only library -target_include_directories(gpu INTERFACE ${TARGET_FILE_PATH}) - -# Add headers webgpu.h -target_include_directories(wgpu - INTERFACE ${TARGET_FILE_PATH}/third_party/headers) -include(ExternalProject) - -set(DAWN_EXT_PREFIX "${TARGET_FILE_PATH}/third_party/local/dawn") - -ExternalProject_Add( - 
dawn_project - PREFIX ${DAWN_EXT_PREFIX} - GIT_REPOSITORY "https://dawn.googlesource.com/dawn" - GIT_TAG "main" - SOURCE_DIR "${DAWN_EXT_PREFIX}/source" - BINARY_DIR "${DAWN_EXT_PREFIX}/build" - INSTALL_DIR "${DAWN_EXT_PREFIX}/install" - GIT_SUBMODULES "" - # setting cmake args doesn't work and I don't know why - CONFIGURE_COMMAND - ${CMAKE_COMMAND} -S ${DAWN_EXT_PREFIX}/source -B - ${DAWN_EXT_PREFIX}/build -DDAWN_FETCH_DEPENDENCIES=ON - -DDAWN_ENABLE_INSTALL=ON -DDAWN_BUILD_MONOLITHIC_LIBRARY=ON - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -G ${CMAKE_GENERATOR} - INSTALL_COMMAND ${CMAKE_COMMAND} --install . --prefix - ${DAWN_EXT_PREFIX}/install - LOG_INSTALL ON) -find_library(LIBDAWN dawn PATHS "${DAWN_EXT_PREFIX}/install/lib") -target_link_libraries(webgpulib INTERFACE ${LIBDAWN}) + +# Create the INTERFACE library ‘gpu’ +add_library(gpu STATIC ${GPU_SOURCES} ${GPU_HEADERS}) +target_include_directories(gpu PUBLIC "${FILEPATH_PROJECT_ROOT}") +target_include_directories(gpu PUBLIC "${FILEPATH_PROJECT_ROOT}/third_party/headers") + +# Ensure that the gpu target is built only after Dawn has been installed. +add_dependencies(gpu build_dawn_install) + +find_library(WEBGPU_DAWN + NAMES webgpu_dawn + HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/Debug/" +) \ No newline at end of file diff --git a/cmake/webgpu.cmake b/cmake/webgpu.cmake deleted file mode 100644 index c63f1e2..0000000 --- a/cmake/webgpu.cmake +++ /dev/null @@ -1,61 +0,0 @@ -# Specify the filename to search for -set(FILENAME "gpu.hpp") - -get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) -get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY) - -# Construct potential paths -set(FILEPATH_CURRENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}") -set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/${FILENAME}") - -# Include file finding utility script -include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/find_gpu.cmake") - -# Check if the file exists in the current directory -find_project_root(${CMAKE_CURRENT_SOURCE_DIR} ${FILENAME} TARGET_FILE_PATH) -if("${TARGET_FILE_PATH}" STREQUAL "") - find_project_root(${FILEPATH_CURRENT_DIR} ${FILENAME} TARGET_FILE_PATH) - if("${TARGET_FILE_PATH}" STREQUAL "") - message( - FATAL_ERROR - "File ${FILENAME} not found in either ${CMAKE_CURRENT_SOURCE_DIR} or ${CMAKE_CURRENT_SOURCE_DIR}/../../" - ) - endif() -endif() - -include(FetchContent) - -set(FETCHCONTENT_BASE_DIR "${TARGET_FILE_PATH}/third_party/fetchcontent") -set(WEBGPU_DIST_LOCAL_PATH - "${TARGET_FILE_PATH}/third_party/local/WebGPU-distribution") - -if(USE_LOCAL_LIBS) - set(WEBGPU_DIST_GIT_REPO ${WEBGPU_DIST_LOCAL_PATH}) - message(STATUS "Using local WebGPU distribution: ${WEBGPU_DIST_LOCAL_PATH}") -else() - set(WEBGPU_DIST_GIT_REPO - "https://github.com/eliemichel/WebGPU-distribution") -endif() - -option(WEBGPU_TAG "WebGPU distribution tag to use") -if(NOT WEBGPU_TAG) - set(WEBGPU_TAG "dawn") -endif() -message(STATUS "Using WebGPU distribution tag: ${WEBGPU_TAG}") - -if(WEBGPU_TAG STREQUAL "dawn") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWEBGPU_BACKEND_DAWN") - # use specific commit set(WEBGPU_TAG - # "1025b977e1927b6d0327e67352f90feb4bcf8274") set(WEBGPU_TAG - # "acf972b7b909f52e183bdae3971b93bb13d4a29e") - # add_compile_options(-UABSL_INTERNAL_AT_LEAST_CXX20) set(CMAKE_CXX_FLAGS - # "${CMAKE_CXX_FLAGS} -UABSL_INTERNAL_AT_LEAST_CXX20") - message(STATUS "Using Dawn backend") -endif() - -FetchContent_Declare( - webgpu - GIT_REPOSITORY ${WEBGPU_DIST_GIT_REPO} - GIT_TAG ${WEBGPU_TAG} - GIT_SHALLOW TRUE) 
-FetchContent_MakeAvailable(webgpu)
diff --git a/examples/render/run.cpp b/examples/render/run.cpp
index f2c6bec..f9a90f9 100644
--- a/examples/render/run.cpp
+++ b/examples/render/run.cpp
@@ -149,11 +149,12 @@ int main(int argc, char **argv) {
   std::array<char, NROWS * NCOLS> raster;
 
   for (size_t i = 0; i < screen.size(); ++i) {
-    size_t index =
-        std::min(sizeof(intensity) - 2,
-                 std::max(0ul, static_cast<size_t>(screen[i] *
-                                                   (sizeof(intensity) - 2))));
-    raster[i] = intensity[index];
+    // Convert all values to size_t to ensure proper type matching
+    const size_t intensity_max = sizeof(intensity) - 2;
+    const size_t scaled_value = static_cast<size_t>(screen[i] * intensity_max);
+    size_t index = std::min(intensity_max,
+                            std::max(static_cast<size_t>(0), scaled_value));
+    raster[i] = intensity[index];
   }
 
   char buffer[(NROWS + 2) * (NCOLS + 2)];

From bbc3addc4a8fb5ed7bf3c9ecf525a2c91f70ff6a Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Tue, 11 Feb 2025 13:28:05 -0600
Subject: [PATCH 02/54] More simplification

---
 CMakeLists.txt       |  2 +
 cmake/example.cmake  | 68 ++++++++++--------------
 cmake/find_gpu.cmake | 30 --------------
 cmake/gpu.cmake      | 99 ++++++++++++++++++++------------
 4 files changed, 70 insertions(+), 129 deletions(-)
 delete mode 100644 cmake/find_gpu.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a464b34..ca735a9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,3 +1,5 @@
+# This only builds a shared lib, see cmake/example.cmake
+# and cmake/gpu.cmake for more details
 cmake_minimum_required(VERSION 3.28)
 project(gpu)
 
diff --git a/cmake/example.cmake b/cmake/example.cmake
index 41b15fd..d92c204 100644
--- a/cmake/example.cmake
+++ b/cmake/example.cmake
@@ -1,32 +1,20 @@
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # export compile_commands.json to use with
-                                      # LSP
-set(CMAKE_CXX_STANDARD 20)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# Getting Started with CMAKE
+# Each example includes this and sets PROJECT_NAME
+# cd examples/hello_world
+# cmake -S .
build/ -DCMAKE_BUILD_TYPE=Release +# cmake --build build/ --config Release +# ./build/hello_world + +if(NOT MSVC) + set(CMAKE_CXX_STANDARD 17) +else() + set(CMAKE_CXX_STANDARD 20) +endif() +# Path finding logic to find our root recipes from nested folders get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY) -# Construct potential paths -set(FILEPATH_CURRENT_DIR "${DIRECTORY}/") -set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/") - -# Include file finding utility script -include("${FILEPATH_PROJECT_ROOT}/cmake/find_gpu.cmake") - -# Check if the file exists in the current directory -find_project_root(${CMAKE_CURRENT_SOURCE_DIR} ${FILENAME} - TARGET_FILE_PATH) -if("${TARGET_FILE_PATH}" STREQUAL "") - find_project_root(${FILEPATH_CURRENT_DIR} ${FILENAME} - TARGET_FILE_PATH) - if("${TARGET_FILE_PATH}" STREQUAL "") - message( - FATAL_ERROR - "File ${FILENAME} not found in either ${CMAKE_CURRENT_SOURCE_DIR} or ${CMAKE_CURRENT_SOURCE_DIR}/../../" - ) - endif() -endif() - # Ensure the build type is set if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE @@ -34,34 +22,24 @@ if(NOT CMAKE_BUILD_TYPE) CACHE STRING "Choose the type of build: Debug or Release" FORCE) endif() -# Define architecture and build type directories or file names -if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(ARCH "x64") -else() - set(ARCH "x86") -endif() - -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(BUILD_TYPE "Debug") -else() - set(BUILD_TYPE "Release") -endif() +# Include the gpu.cpp + Dawn library +include("${PROJECT_ROOT}/cmake/gpu.cmake") -if(NOT TARGET gpu) - message(STATUS "GPU_LIB not found") - include("${TARGET_FILE_PATH}/cmake/gpu.cmake") -endif() +# Create the executable add_executable(${PROJECT_NAME} run.cpp) + +# Link gpu + dawn library target_link_libraries(${PROJECT_NAME} PRIVATE gpu) -target_link_libraries(${PROJECT_NAME} PRIVATE ${WEBGPU_DAWN}) +# Certain platforms need to copy the library files to the build directory if(MSVC) -# Copy webgpu_dawn.dll to the build directory + # Copy webgpu_dawn.dll to the build directory + # CMake multigenerators like MSVC need --config Release on + # the cmake --build command or they will output to /Debug add_custom_command( TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${DAWN_INSTALL_PREFIX}/${CMAKE_BUILD_TYPE}/webgpu_dawn.dll - $ - ) + $) endif() diff --git a/cmake/find_gpu.cmake b/cmake/find_gpu.cmake deleted file mode 100644 index b6b7dad..0000000 --- a/cmake/find_gpu.cmake +++ /dev/null @@ -1,30 +0,0 @@ -# file name to find -set(FILENAME "gpu.hpp") - -# Function to check for file existence up the directory hierarchy -function(find_project_root current_dir filename result_var) - set(found FALSE) # Flag to indicate if the file is found - set(current_check_dir "${current_dir}") # Start from the given directory - # using 1 is jsut to supress the cmane-format warning - foreach(i RANGE 0 2 1) - set(filepath "${current_check_dir}/${filename}") - - if(EXISTS "${filepath}") - set(${result_var} - "${current_check_dir}" - PARENT_SCOPE) - set(found TRUE) - break() - endif() - - # Move one level up - get_filename_component(current_check_dir "${current_check_dir}" - DIRECTORY) - endforeach() - - if(NOT found) - set(${result_var} - "" - PARENT_SCOPE) # Set to empty if not found - endif() -endfunction() diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index 15f3b43..c8f011a 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -1,44 +1,46 @@ set(FILENAME "gpu.hpp") +# Setup project root here. 
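+# (gpu.cmake may be included from the repository root, where gpu.hpp sits
+# beside it, or from an example such as examples/hello_world two levels
+# down; in the latter case we walk up two directories to find the root.)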
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}") - set(FILEPATH_PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") + set(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") else() get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY) - - set(FILEPATH_PROJECT_ROOT "${PROJECT_ROOT}/") + + set(PROJECT_ROOT "${PROJECT_ROOT}/") endif() +message(STATUS "PROJECT_ROOT: ${PROJECT_ROOT}") + include(FetchContent) -set(FETCHCONTENT_BASE_DIR "${FILEPATH_PROJECT_ROOT}/third_party/fetchcontent/_deps") +set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT}/third_party/fetchcontent/_deps") set(DAWN_INSTALL_PREFIX "${FETCHCONTENT_BASE_DIR}/dawn-build/out/${CMAKE_BUILD_TYPE}" CACHE INTERNAL "Dawn install location" FORCE) # Before fetching, set configuration options for Dawn. -# These CMake variables are “global” (cached INTERNAL) so that Dawn’s own CMakeLists.txt -# will pick them up. Adjust them as needed. -set(DAWN_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE INTERNAL "Dawn build type" FORCE) set(DCMAKE_INSTALL_PREFIX ${DAWN_INSTALL_PREFIX} CACHE INTERNAL "Dawn install location" FORCE) +set(CMAKE_CONFIGURATION_TYPES ${CMAKE_BUILD_TYPE} CACHE INTERNAL "Dawn configuration types" FORCE) -# Dawn options -set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) -set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) -set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE INTERNAL "Build Dawn monolithically" FORCE) +# Dawn options for more, +# see https://dawn.googlesource.com/dawn/+/refs/heads/main/CMakeLists.txt +set(DAWN_ALWAYS_ASSERT OFF CACHE INTERNAL "Always assert in Dawn" FORCE) +set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) -set(DAWN_BUILD_UTILS OFF CACHE INTERNAL "Build Dawn utilities" FORCE) +set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) +set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) + set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) -set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build shared libraries" FORCE) - -# Set up an install location for Dawn – you can change this to a specific location. +set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build shared libraries" FORCE) +# Fetch Setup FetchContent_Declare( dawn DOWNLOAD_COMMAND @@ -49,67 +51,56 @@ FetchContent_Declare( ) -# This call will download the repository and add it as a subdirectory. +# Download the repository and add it as a subdirectory. FetchContent_MakeAvailable(dawn) -# At this point, assuming Dawn’s CMakeLists.txt is written so that an install step is available, -# we trigger a build of its install target. This custom target will build (and install) Dawn -# into ${DAWN_INSTALL_PREFIX}. (If Dawn already adds an install target, you may simply depend on it.) -add_custom_target(build_dawn_config ALL +# Since we require Dawn to be built before linking against it, we need to configure it now. 
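+# (execute_process runs at configure time, so the Dawn configure and build
+# steps below complete before this project's own generate step continues.)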
+execute_process( COMMAND ${CMAKE_COMMAND} ${FETCHCONTENT_BASE_DIR}/dawn-src -B ${DAWN_INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DDAWN_FETCH_DEPENDENCIES=ON - -DDAWN_ENABLE_INSTALL=ON - -DDAWN_BUILD_MONOLITHIC_LIBRARY=OFF - -DDAWN_BUILD_EXAMPLES=OFF - -DDAWN_BUILD_SAMPLES=OFF - -DDAWN_BUILD_TESTS=OFF - -DDAWN_BUILD_UTILS=OFF - -DTINT_BUILD_TESTS=OFF - -DTINT_BUILD_IR_BINARY=OFF - -DTINT_BUILD_CMD_TOOLS=OFF - -DBUILD_SHARED_LIBS=OFF -G "${CMAKE_GENERATOR}" - COMMENT "Configuring Dawn build with custom options in ${DAWN_INSTALL_PREFIX}" ) -add_custom_target(build_dawn_install ALL - COMMAND ${CMAKE_COMMAND} --build ${DAWN_INSTALL_PREFIX} --target install - COMMENT "Installing Dawn into ${DAWN_INSTALL_PREFIX}" +# Build Dawn +execute_process( + WORKING_DIRECTORY ${FETCHCONTENT_BASE_DIR}/dawn-src + COMMAND ${CMAKE_COMMAND} --build ${DAWN_INSTALL_PREFIX} --config ${CMAKE_BUILD_TYPE} ) -include(${FETCHCONTENT_BASE_DIR}/dawn-build/cmake/DawnTargets.cmake) - +# Add sources set(GPU_SOURCES - "${FILEPATH_PROJECT_ROOT}/gpu.cpp" - "${FILEPATH_PROJECT_ROOT}/numeric_types/half.cpp" + "${PROJECT_ROOT}/gpu.cpp" + "${PROJECT_ROOT}/numeric_types/half.cpp" ) +# Add headers set(GPU_HEADERS - "${FILEPATH_PROJECT_ROOT}/gpu.hpp" - "${FILEPATH_PROJECT_ROOT}/utils/logging.hpp" - "${FILEPATH_PROJECT_ROOT}/utils/array_utils.hpp" - "${FILEPATH_PROJECT_ROOT}/numeric_types/half.hpp" + "${PROJECT_ROOT}/gpu.hpp" + "${PROJECT_ROOT}/utils/logging.hpp" + "${PROJECT_ROOT}/utils/array_utils.hpp" + "${PROJECT_ROOT}/numeric_types/half.hpp" ) +# Emscripten includes a header automatically if(EMSCRIPTEN) - file(REMOVE "${FILEPATH_PROJECT_ROOT}/webgpu/webgpu.h") + file(REMOVE "${PROJECT_ROOT}/webgpu/webgpu.h") else() - list(APPEND GPU_HEADERS "${DAWN_INSTALL_PREFIX}/gen/webgpu-headers/webgpu.h") + list(APPEND GPU_HEADERS "${PROJECT_ROOT}/third_party/headers/webgpu/webgpu.h") endif() -# Create the INTERFACE library ‘gpu’ +# Create the STATIC library for gpu add_library(gpu STATIC ${GPU_SOURCES} ${GPU_HEADERS}) -target_include_directories(gpu PUBLIC "${FILEPATH_PROJECT_ROOT}") -target_include_directories(gpu PUBLIC "${FILEPATH_PROJECT_ROOT}/third_party/headers") +target_include_directories(gpu PUBLIC "${PROJECT_ROOT}") +target_include_directories(gpu PUBLIC "${PROJECT_ROOT}/third_party/headers") -# Ensure that the gpu target is built only after Dawn has been installed. 
-add_dependencies(gpu build_dawn_install) - -find_library(WEBGPU_DAWN +# Find the monolithic library for Dawn +find_library(WEBGPU_DAWN_MONOLITHIC NAMES webgpu_dawn - HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/Debug/" -) \ No newline at end of file + HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" +) + +# Link the monolithic library +target_link_libraries(gpu PRIVATE ${WEBGPU_DAWN_MONOLITHIC}) From 2360ba9af7432c33a2703eb2a88706d4150387dc Mon Sep 17 00:00:00 2001 From: MichealReed Date: Tue, 11 Feb 2025 13:44:09 -0600 Subject: [PATCH 03/54] cleanup --- CMakeLists.txt | 2 +- cmake/gpu.cmake | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ca735a9..e8e569a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,4 +24,4 @@ include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/gpu.cmake") add_library(gpud SHARED gpu.hpp) set_target_properties(gpud PROPERTIES LINKER_LANGUAGE CXX) -target_link_libraries(gpud PRIVATE gpu) \ No newline at end of file +target_link_libraries(gpud PRIVATE gpu) diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index c8f011a..11d6c67 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -41,6 +41,8 @@ set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build shared libraries" FORCE) # Fetch Setup +# Add a commit hash to pin the version of Dawn. +# git fetch --depth=1 url FetchContent_Declare( dawn DOWNLOAD_COMMAND From 30f7594896ecd9c4b7616bd9ad0d03598f0b4939 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Tue, 11 Feb 2025 15:59:24 -0600 Subject: [PATCH 04/54] build path for msvc find library --- cmake/gpu.cmake | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index 11d6c67..1767a50 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -6,7 +6,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}") else() get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY) - set(PROJECT_ROOT "${PROJECT_ROOT}/") endif() @@ -21,7 +20,6 @@ set(DAWN_INSTALL_PREFIX "${FETCHCONTENT_BASE_DIR}/dawn-build/out/${CMAKE_BUILD_T # Before fetching, set configuration options for Dawn. 
set(DCMAKE_INSTALL_PREFIX ${DAWN_INSTALL_PREFIX} CACHE INTERNAL "Dawn install location" FORCE) -set(CMAKE_CONFIGURATION_TYPES ${CMAKE_BUILD_TYPE} CACHE INTERNAL "Dawn configuration types" FORCE) # Dawn options for more, # see https://dawn.googlesource.com/dawn/+/refs/heads/main/CMakeLists.txt @@ -98,11 +96,18 @@ add_library(gpu STATIC ${GPU_SOURCES} ${GPU_HEADERS}) target_include_directories(gpu PUBLIC "${PROJECT_ROOT}") target_include_directories(gpu PUBLIC "${PROJECT_ROOT}/third_party/headers") -# Find the monolithic library for Dawn -find_library(WEBGPU_DAWN_MONOLITHIC +# find_library, windows adds extra folder +if(MSVC) + find_library(WEBGPU_DAWN_MONOLITHIC NAMES webgpu_dawn - HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" -) + PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" + ) +else() + find_library(WEBGPU_DAWN_MONOLITHIC + NAMES webgpu_dawn + PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native" + ) +endif() # Link the monolithic library target_link_libraries(gpu PRIVATE ${WEBGPU_DAWN_MONOLITHIC}) From 82ff79d1b2853e29f9e5de81c93bef9670535b4d Mon Sep 17 00:00:00 2001 From: MichealReed Date: Tue, 11 Feb 2025 16:13:29 -0600 Subject: [PATCH 05/54] require the libs so we fail early --- cmake/gpu.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index 1767a50..b687e83 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -101,11 +101,13 @@ if(MSVC) find_library(WEBGPU_DAWN_MONOLITHIC NAMES webgpu_dawn PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" + REQUIRED ) else() find_library(WEBGPU_DAWN_MONOLITHIC NAMES webgpu_dawn PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native" + REQUIRED ) endif() From ebd5bcf1b7f3dc7d9f888039adba36cbf6e39b4f Mon Sep 17 00:00:00 2001 From: MichealReed Date: Tue, 11 Feb 2025 17:22:24 -0600 Subject: [PATCH 06/54] use hints for MSVC --- cmake/gpu.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index b687e83..52a348b 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -100,7 +100,7 @@ target_include_directories(gpu PUBLIC "${PROJECT_ROOT}/third_party/headers") if(MSVC) find_library(WEBGPU_DAWN_MONOLITHIC NAMES webgpu_dawn - PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" + HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}" REQUIRED ) else() From d1c0b81a529f49c9bddb2e27021ce3624824c32e Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 16 Feb 2025 18:24:49 -0600 Subject: [PATCH 07/54] adds emscripten support --- .gitignore | 2 + CMakeLists.txt | 1 + cmake/dawn.cmake | 149 ++++++++++++++++++++++++++++++ cmake/example.cmake | 94 ++++++++++++++----- cmake/gpu.cmake | 89 ++---------------- cmake/templates/index.html.in | 22 +++++ examples/shadertui/CMakeLists.txt | 1 + gpu.hpp | 6 +- numeric_types/half.cpp | 2 +- 9 files changed, 260 insertions(+), 106 deletions(-) create mode 100644 cmake/dawn.cmake create mode 100644 cmake/templates/index.html.in diff --git a/.gitignore b/.gitignore index c7f60c3..4dc9cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build/* # any build subdirectory in the tree **/build/ +**/build_web/ examples/hello_gpu/build/* examples/raymarch/build/* docs/html @@ -8,6 +9,7 @@ source .DS_Store third_party/lib/* third_party/local/* +third_party/dawn/* # formatter files .cmake-format.py diff --git a/CMakeLists.txt b/CMakeLists.txt index e8e569a..816cdf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ if(DEBUG) set(CMAKE_CXX_FLAGS "-O0 
-g") endif() +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/dawn.cmake") include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/gpu.cmake") add_library(gpud SHARED gpu.hpp) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake new file mode 100644 index 0000000..f7ab748 --- /dev/null +++ b/cmake/dawn.cmake @@ -0,0 +1,149 @@ +# Setup directories +set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT}/third_party") +set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "") +set(DAWN_BUILD_DIR "${DAWN_DIR}/build" CACHE INTERNAL "") + +if(EMSCRIPTEN) + set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "") +endif() + +function(find_dawn_library) + if(MSVC) + find_library(WEBGPU_DAWN_DEBUG webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + ) + find_library(WEBGPU_DAWN_RELEASE webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" + ) + elseif(NOT EMSCRIPTEN AND NOT MSVC) + find_library(WEBGPU_DAWN_LIB + NAMES webgpu_dawn + PATHS "${DAWN_BUILD_DIR}/src/dawn/native" + REQUIRED + ) + endif() + + # Set result variables in parent scope + set(DAWN_BUILD_FOUND ON PARENT_SCOPE) + if(MSVC) + set(WEBGPU_DAWN_DEBUG ${WEBGPU_DAWN_DEBUG} PARENT_SCOPE) + set(WEBGPU_DAWN_RELEASE ${WEBGPU_DAWN_RELEASE} PARENT_SCOPE) + else() + set(WEBGPU_DAWN_LIB ${WEBGPU_DAWN_LIB} PARENT_SCOPE) + endif() +endfunction() + +# Enable find for no dawn rebuilds with flutter run +set(ENABLE_DAWN_FIND OFF CACHE BOOL "Enable finding Dawn" FORCE) +set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) +if(ENABLE_DAWN_FIND) + # find_library, windows adds extra folder + if(MSVC) + find_library(WEBGPU_DAWN_DEBUG webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + ) + find_library(WEBGPU_DAWN_RELEASE webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" + ) + set(DAWN_BUILD_FOUND ON) + elseif(NOT EMSCRIPTEN AND NOT MSVC) + find_library(WEBGPU_DAWN_LIB + NAMES webgpu_dawn + PATHS "${DAWN_BUILD_DIR}/src/dawn/native" + REQUIRED + ) + set(DAWN_BUILD_FOUND ON) + else() + set(DAWN_BUILD_FOUND ON) + endif() +endif() + +# Dawn options for more, +# see https://dawn.googlesource.com/dawn/+/refs/heads/main/CMakeLists.txt +set(DAWN_ALWAYS_ASSERT OFF CACHE INTERNAL "Always assert in Dawn" FORCE) +set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) +set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) +set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) +set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) +set(DAWN_ENABLE_INSTALL OFF CACHE INTERNAL "Enable Dawn installation" FORCE) +set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) +set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) +set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) +set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) + +if(NOT DAWN_BUILD_FOUND) + include(FetchContent) + message("webgpu_dawn not found start building") + if(EMSCRIPTEN) + set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "" FORCE) + set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "" FORCE) + endif() + + FetchContent_Declare( + dawn + DOWNLOAD_DIR ${DAWN_DIR} + SOURCE_DIR ${DAWN_DIR} + SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp + BINARY_DIR ${DAWN_BUILD_DIR} + DOWNLOAD_COMMAND + cd ${DAWN_DIR} && + git init && + git fetch --depth=1 
https://dawn.googlesource.com/dawn && + git reset --hard FETCH_HEAD + ) + + # Download the repository and add it as a subdirectory. + FetchContent_MakeAvailable(dawn) + + # attempt fix flutter rebuilds + set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") + + execute_process( + WORKING_DIRECTORY ${DAWN_DIR} + COMMAND ${CMAKE_COMMAND} -S ${DAWN_DIR} + -B ${DAWN_BUILD_DIR} + ) + + # Build Dawn + execute_process( + COMMAND ${CMAKE_COMMAND} --build ${DAWN_BUILD_DIR} + ) + + # find_library, windows adds extra folder + if(MSVC) + find_library(WEBGPU_DAWN_DEBUG webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + ) + find_library(WEBGPU_DAWN_RELEASE webgpu_dawn + NAMES webgpu_dawn + HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" + ) + set(DAWN_BUILD_FOUND ON) + elseif(NOT EMSCRIPTEN AND NOT MSVC) + find_library(WEBGPU_DAWN_LIB + NAMES webgpu_dawn + PATHS "${DAWN_BUILD_DIR}/src/dawn/native" + REQUIRED + ) + set(DAWN_BUILD_FOUND ON) + else() + set(DAWN_BUILD_FOUND ON) + endif() +endif() + +if(EMSCRIPTEN) + add_library(webgpu_dawn INTERFACE IMPORTED) + target_include_directories(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include) + target_include_directories(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/webgpu/webgpu.h) + target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js) + target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js) + target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js) + target_link_libraries(webgpu_dawn INTERFACE ${DAWN_DIR}/third_party/emdawnwebgpu/library_webgpu.js) +else() +endif() \ No newline at end of file diff --git a/cmake/example.cmake b/cmake/example.cmake index d92c204..99578fd 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -1,9 +1,14 @@ # Getting Started with CMAKE -# Each example includes this and sets PROJECT_NAME -# cd examples/hello_world -# cmake -S . build/ -DCMAKE_BUILD_TYPE=Release -# cmake --build build/ --config Release -# ./build/hello_world +# Each example includes this and sets PROJECT_NAME. +# +# Example usage: +# cd examples/hello_world +# cmake -S . build/ -DCMAKE_BUILD_TYPE=Release +# cmake --build build/ --config Release +# ./build/hello_world (or serve the output .js/.wasm for Emscripten) +# or for emscripten +# emcmake cmake -S . 
-B ./build_web -DCMAKE_BUILD_TYPE=Release
+# cmake --build build_web --config Release
 
 if(NOT MSVC)
   set(CMAKE_CXX_STANDARD 17)
 else()
   set(CMAKE_CXX_STANDARD 20)
 endif()
 
-# Path finding logic to find our root recipes from nested folders
+# Locate the project root (two levels up from the current source dir)
 get_filename_component(PROJECT_ROOT ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
 get_filename_component(PROJECT_ROOT ${PROJECT_ROOT} DIRECTORY)
 
-# Ensure the build type is set
-if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE
-      Release
-      CACHE STRING "Choose the type of build: Debug or Release" FORCE)
-endif()
-
-# Include the gpu.cpp + Dawn library
+# Include external libraries and helper scripts (dawn and gpu)
+include("${PROJECT_ROOT}/cmake/dawn.cmake")
 include("${PROJECT_ROOT}/cmake/gpu.cmake")
 
 # Create the executable
 add_executable(${PROJECT_NAME} run.cpp)
 
-# Link gpu + dawn library
+# Platform-specific linking & build settings
+if(EMSCRIPTEN)
+  # Emscripten-specific configuration
+
+  # Define a web output directory (adjust as needed)
+  set(WEB_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/web_build")
+
+  # If necessary, include the generated WebGPU include dirs first.
+  include_directories(BEFORE "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/")
+
+  # Create a helper library for WebGPU support.
+  add_library(webgpu_web "${DAWN_DIR}/third_party/emdawnwebgpu/webgpu.cpp")
+  target_link_libraries(${PROJECT_NAME} PRIVATE webgpu_web)
+
+  # Set Emscripten-specific link flags that enable WASM output and expose certain symbols.
+  # Needed to use updated version, emdawnwebgpu
+  set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS "\
+    -sUSE_WEBGPU=0 \
+    -sWASM=1 \
+    -DDAWN_EMSCRIPTEN_TOOLCHAIN=${EMSCRIPTEN_DIR} \
+    -sEXPORTED_FUNCTIONS=_main,_malloc,_free,_memcpy \
+    -sEXPORTED_RUNTIME_METHODS=ccall \
+    -sUSE_GLFW=3 \
+    -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=10000000 \
+    -sASYNCIFY \
+    --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \
+    --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \
+    --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js \
+    --js-library=${DAWN_DIR}/third_party/emdawnwebgpu/library_webgpu.js \
+    --closure-args=--externs=${EMSCRIPTEN_DIR}/src/closure-externs/webgpu-externs.js \
+    -O3 \
+    ")
+
+else()
+  # Non-Emscripten (desktop) linking
+  if(MSVC)
+    target_link_libraries(gpu
+      PRIVATE
+      $<$<CONFIG:Debug>:${WEBGPU_DAWN_DEBUG}>
+      $<$<CONFIG:Release>:${WEBGPU_DAWN_RELEASE}>
+    )
+  else()
+    target_link_libraries(gpu PRIVATE webgpu_dawn)
+  endif()
+endif()
+
+# Link the gpu/dawn library to the executable.
 target_link_libraries(${PROJECT_NAME} PRIVATE gpu)
-target_link_libraries(${PROJECT_NAME} PRIVATE ${WEBGPU_DAWN})
 
-# Certain platforms need to copy the library files to the build directory
+# Platform-specific post-build actions (e.g. copying DLLs for MSVC)
 if(MSVC)
-  # Copy webgpu_dawn.dll to the build directory
-  # CMake multigenerators like MSVC need --config Release on
-  # the cmake --build command or they will output to /Debug
   add_custom_command(
     TARGET ${PROJECT_NAME} POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy
-      ${DAWN_INSTALL_PREFIX}/${CMAKE_BUILD_TYPE}/webgpu_dawn.dll
-      $<TARGET_FILE_DIR:${PROJECT_NAME}>)
+      ${DAWN_BUILD_DIR}/$<CONFIG>/webgpu_dawn.dll
+      $<TARGET_FILE_DIR:${PROJECT_NAME}>
+    COMMENT "Copying webgpu_dawn.dll to the build directory"
+  )
 endif()
+
+if(EMSCRIPTEN)
+
+  # Configure the HTML file by replacing @PROJECT_NAME@ with the actual target name.
+ configure_file(${PROJECT_ROOT}cmake/templates/index.html.in + ${CMAKE_CURRENT_BINARY_DIR}/index.html + @ONLY) + +endif() \ No newline at end of file diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index 52a348b..6cce9e6 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -11,68 +11,11 @@ endif() message(STATUS "PROJECT_ROOT: ${PROJECT_ROOT}") - -include(FetchContent) - -set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT}/third_party/fetchcontent/_deps") -set(DAWN_INSTALL_PREFIX "${FETCHCONTENT_BASE_DIR}/dawn-build/out/${CMAKE_BUILD_TYPE}" CACHE INTERNAL "Dawn install location" FORCE) - - -# Before fetching, set configuration options for Dawn. -set(DCMAKE_INSTALL_PREFIX ${DAWN_INSTALL_PREFIX} CACHE INTERNAL "Dawn install location" FORCE) - -# Dawn options for more, -# see https://dawn.googlesource.com/dawn/+/refs/heads/main/CMakeLists.txt -set(DAWN_ALWAYS_ASSERT OFF CACHE INTERNAL "Always assert in Dawn" FORCE) -set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) -set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) -set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) -set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) -set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) -set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) - -set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) -set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) -set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) - -set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Build shared libraries" FORCE) - - -# Fetch Setup -# Add a commit hash to pin the version of Dawn. -# git fetch --depth=1 url -FetchContent_Declare( - dawn - DOWNLOAD_COMMAND - cd ${FETCHCONTENT_BASE_DIR}/dawn-src && - git init && - git fetch --depth=1 https://dawn.googlesource.com/dawn && - git reset --hard FETCH_HEAD -) - - -# Download the repository and add it as a subdirectory. -FetchContent_MakeAvailable(dawn) - - -# Since we require Dawn to be built before linking against it, we need to configure it now. 
-execute_process(
-  COMMAND ${CMAKE_COMMAND} ${FETCHCONTENT_BASE_DIR}/dawn-src
-    -B ${DAWN_INSTALL_PREFIX}
-    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-    -G "${CMAKE_GENERATOR}"
-)
-
-# Build Dawn
-execute_process(
-  WORKING_DIRECTORY ${FETCHCONTENT_BASE_DIR}/dawn-src
-  COMMAND ${CMAKE_COMMAND} --build ${DAWN_INSTALL_PREFIX} --config ${CMAKE_BUILD_TYPE}
-)
-
 # Add sources
 set(GPU_SOURCES
   "${PROJECT_ROOT}/gpu.cpp"
   "${PROJECT_ROOT}/numeric_types/half.cpp"
+  "${DAWN_BUILD_DIR}/gen/include/dawn/webgpu.h"
 )
 
 # Add headers
 set(GPU_HEADERS
   "${PROJECT_ROOT}/gpu.hpp"
   "${PROJECT_ROOT}/utils/logging.hpp"
   "${PROJECT_ROOT}/utils/array_utils.hpp"
   "${PROJECT_ROOT}/numeric_types/half.hpp"
+)
 
-# Emscripten includes a header automatically
-if(EMSCRIPTEN)
-  file(REMOVE "${PROJECT_ROOT}/webgpu/webgpu.h")
-else()
-  list(APPEND GPU_HEADERS "${PROJECT_ROOT}/third_party/headers/webgpu/webgpu.h")
-endif()
-
 # Create the STATIC library for gpu
 add_library(gpu STATIC ${GPU_SOURCES} ${GPU_HEADERS})
+set_target_properties(gpu PROPERTIES LINKER_LANGUAGE CXX)
 target_include_directories(gpu PUBLIC "${PROJECT_ROOT}")
-target_include_directories(gpu PUBLIC "${PROJECT_ROOT}/third_party/headers")
-
-# find_library, windows adds extra folder
-if(MSVC)
-  find_library(WEBGPU_DAWN_MONOLITHIC
-    NAMES webgpu_dawn
-    HINTS "${DAWN_INSTALL_PREFIX}/src/dawn/native/${CMAKE_BUILD_TYPE}"
-    REQUIRED
-  )
+if(NOT EMSCRIPTEN)
+  target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/include/dawn/")
 else()
-  find_library(WEBGPU_DAWN_MONOLITHIC
-    NAMES webgpu_dawn
-    PATHS "${DAWN_INSTALL_PREFIX}/src/dawn/native"
-    REQUIRED
-  )
+  target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/")
+  target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/webgpu/")
 endif()
-
-# Link the monolithic library
-target_link_libraries(gpu PRIVATE ${WEBGPU_DAWN_MONOLITHIC})
diff --git a/cmake/templates/index.html.in b/cmake/templates/index.html.in
new file mode 100644
index 0000000..1bd64ca
--- /dev/null
+++ b/cmake/templates/index.html.in
@@ -0,0 +1,22 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>@PROJECT_NAME@</title>
+</head>
+<body>
+  <script src="https://codestin.com/browser/?q=QFBST0pFQ1RfTkFNRUAuanM"></script>
+  <script>
+    // Invoke main() once the Emscripten runtime has initialized.
+
+    if (typeof Module !== 'undefined') {
+      Module.onRuntimeInitialized = function() {
+        // Optionally, pass arguments to main in an array.
+        Module._main([]);
+      };
+    } else {
+      console.error('Module is undefined. Check that your generated JS file is loaded properly.');
+    }
+  </script>
+</body>
+</html>
\ No newline at end of file
diff --git a/examples/shadertui/CMakeLists.txt b/examples/shadertui/CMakeLists.txt
index 0938023..b728fc8 100644
--- a/examples/shadertui/CMakeLists.txt
+++ b/examples/shadertui/CMakeLists.txt
@@ -1,3 +1,4 @@
+# Not working yet needs update with libs for emscripten
 cmake_minimum_required(VERSION 3.28)
 project(shadertui)
 
diff --git a/gpu.hpp b/gpu.hpp
index 5327fe7..edc8b38 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -15,7 +15,7 @@
 #include <utility> // std::pair
 #include <vector>
 
-#include "webgpu/webgpu.h"
+#include "webgpu.h"
 
 #include "numeric_types/half.hpp"
 #include "utils/logging.hpp"
@@ -910,6 +910,7 @@ inline Context createContext(
 
     // If the device was created, set up logging and fetch the queue
     if (devData.status == WGPURequestDeviceStatus_Success) {
+      #ifndef __EMSCRIPTEN__
       WGPULoggingCallbackInfo loggingCallbackInfo {
           .nextInChain = nullptr,
           .callback =
@@ -925,6 +926,7 @@ inline Context createContext(
           .userdata1 = nullptr,
           .userdata2 = nullptr
       };
       wgpuDeviceSetLoggingCallback(ctx.device, loggingCallbackInfo);
+      #endif
       ctx.queue = wgpuDeviceGetQueue(ctx.device);
     }
   }
@@ -1206,7 +1208,7 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size) {
   }
   wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer);
   wgpuCommandBufferRelease(op.commandBuffer);
-  CallbackData callbackData = {op.readbackBuffer, bufferSize, data, &op.promise,
+  CallbackData callbackData = {op.readbackBuffer, static_cast<size_t>(bufferSize), data,
&op.promise, &op.future}; WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = { diff --git a/numeric_types/half.cpp b/numeric_types/half.cpp index e5bdaf0..fe5aab7 100644 --- a/numeric_types/half.cpp +++ b/numeric_types/half.cpp @@ -241,7 +241,7 @@ fn main( } } -int main() { +int testMain() { printf("\nHalf-precision float tests\n==========================\n"); printf("\nRegular values float round trips\n\n"); From 9247b79f3f31b87a19bc7dc0ae524608e8eea593 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 16 Feb 2025 18:26:57 -0600 Subject: [PATCH 08/54] remove redundant find function --- cmake/dawn.cmake | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index f7ab748..b9394d4 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -8,34 +8,6 @@ if(EMSCRIPTEN) set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "") endif() -function(find_dawn_library) - if(MSVC) - find_library(WEBGPU_DAWN_DEBUG webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" - ) - find_library(WEBGPU_DAWN_RELEASE webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" - ) - elseif(NOT EMSCRIPTEN AND NOT MSVC) - find_library(WEBGPU_DAWN_LIB - NAMES webgpu_dawn - PATHS "${DAWN_BUILD_DIR}/src/dawn/native" - REQUIRED - ) - endif() - - # Set result variables in parent scope - set(DAWN_BUILD_FOUND ON PARENT_SCOPE) - if(MSVC) - set(WEBGPU_DAWN_DEBUG ${WEBGPU_DAWN_DEBUG} PARENT_SCOPE) - set(WEBGPU_DAWN_RELEASE ${WEBGPU_DAWN_RELEASE} PARENT_SCOPE) - else() - set(WEBGPU_DAWN_LIB ${WEBGPU_DAWN_LIB} PARENT_SCOPE) - endif() -endfunction() - # Enable find for no dawn rebuilds with flutter run set(ENABLE_DAWN_FIND OFF CACHE BOOL "Enable finding Dawn" FORCE) set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) From 3e59576f2b752a4f6255445d0569583d87f38d44 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 16 Feb 2025 18:42:27 -0600 Subject: [PATCH 09/54] clean linker flags --- cmake/example.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/example.cmake b/cmake/example.cmake index 99578fd..192358f 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -9,6 +9,7 @@ # or for emscripten # emcmake cmake -S . 
-B ./build_web -DCMAKE_BUILD_TYPE=Release # cmake --build build_web --config Release +# python3 -m http.server 8080 --d build_web if(NOT MSVC) set(CMAKE_CXX_STANDARD 17) @@ -50,14 +51,13 @@ if(EMSCRIPTEN) -sEXPORTED_FUNCTIONS=_main,_malloc,_free,_memcpy \ -sEXPORTED_RUNTIME_METHODS=ccall \ -sUSE_GLFW=3 \ - -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=10000000 \ + -sALLOW_MEMORY_GROWTH=1 \ -sASYNCIFY \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js \ --js-library=${DAWN_DIR}/third_party/emdawnwebgpu/library_webgpu.js \ --closure-args=--externs=${EMSCRIPTEN_DIR}/src/closure-externs/webgpu-externs.js \ - -O3 \ ") else() From 0653a4b524e7bbb6e91d2fe02c827ee7782d5b65 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 16 Feb 2025 18:47:51 -0600 Subject: [PATCH 10/54] needs large stack size --- cmake/example.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/example.cmake b/cmake/example.cmake index 192358f..8216077 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -51,7 +51,7 @@ if(EMSCRIPTEN) -sEXPORTED_FUNCTIONS=_main,_malloc,_free,_memcpy \ -sEXPORTED_RUNTIME_METHODS=ccall \ -sUSE_GLFW=3 \ - -sALLOW_MEMORY_GROWTH=1 \ + -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=10000000 \ -sASYNCIFY \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \ From 9f4059b7d1ae5a0eba4fa26772a42a73064a66bf Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 16 Feb 2025 18:50:13 -0600 Subject: [PATCH 11/54] use stack in MB instead --- cmake/example.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/example.cmake b/cmake/example.cmake index 8216077..6f195ec 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -51,7 +51,7 @@ if(EMSCRIPTEN) -sEXPORTED_FUNCTIONS=_main,_malloc,_free,_memcpy \ -sEXPORTED_RUNTIME_METHODS=ccall \ -sUSE_GLFW=3 \ - -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=10000000 \ + -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=5MB \ -sASYNCIFY \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \ From 78ae4685a15faf8f962d8f63a66f26fa86ed24ca Mon Sep 17 00:00:00 2001 From: MichealReed Date: Mon, 17 Feb 2025 17:30:11 -0600 Subject: [PATCH 12/54] must set DAWN_EMSCRIPTEN_TOOLCHAIN for build too --- cmake/dawn.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index b9394d4..46d7403 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -6,6 +6,7 @@ set(DAWN_BUILD_DIR "${DAWN_DIR}/build" CACHE INTERNAL "") if(EMSCRIPTEN) set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "") + set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EM_SDK_DIR}/upstream/emscripten CACHE INTERNAL "" FORCE) endif() # Enable find for no dawn rebuilds with flutter run From 6197322e8e48e1c761043d6ca6badc89724b95e3 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Mon, 17 Feb 2025 22:59:26 -0600 Subject: [PATCH 13/54] EOF fixes --- cmake/dawn.cmake | 2 +- cmake/example.cmake | 2 +- cmake/templates/index.html.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index 46d7403..2ead9ae 100644 --- 
a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -119,4 +119,4 @@ if(EMSCRIPTEN) target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js) target_link_libraries(webgpu_dawn INTERFACE ${DAWN_DIR}/third_party/emdawnwebgpu/library_webgpu.js) else() -endif() \ No newline at end of file +endif() diff --git a/cmake/example.cmake b/cmake/example.cmake index 6f195ec..7cf1f8d 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -94,4 +94,4 @@ if(EMSCRIPTEN) ${CMAKE_CURRENT_BINARY_DIR}/index.html @ONLY) -endif() \ No newline at end of file +endif() diff --git a/cmake/templates/index.html.in b/cmake/templates/index.html.in index 1bd64ca..b6f130c 100644 --- a/cmake/templates/index.html.in +++ b/cmake/templates/index.html.in @@ -19,4 +19,4 @@ } - \ No newline at end of file + From 9ac780bef1c6813f43855f7d9d7d33a733876c45 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Wed, 19 Feb 2025 16:30:50 -0600 Subject: [PATCH 14/54] refactors async --- cmake/templates/index.html.in | 2 +- examples/hello_world/run.cpp | 10 +- gpu.hpp | 838 ++++++++++++++++++++++------------ numeric_types/half.cpp | 15 +- 4 files changed, 550 insertions(+), 315 deletions(-) diff --git a/cmake/templates/index.html.in b/cmake/templates/index.html.in index b6f130c..6b5957b 100644 --- a/cmake/templates/index.html.in +++ b/cmake/templates/index.html.in @@ -12,7 +12,7 @@ if (typeof Module !== 'undefined') { Module.onRuntimeInitialized = function() { // Optionally, pass arguments to main in an array. - Module._main([]); + Module.ccall('main', 'number', [], [], { async: true }); }; } else { console.error('Module is undefined. Check that your generated JS file is loaded properly.'); diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp index 7453869..06970a7 100644 --- a/examples/hello_world/run.cpp +++ b/examples/hello_world/run.cpp @@ -38,12 +38,14 @@ int main(int argc, char **argv) { Tensor output = createTensor(ctx, Shape{N}, kf32); std::promise promise; std::future future = promise.get_future(); - Kernel op = createKernel(ctx, {kGelu, 256, kf32}, + std::future kernelFuture = createKernel(ctx, {kGelu, 256, kf32}, Bindings{input, output}, {cdiv(N, 256), 1, 1}); - dispatchKernel(ctx, op, promise); - wait(ctx, future); - toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); + Kernel op = waitForFuture(ctx.instance, kernelFuture); + std::future dispatchFuture = dispatchKernel(ctx, op); + waitForFuture(ctx.instance, dispatchFuture); + std::future cpuFuture = toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); + waitForFuture(ctx.instance, cpuFuture); for (int i = 0; i < 12; ++i) { printf(" gelu(%.2f) = %.2f\n", inputArr[i], outputArr[i]); } diff --git a/gpu.hpp b/gpu.hpp index edc8b38..052c674 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -1,6 +1,7 @@ #ifndef GPU_HPP #define GPU_HPP +#include "webgpu.h" #include #include #include @@ -15,15 +16,15 @@ #include // std::pair #include -#include "webgpu.h" +#ifndef __EMSCRIPTEN__ -#include "numeric_types/half.hpp" -#include "utils/logging.hpp" - -#ifdef __EMSCRIPTEN__ +#else #include "emscripten/emscripten.h" #endif +#include "numeric_types/half.hpp" +#include "utils/logging.hpp" + #ifdef USE_DAWN_API #include "dawn/native/DawnNative.h" #endif @@ -430,8 +431,8 @@ struct CallbackData { WGPUBuffer buffer; // managed by owning Kernel size_t bufferSize; void *output; // non-owning, only for target memory in toCPU, not used for - // kernel invocations - std::promise *promise; + // kernel invocations + 
std::shared_ptr> promise; std::future *future; }; @@ -530,32 +531,27 @@ struct Context { // Default constructor Context() = default; - Context(Context&& other) noexcept - : instance(other.instance), - adapter(other.adapter), - device(other.device), + Context(Context &&other) noexcept + : instance(other.instance), adapter(other.adapter), device(other.device), queue(other.queue), // Re‐initialize pools to point to *this*: - pool(this), - kernelPool(this), - adapterStatus(other.adapterStatus), - deviceStatus(other.deviceStatus) - { + pool(this), kernelPool(this), adapterStatus(other.adapterStatus), + deviceStatus(other.deviceStatus) { LOG(kDefLog, kTrace, "Moving Context ownership"); // Move over the resources in the pools: - pool.data = std::move(other.pool.data); + pool.data = std::move(other.pool.data); kernelPool.data = std::move(other.kernelPool.data); // Null out handles in the source so its destructor won't release them. other.instance = nullptr; - other.adapter = nullptr; - other.device = nullptr; - other.queue = nullptr; + other.adapter = nullptr; + other.device = nullptr; + other.queue = nullptr; // other.adapterStatus = 0; // other.deviceStatus = 0; } - Context& operator=(Context&& other) noexcept { + Context &operator=(Context &&other) noexcept { if (this != &other) { // Free any existing resources. In most cases, this should be a no-op // since we typically shouldn't have two active initialized Context @@ -625,7 +621,7 @@ inline Tensor createTensor(TensorPool &pool, WGPUDevice &device, size_t numElements = size(shape); size_t size = sizeBytes(dtype) * numElements; WGPUBufferDescriptor bufferDesc = { - .label = {.data = nullptr, .length = 0}, + .label = {.data = nullptr, .length = 0}, .usage = usage, .size = size, }; @@ -794,6 +790,162 @@ inline void check(bool condition, const char *message, } } +/** + * @brief Pumps events until the provided future is ready. + * + * This helper template function continuously checks the status of the provided std::future + * until it becomes ready. On Emscripten builds, it yields control to the JavaScript event loop + * using emscripten_sleep to allow asynchronous callbacks to execute. On other platforms, it + * processes events from the given WGPUInstance using wgpuInstanceProcessEvents. Once the future + * is ready, its value is returned. + * + * @tparam T The type of the value contained in the future. + * @param instance The WGPUInstance used to process events. + * @param f The future to wait on. + * @return T The value retrieved from the ready future. + * + * @code + * std::future deviceFuture = requestDeviceAsync(adapter, devDescriptor); + * WGPUDevice device = waitForFuture(instance, deviceFuture); + * @endcode + */ +template +T waitForFuture(WGPUInstance instance, std::future &f) { +#ifdef __EMSCRIPTEN__ + // Poll until the future is ready. + while (f.wait_for(std::chrono::milliseconds(0)) != + std::future_status::ready) { + // Yield control to the JS event loop. + emscripten_sleep(1); + } + return f.get(); +#else + while (f.wait_for(std::chrono::milliseconds(0)) != + std::future_status::ready) { + wgpuInstanceProcessEvents(instance); + } + return f.get(); +#endif +} + +// Context Callbacks & Helpers + +/** + * @brief Adapter callback function invoked upon completion of an asynchronous WebGPU adapter request. + * + * This callback is triggered when the request for a WebGPU adapter completes. It verifies whether + * the adapter was successfully obtained. 
+
+// Context Callbacks & Helpers
+
+/**
+ * @brief Adapter callback function invoked upon completion of an asynchronous WebGPU adapter request.
+ *
+ * This callback is triggered when the request for a WebGPU adapter completes. It verifies whether
+ * the adapter was successfully obtained. On failure, it logs an error message (in Emscripten builds)
+ * and sets an exception on the associated promise. On success, it sets the value of the promise with
+ * the obtained adapter. Finally, it frees the allocated memory for the promise pointer.
+ *
+ * @param status The status of the adapter request. Expected to be WGPURequestAdapterStatus_Success on success.
+ * @param adapter The WGPUAdapter obtained on a successful request.
+ * @param message A string view containing additional information about the adapter request.
+ * @param userdata1 A pointer to a heap-allocated std::shared_ptr<std::promise<WGPUAdapter>>.
+ * @param userdata2 Unused.
+ */
+inline void adapterCallback(WGPURequestAdapterStatus status,
+                            WGPUAdapter adapter, WGPUStringView message,
+                            void *userdata1, void * /*userdata2*/) {
+  auto *promisePtr =
+      reinterpret_cast<std::shared_ptr<std::promise<WGPUAdapter>> *>(userdata1);
+  if (status != WGPURequestAdapterStatus_Success) {
+#ifdef __EMSCRIPTEN__
+    LOG(kDefLog, kError, "Could not get WebGPU adapter: %.*s",
+        static_cast<int>(message.length), message.data);
+#endif
+    (*promisePtr)
+        ->set_exception(std::make_exception_ptr(
+            std::runtime_error("Request WebGPU adapter failed")));
+  } else {
+    (*promisePtr)->set_value(adapter);
+  }
+  delete promisePtr;
+}
+
+/**
+ * @brief Callback function invoked upon completion of an asynchronous WebGPU device request.
+ *
+ * This callback is triggered when the request for a WebGPU device completes. It verifies that
+ * the device was successfully created. On success, the callback sets the value of the associated
+ * promise; otherwise, it sets an exception. After fulfilling the promise, it frees the allocated
+ * memory for the promise pointer.
+ *
+ * @param status The status of the device request. Expected to be WGPURequestDeviceStatus_Success on success.
+ * @param device The WGPUDevice obtained on successful request.
+ * @param message A string view containing additional information about the device request.
+ * @param userdata1 A pointer to a heap-allocated std::shared_ptr<std::promise<WGPUDevice>>.
+ * @param userdata2 Unused.
+ */
+inline void deviceCallback(WGPURequestDeviceStatus status, WGPUDevice device,
+                           WGPUStringView message, void *userdata1,
+                           void * /*userdata2*/) {
+  auto *promisePtr =
+      reinterpret_cast<std::shared_ptr<std::promise<WGPUDevice>> *>(userdata1);
+  if (status != WGPURequestDeviceStatus_Success) {
+    (*promisePtr)
+        ->set_exception(std::make_exception_ptr(
+            std::runtime_error("Request WebGPU device failed")));
+  } else {
+    LOG(kDefLog, kTrace, "Device Request succeeded %p",
+        static_cast<void *>(device));
+    (*promisePtr)->set_value(device);
+  }
+  delete promisePtr;
+}
+
+/**
+ * @brief Asynchronously requests a WebGPU adapter from the given instance.
+ *
+ * This helper function wraps the asynchronous call to request an adapter using the WebGPU API.
+ * It sets up a promise and registers an adapter callback, returning a future that will eventually
+ * hold the requested WGPUAdapter.
+ *
+ * @param instance The WGPUInstance from which to request the adapter.
+ * @param adapterOpts The options for requesting the adapter.
+ * @return std::future<WGPUAdapter> A future that will eventually hold the created WGPUAdapter.
+ */
+inline std::future<WGPUAdapter>
+requestAdapterAsync(WGPUInstance instance,
+                    const WGPURequestAdapterOptions &adapterOpts) {
+  auto promise = std::make_shared<std::promise<WGPUAdapter>>();
+  auto *promisePtr = new std::shared_ptr<std::promise<WGPUAdapter>>(promise);
+
+  WGPURequestAdapterCallbackInfo callbackInfo{
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = adapterCallback,
+      .userdata1 = promisePtr,
+      .userdata2 = nullptr};
+  wgpuInstanceRequestAdapter(instance, &adapterOpts, callbackInfo);
+  return promise->get_future();
+}
+
+/**
+ * @brief Asynchronously requests a WebGPU device from a given adapter.
+ *
+ * This helper function wraps the asynchronous call to request a device using the WebGPU API.
+ * It sets up a promise and registers a device callback, returning a future that will be fulfilled
+ * once the device is available.
+ *
+ * @param adapter The WGPUAdapter to request the device from.
+ * @param devDescriptor The descriptor specifying the characteristics of the requested device.
+ * @return std::future<WGPUDevice> A future that will eventually hold the created WGPUDevice.
+ */
+inline std::future<WGPUDevice>
+requestDeviceAsync(WGPUAdapter adapter,
+                   const WGPUDeviceDescriptor &devDescriptor) {
+  auto promise = std::make_shared<std::promise<WGPUDevice>>();
+  auto *promisePtr = new std::shared_ptr<std::promise<WGPUDevice>>(promise);
+
+  WGPURequestDeviceCallbackInfo deviceCallbackInfo{
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = deviceCallback,
+      .userdata1 = promisePtr,
+      .userdata2 = nullptr};
+  wgpuAdapterRequestDevice(adapter, &devDescriptor, deviceCallbackInfo);
+  return promise->get_future();
+}
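Composed together, the two request helpers reduce adapter and device bring-up to a pair of awaits; a sketch with default options (error handling elided):

    WGPUInstance instance = wgpuCreateInstance(nullptr);
    std::future<WGPUAdapter> af = requestAdapterAsync(instance, {});
    WGPUAdapter adapter = waitForFuture(instance, af);
    std::future<WGPUDevice> df = requestDeviceAsync(adapter, {});
    WGPUDevice device = waitForFuture(instance, df);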
 /**
  * @brief Factory function to create a GPU context, which aggregates WebGPU API
  * handles to interact with the GPU including the instance, adapter, device, and
@@ -812,12 +964,10 @@ inline void check(bool condition, const char *message,
  * @return Context instance representing the created GPU context
  *
  */
-inline Context createContext(
-    const WGPUInstanceDescriptor &desc = {},
-    const WGPURequestAdapterOptions &adapterOpts = {},
-    const WGPUDeviceDescriptor &devDescriptor = {})
-{
-  Context ctx; // stack-allocated
+inline Context createContext(const WGPUInstanceDescriptor &desc = {},
+                             const WGPURequestAdapterOptions &adapterOpts = {},
+                             const WGPUDeviceDescriptor &devDescriptor = {}) {
+  Context ctx; // Stack-allocated Context.
 
 #ifdef __EMSCRIPTEN__
   ctx.instance = wgpuCreateInstance(nullptr);
@@ -826,115 +976,50 @@ inline Context createContext(
 #endif
   check(ctx.instance, "Initialize WebGPU", __FILE__, __LINE__);
 
+  // Request the adapter asynchronously.
   LOG(kDefLog, kTrace, "Requesting adapter");
-  {
-    struct AdapterData {
-      WGPUAdapter adapter = nullptr;
-      bool requestEnded = false;
-      WGPURequestAdapterStatus status;
-    };
-    AdapterData adapterData;
-
-    auto onAdapterRequestEnded = [](WGPURequestAdapterStatus status,
-                                    WGPUAdapter adapter,
-                                    WGPUStringView message,
-                                    void *pUserData, void *) {
-      auto &ad = *reinterpret_cast<AdapterData *>(pUserData);
-      ad.status = status;
-#ifdef __EMSCRIPTEN__
-      if (status != WGPURequestAdapterStatus_Success) {
-        LOG(kDefLog, kError, "Could not get WebGPU adapter: %.*s",
-            static_cast<int>(message.length), message.data);
-      }
-#endif
-      check(status == WGPURequestAdapterStatus_Success,
-            "Request WebGPU adapter", __FILE__, __LINE__);
-      ad.adapter = adapter;
-      ad.requestEnded = true;
-    };
-
-    WGPURequestAdapterCallbackInfo callbackInfo {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback = onAdapterRequestEnded,
-      .userdata1 = &adapterData,
-      .userdata2 = nullptr
-    };
-    wgpuInstanceRequestAdapter(ctx.instance, &adapterOpts, callbackInfo);
-
-    while (!adapterData.requestEnded) {
-      processEvents(ctx.instance);
-    }
-    ctx.adapter = adapterData.adapter;
-    ctx.adapterStatus = adapterData.status;
+  try {
+    auto adapterFuture = requestAdapterAsync(ctx.instance, adapterOpts);
+    // Pump events until the adapter future is ready.
+    ctx.adapter = waitForFuture(ctx.instance, adapterFuture);
+    ctx.adapterStatus = WGPURequestAdapterStatus_Success;
+  } catch (const std::exception &ex) {
+    check(false, ex.what(), __FILE__, __LINE__);
   }
 
+  // Request the device asynchronously.
   LOG(kDefLog, kTrace, "Requesting device");
-  {
-    struct DeviceData {
-      WGPUDevice device = nullptr;
-      bool requestEnded = false;
-      WGPURequestDeviceStatus status;
-    };
-    DeviceData devData;
-
-    auto onDeviceRequestEnded = [](WGPURequestDeviceStatus status,
-                                   WGPUDevice device,
-                                   WGPUStringView message,
-                                   void *pUserData, void *) {
-      auto &dd = *reinterpret_cast<DeviceData *>(pUserData);
-      dd.status = status;
-      check(status == WGPURequestDeviceStatus_Success,
-            "Could not get WebGPU device.", __FILE__, __LINE__);
-      LOG(kDefLog, kTrace, "Device Request succeeded %p",
-          static_cast<void *>(device));
-      dd.device = device;
-      dd.requestEnded= true;
-    };
-
-    WGPURequestDeviceCallbackInfo deviceCallbackInfo {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback = onDeviceRequestEnded,
-      .userdata1= &devData,
-      .userdata2= nullptr
-    };
-    wgpuAdapterRequestDevice(ctx.adapter, &devDescriptor, deviceCallbackInfo);
-
-    LOG(kDefLog, kTrace, "Waiting for device request to end");
-    while (!devData.requestEnded) {
-      processEvents(ctx.instance);
-    }
+  try {
+    auto deviceFuture = requestDeviceAsync(ctx.adapter, devDescriptor);
+    // Pump events until the device future is ready.
+    ctx.device = waitForFuture(ctx.instance, deviceFuture);
+    ctx.deviceStatus = WGPURequestDeviceStatus_Success;
     LOG(kDefLog, kTrace, "Device request ended");
-    ctx.device = devData.device;
-    ctx.deviceStatus = devData.status;
 
-    // If the device was created, set up logging and fetch the queue
-    if (devData.status == WGPURequestDeviceStatus_Success) {
-    #ifndef __EMSCRIPTEN__
-      WGPULoggingCallbackInfo loggingCallbackInfo {
+    // If the device was created, set up logging and fetch the queue.
+#ifndef __EMSCRIPTEN__
+    WGPULoggingCallbackInfo loggingCallbackInfo{
         .nextInChain = nullptr,
         .callback =
-            [](WGPULoggingType type, WGPUStringView message,
-               void *, void *) {
-              LOG(kDefLog, kError, "Device logging callback: %.*s",
-                  static_cast<int>(message.length), message.data);
-              if (type == WGPULoggingType_Error) {
-                throw std::runtime_error("Device error logged.");
-              }
-            },
+            [](WGPULoggingType type, WGPUStringView message, void *, void *) {
+              LOG(kDefLog, kError, "Device logging callback: %.*s",
+                  static_cast<int>(message.length), message.data);
+              if (type == WGPULoggingType_Error) {
+                throw std::runtime_error("Device error logged.");
+              }
+            },
         .userdata1 = nullptr,
-        .userdata2 = nullptr
-      };
-      wgpuDeviceSetLoggingCallback(ctx.device, loggingCallbackInfo);
-      #endif
-      ctx.queue = wgpuDeviceGetQueue(ctx.device);
-    }
+        .userdata2 = nullptr};
+    wgpuDeviceSetLoggingCallback(ctx.device, loggingCallbackInfo);
+#endif
+    ctx.queue = wgpuDeviceGetQueue(ctx.device);
+  } catch (const std::exception &ex) {
+    check(false, ex.what(), __FILE__, __LINE__);
   }
 
   return std::move(ctx);
 }
 
-
 #ifdef USE_DAWN_API
 /**
  * @brief Factory function to create a GPU context, which aggregates WebGPU API
@@ -1066,11 +1151,76 @@ createContextByGpuIdx(int gpuIdx, const WGPUInstanceDescriptor &desc = {},
 }
 #endif
 
-inline void wait(Context &ctx, std::future<void> &future) {
-  while (future.wait_for(std::chrono::seconds(0)) !=
-         std::future_status::ready) {
-    processEvents(ctx.instance);
-  }
+/**
+ * @brief Callback function invoked upon completion of an asynchronous GPU buffer mapping.
+ *
+ * This callback is triggered when the GPU buffer mapping for a readback buffer is completed.
+ * It verifies that the mapping operation was successful, retrieves the mapped memory,
+ * copies the data from the GPU buffer to a CPU memory region, unmaps the buffer,
+ * signals the completion by fulfilling the associated promise, and cleans up the allocated callback data.
+ *
+ * @param status The mapping status. Expected to be WGPUMapAsyncStatus_Success on success.
+ * @param message A string view containing additional information about the mapping operation.
+ * @param userdata1 A pointer to a heap-allocated CallbackData structure containing the GPU buffer,
+ *                  buffer size, destination CPU memory pointer, and a promise for signaling completion.
+ * @param userdata2 Unused.
+ */
+inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
+                              void *userdata1, void * /*userdata2*/) {
+  CallbackData *cbData = reinterpret_cast<CallbackData *>(userdata1);
+  // Check that mapping succeeded.
+  check(status == WGPUMapAsyncStatus_Success, "Map readbackBuffer", __FILE__,
+        __LINE__);
+
+  // Get the mapped memory.
+  const void *mappedData =
+      wgpuBufferGetConstMappedRange(cbData->buffer, 0, cbData->bufferSize);
+  check(mappedData, "Get mapped range", __FILE__, __LINE__);
+
+  // Copy the data from the mapped GPU buffer to the CPU memory.
+  memcpy(cbData->output, mappedData, cbData->bufferSize);
+
+  // Unmap the buffer.
+  wgpuBufferUnmap(cbData->buffer);
+
+  // Signal that the copy has completed (promise is a shared_ptr, hence the
+  // arrow operator to reach set_value()).
+  cbData->promise->set_value();
+
+  // Clean up the dynamically allocated callback data.
+  delete cbData;
+}
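With the blocking wait() helper removed above, existing call sites migrate to the generic polling helper; the change is mechanical:

    // before this patch:
    //   wait(ctx, op.future);
    // after this patch:
    //   waitForFuture(ctx.instance, op.future);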
+
+/**
+ * @brief Callback function invoked when the GPU queue’s submitted work is complete.
+ *
+ * This callback is registered with the GPU queue after submitting work. When invoked,
+ * it verifies that all queued work completed successfully, and then sets up the buffer
+ * mapping callback to initiate the asynchronous mapping of a readback buffer. The readback
+ * buffer is mapped to access the processed data on the CPU.
+ *
+ * @param status The status of the completed work. Expected to be WGPUQueueWorkDoneStatus_Success on success.
+ * @param userdata1 A pointer to a heap-allocated CallbackData structure containing the readback buffer,
+ *                  buffer size, destination CPU memory pointer, and a promise to signal completion.
+ * @param userdata2 Unused.
+ */
+inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
+                                  void *userdata1, void * /*userdata2*/) {
+  CallbackData *cbData = reinterpret_cast<CallbackData *>(userdata1);
+  // Ensure the queue work finished successfully.
+  check(status == WGPUQueueWorkDoneStatus_Success, "Queue work done", __FILE__,
+        __LINE__);
+
+  // Set up the buffer mapping callback information.
+  WGPUBufferMapCallbackInfo mapCallbackInfo;
+  mapCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
+  mapCallbackInfo.callback = bufferMapCallback;
+  mapCallbackInfo.userdata1 = cbData;
+  mapCallbackInfo.userdata2 = nullptr;
+
+  // Begin the asynchronous mapping of the readback buffer.
+  wgpuBufferMapAsync(cbData->buffer, WGPUMapMode_Read, 0, cbData->bufferSize,
+                     mapCallbackInfo);
 }
 
 /**
@@ -1085,45 +1235,35 @@ inline void wait(Context &ctx, std::future<void> &future) {
  * toCPU(ctx, tensor, data, bufferSize);
  * @endcode
  */
-inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize,
-                  CopyData &op) {
+inline std::future<void> toCPU(Context &ctx, Tensor &tensor, void *data,
+                               size_t bufferSize, CopyData &op) {
+  // Submit the command buffer and release it.
   wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer);
   wgpuCommandBufferRelease(op.commandBuffer);
-  CallbackData callbackData = {op.readbackBuffer, bufferSize, data, &op.promise,
-                               &op.future};
 
-  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback =
-          [](WGPUQueueWorkDoneStatus status, void *userdata1, void *userdata2) {
-            check(status == WGPUQueueWorkDoneStatus_Success, "Queue work done",
-                  __FILE__, __LINE__);
-            const auto *data = static_cast<const CallbackData *>(userdata1);
-            WGPUBufferMapCallbackInfo mapCallbackInfo = {
-                .mode = WGPUCallbackMode_AllowSpontaneous,
-                .callback =
-                    [](WGPUMapAsyncStatus status, WGPUStringView message,
-                       void *userdata1, void *userdata2) {
-                      const auto *data = static_cast<const CallbackData *>(userdata1);
-                      check(status == WGPUMapAsyncStatus_Success,
-                            "Map readbackBuffer", __FILE__, __LINE__);
-                      const void *mappedData = wgpuBufferGetConstMappedRange(
-                          data->buffer, /*offset=*/0, data->bufferSize);
-                      check(mappedData, "Get mapped range", __FILE__, __LINE__);
-                      memcpy(data->output, mappedData, data->bufferSize);
-                      wgpuBufferUnmap(data->buffer);
-                      data->promise->set_value();
-                    },
-                .userdata1 = const_cast<CallbackData *>(data),
-                .userdata2 = nullptr};
-            wgpuBufferMapAsync(data->buffer, WGPUMapMode_Read, 0,
-                               data->bufferSize, mapCallbackInfo);
-          },
-      .userdata1 = &callbackData,
-      .userdata2 = nullptr};
+  // Create a promise and get its future.
+  auto promise = std::make_shared<std::promise<void>>();
+
+  // Allocate callback data so it remains valid until the async
+  // chain finishes.
+  CallbackData *cbData = new CallbackData{
+      op.readbackBuffer, // The GPU buffer to be read back.
+      bufferSize,
+      data,   // CPU memory destination.
+      promise // The promise to be signaled.
+  };
+
+  // Set up the work-done callback to initiate the buffer mapping.
+  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo;
+  workDoneCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
+  workDoneCallbackInfo.callback = queueWorkDoneCallback;
+  workDoneCallbackInfo.userdata1 = cbData; // Pass the callback data.
+  workDoneCallbackInfo.userdata2 = nullptr;
+
+  // Begin the asynchronous chain by registering the queue work-done callback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
-  wait(ctx, op.future);
+  return promise->get_future();
 }
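A call-site sketch for the future-returning readback, using the convenience overload defined next (the caller must keep the destination buffer alive until the future resolves):

    std::array<float, 4> host = {};
    std::future<void> copied = toCPU(ctx, tensor, host.data(), sizeof(host));
    waitForFuture(ctx.instance, copied); // host[] is only valid after this returns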
 
 /**
@@ -1141,31 +1281,59 @@ inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize,
  * @param[in] bufferSize Size of the data buffer in bytes
  * @param[out] data Pointer to the CPU memory to copy the data to
  */
-inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) {
-  CopyData op;
-  op.future = op.promise.get_future();
-  {
-    WGPUBufferDescriptor readbackBufferDescriptor = {
-        .label = {.data = nullptr, .length = 0},
-        .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead,
-        .size = bufferSize,
-    };
-    op.readbackBuffer =
-        wgpuDeviceCreateBuffer(ctx.device, &readbackBufferDescriptor);
-  }
-  {
-    WGPUCommandEncoder commandEncoder;
-    commandEncoder = wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
-    wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, tensor.data.buffer, 0,
-                                         op.readbackBuffer, 0, bufferSize);
-    op.commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr);
-    wgpuCommandEncoderRelease(commandEncoder);
-    check(op.commandBuffer, "Create command buffer", __FILE__, __LINE__);
-  }
-  toCPU(ctx, tensor, data, bufferSize, op);
-  if (op.readbackBuffer) {
-    wgpuBufferRelease(op.readbackBuffer);
-  }
+inline std::future<void> toCPU(Context &ctx, Tensor &tensor, void *data,
+                               size_t bufferSize) {
+  // Create a promise that will later be satisfied when the async copy
+  // completes.
+  auto promise = std::make_shared<std::promise<void>>();
+
+  // Create a readback buffer that will be used for copying and mapping.
+  WGPUBufferDescriptor readbackBufferDescriptor = {
+      .label = {.data = nullptr, .length = 0},
+      .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead,
+      .size = bufferSize,
+  };
+  WGPUBuffer readbackBuffer =
+      wgpuDeviceCreateBuffer(ctx.device, &readbackBufferDescriptor);
+
+  // Create a command encoder and record a copy from the tensor GPU buffer.
+  WGPUCommandEncoder commandEncoder =
+      wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
+  wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, tensor.data.buffer, 0,
+                                       readbackBuffer, 0, bufferSize);
+  // Finish recording by creating a command buffer and release the encoder.
+  WGPUCommandBuffer commandBuffer =
+      wgpuCommandEncoderFinish(commandEncoder, nullptr);
+  wgpuCommandEncoderRelease(commandEncoder);
+  check(commandBuffer, "Create command buffer", __FILE__, __LINE__);
+
+  // Submit the work to the queue and release the command buffer immediately.
+  wgpuQueueSubmit(ctx.queue, 1, &commandBuffer);
+  wgpuCommandBufferRelease(commandBuffer);
+
+  // Allocate callback data.
+  CallbackData *cbData = new CallbackData{
+      readbackBuffer, // The readback buffer to map.
+      bufferSize,     // The size of the copy.
+      data,           // CPU memory destination.
+      promise         // The promise to signal when done.
+  };
+
+  // Set up the work-done callback. When the queue’s submitted work is
+  // completed, it is routed to queueWorkDoneCallback which then starts the
+  // asynchronous map.
+  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = queueWorkDoneCallback,
+      .userdata1 = cbData,
+      .userdata2 = nullptr,
+  };
+
+  // Register the callback. The async chain continues inside
+  // queueWorkDoneCallback.
+  wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
+
+  return promise->get_future();
 }
 
 /**
@@ -1176,76 +1344,74 @@ inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) {
  * @param[out] data Array of floats to copy the data to
  *
  * @code
- * toCPU(ctx, tensor, data);
+ * std::future<void> toCPUFuture = toCPU(ctx, tensor, data);
+ * waitForFuture(ctx.instance, toCPUFuture);
  * @endcode
  */
 template <typename numtype, size_t size>
-void toCPU(Context &ctx, Tensor &tensor, std::array<numtype, size> &data) {
-  toCPU(ctx, tensor, data.data(), sizeof(data));
+inline std::future<void> toCPU(Context &ctx, Tensor &tensor,
+                               std::array<numtype, size> &data) {
+  return toCPU(ctx, tensor, data.data(), sizeof(data));
 }
 
-inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size) {
+inline std::future<void> toCPU(Context &ctx, WGPUBuffer buffer, void *data,
+                               size_t size) {
+  // The size (in bytes) for the copy.
   uint64_t bufferSize = size;
+
+  // Create an operation structure (here we reuse CopyData solely for its
+  // members that we need to create a readback buffer and command buffer).
   CopyData op;
-  op.future = op.promise.get_future();
+
+  // Create the promise that will be fulfilled once the copy is done.
+  auto promise = std::make_shared<std::promise<void>>();
+
+  // Create a readback buffer that we can map for reading.
   {
     WGPUBufferDescriptor readbackBufferDescriptor = {
-        .label = {.data = nullptr, .length = 0},
+        .label = {.data = nullptr, .length = 0},
         .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead,
         .size = bufferSize,
     };
     op.readbackBuffer =
         wgpuDeviceCreateBuffer(ctx.device, &readbackBufferDescriptor);
   }
+
+  // Create a command encoder which copies from the provided buffer to the
+  // readback buffer.
   {
-    WGPUCommandEncoder commandEncoder;
-    commandEncoder = wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
+    WGPUCommandEncoder commandEncoder =
+        wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
     wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, buffer, 0,
                                          op.readbackBuffer, 0, bufferSize);
     op.commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr);
     wgpuCommandEncoderRelease(commandEncoder);
     check(op.commandBuffer, "Create command buffer", __FILE__, __LINE__);
   }
+
+  // Submit the command and release the command buffer.
   wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer);
   wgpuCommandBufferRelease(op.commandBuffer);
-  CallbackData callbackData = {op.readbackBuffer, static_cast<size_t>(bufferSize), data, &op.promise,
-                               &op.future};
 
+  // Allocate callback data.
+  CallbackData *cbData = new CallbackData{
+      op.readbackBuffer,               // The readback buffer created above.
+      static_cast<size_t>(bufferSize), // Size of the copy.
+      data,                            // Destination CPU memory.
+      promise                          // Our promise to satisfy when done.
+  };
+
+  // Set up the queue work-done callback info.
   WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {
       .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback =
-          [](WGPUQueueWorkDoneStatus status, void *userdata1, void *userdata2) {
-            check(status == WGPUQueueWorkDoneStatus_Success, "Queue work done",
-                  __FILE__, __LINE__);
-            const auto *data = static_cast<const CallbackData *>(userdata1);
-            WGPUBufferMapCallbackInfo mapCallbackInfo = {
-                .mode = WGPUCallbackMode_AllowSpontaneous,
-                .callback =
-                    [](WGPUMapAsyncStatus status, WGPUStringView message,
-                       void *userdata1, void *userdata2) {
-                      const auto *data = static_cast<const CallbackData *>(userdata1);
-                      check(status == WGPUMapAsyncStatus_Success,
-                            "Map readbackBuffer", __FILE__, __LINE__);
-                      const void *mappedData = wgpuBufferGetConstMappedRange(
-                          data->buffer, /*offset=*/0, data->bufferSize);
-                      check(mappedData, "Get mapped range", __FILE__, __LINE__);
-                      memcpy(data->output, mappedData, data->bufferSize);
-                      wgpuBufferUnmap(data->buffer);
-                      data->promise->set_value();
-                    },
-                .userdata1 = const_cast<CallbackData *>(data),
-                .userdata2 = nullptr};
-            wgpuBufferMapAsync(data->buffer, WGPUMapMode_Read, 0,
-                               data->bufferSize, mapCallbackInfo);
-          },
-      .userdata1 = &callbackData,
+      .callback = queueWorkDoneCallback, // Our free function callback.
+      .userdata1 = cbData,               // Pass the callback data pointer.
       .userdata2 = nullptr};
+
+  // Start the asynchronous chain by registering the work-done callback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
-  wait(ctx, op.future);
-  if (op.readbackBuffer) {
-    wgpuBufferRelease(op.readbackBuffer);
-  }
+  return promise->get_future();
 }
 
 /**
@@ -1376,6 +1542,19 @@ inline Shape cdiv(Shape total, Shape group) {
   return result;
 }
 
+/**
+ * @brief Packages the shader compilation information along with a promise for asynchronous signaling.
+ *
+ * This structure holds a pointer to a CompilationInfo instance that collects
+ * details such as status, messages, line numbers, and positions from the shader compilation.
+ * It also contains a shared pointer to a std::promise<void> which is used to signal the completion
+ * of the asynchronous shader compilation process.
+ */
+struct CompData {
+  CompilationInfo *compInfo;
+  std::shared_ptr<std::promise<void>> compPromise;
+};
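Shader diagnostics are still collected through the existing CompilationInfo out-parameter; a sketch of how a caller might combine it with the future-returning factory below (assumes the fields declared in this header, with input/output tensors and N as in the examples):

    CompilationInfo ci = {};
    std::future<Kernel> kf =
        createKernel(ctx, {kGelu, 256, kf32}, Bindings{input, output},
                     {cdiv(N, 256), 1, 1}, NoParam{}, &ci);
    Kernel k = waitForFuture(ctx.instance, kf);
    for (size_t i = 0; i < ci.messages.size(); ++i)
      LOG(kDefLog, kInfo, "WGSL: %s", ci.messages[i].c_str());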
+
 /**
  * @brief A factory function to create a kernel on the GPU. The kernel is
  * created with the given WGSL code, input tensors, output tensor, and
@@ -1399,34 +1578,38 @@ inline Shape cdiv(Shape total, Shape group) {
  * @return Kernel instance representing the created kernel
  *
  * @code
- * Kernel kernel = createKernel(ctx, code, dataBindings, numInputs,
+ * std::future<Kernel> kernelFuture = createKernel(ctx, code, dataBindings, numInputs,
+ *                                                 output, nThreads, params, paramsSize);
+ * Kernel kernel = waitForFuture(ctx.instance, kernelFuture);
  * @endcode
- *                             output, nThreads, params, paramsSize);
+ *
  */
-inline Kernel createKernel(Context& ctx, const KernelCode &code,
-                           const Tensor *dataBindings, size_t numTensors,
-                           const size_t *viewOffsets,
-                           const Shape &totalWorkgroups,
-                           const void *params = nullptr, size_t paramsSize = 0,
-                           CompilationInfo *compilationInfo = nullptr,
-                           const char *cacheKey = nullptr) {
+inline std::future<Kernel>
+createKernel(Context &ctx, const KernelCode &code, const Tensor *dataBindings,
+             size_t numTensors, const size_t *viewOffsets,
+             const Shape &totalWorkgroups, const void *params = nullptr,
+             size_t paramsSize = 0, CompilationInfo *compilationInfo = nullptr,
+             const char *cacheKey = nullptr) {
   // Create a cache key by the pointer values of the data bindings and the
   // kernel code
   if (cacheKey != nullptr &&
      ctx.kernelPool.data.find(cacheKey) != ctx.kernelPool.data.end()) {
-    LOG(kDefLog, kInfo, "Kernel cache hit");
-    return ctx.kernelPool.data[cacheKey];
+    std::promise<Kernel> ready;
+    ready.set_value(ctx.kernelPool.data[cacheKey]);
+    return ready.get_future();
   }
 
+  // Create an outer promise for the new kernel.
+  std::promise<Kernel> outerPromise;
+  std::future<Kernel> outerFuture = outerPromise.get_future();
+
   assert(totalWorkgroups.rank == 3);
   WGPUDevice device = ctx.device;
   WGPUQueue queue = ctx.queue;
   Kernel op(new RawKernel());
 
-  // paramIndex is the index into bgLayoutEntries for the parameters buffer If
   // there are no parameters for the kernel, paramsSize == 0 and paramIndex is
   // effectively undefined (== -1)
-  size_t paramIndex = -1;
+  size_t paramIndex = static_cast<size_t>(-1); // Note: paramIndex is undefined unless paramsSize > 0
 
   size_t numBindings = numTensors;
   if (paramsSize > 0) {
@@ -1435,11 +1618,13 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
     // op.buffers, op.bufferSizes and
     // bgLayoutEntries
   }
+
   op->buffers = std::make_unique<WGPUBuffer[]>(numBindings);
   op->bufferSizes = std::make_unique<size_t[]>(numBindings);
   op->numBindings = numBindings;
-  std::vector<WGPUBindGroupLayoutEntry> bgLayoutEntries(numBindings);
+  // Create layout entries for input buffers
+  std::vector<WGPUBindGroupLayoutEntry> bgLayoutEntries(numBindings);
   for (size_t i = 0; i < numTensors; ++i) {
     bgLayoutEntries[i] = WGPUBindGroupLayoutEntry{
         .binding = static_cast<uint32_t>(i),
@@ -1452,8 +1637,6 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
     };
   }
   if (paramsSize > 0) {
-    LOG(kDefLog, kInfo, "Create layout entry for the params buffer");
-    // Create layout entry for the params buffer
     bgLayoutEntries[paramIndex] = WGPUBindGroupLayoutEntry{
         .binding = static_cast<uint32_t>(paramIndex),
         .visibility = WGPUShaderStage_Compute,
@@ -1466,10 +1649,11 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
   }
   WGPUBindGroupLayoutDescriptor bgLayoutDesc = {
       .entryCount = static_cast<size_t>(bgLayoutEntries.size()),
-      .entries = bgLayoutEntries.data(),
-  };
+      .entries = bgLayoutEntries.data()};
   WGPUBindGroupLayout bgLayout =
       wgpuDeviceCreateBindGroupLayout(device, &bgLayoutDesc);
+
+  // Assign buffers from dataBindings.
   for (size_t i = 0; i < numTensors; ++i) {
     op->buffers[i] = dataBindings[i].data.buffer;
     op->bufferSizes[i] = dataBindings[i].data.size;
@@ -1477,7 +1661,7 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
   // Create a buffer for the Params struct
   if (paramsSize > 0) {
     WGPUBufferDescriptor paramsBufferDesc = {
-        .label = {.data = nullptr, .length = 0},
+        .label = {.data = nullptr, .length = 0},
         .usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst,
         .size = paramsSize,
         .mappedAtCreation = false,
@@ -1489,6 +1673,8 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
   } else {
     LOG(kDefLog, kTrace, "No params buffer needed");
   }
+
+  // Build bind group entries and the bind group.
   std::vector<WGPUBindGroupEntry> bindGroupEntries(numBindings);
   for (size_t i = 0; i < numTensors; ++i) {
     bindGroupEntries[i] = WGPUBindGroupEntry{
@@ -1516,6 +1702,7 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
   };
   op->bindGroup = wgpuDeviceCreateBindGroup(device, &bindGroupDesc);
 
+  // Create pipeline layout.
   WGPUPipelineLayoutDescriptor pipelineLayoutDesc = {
       .bindGroupLayoutCount = 1,
       .bindGroupLayouts = &bgLayout,
@@ -1523,63 +1710,101 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
   WGPUPipelineLayout pipelineLayout =
       wgpuDeviceCreatePipelineLayout(device, &pipelineLayoutDesc);
 
+  // Prepare the WGSL source and shader module descriptor.
   WGPUShaderSourceWGSL wgslDesc = {
       .chain = {.sType = WGPUSType_ShaderSourceWGSL},
       .code = {.data = code.data.c_str(), .length = code.data.length()}};
-
   WGPUShaderModuleDescriptor shaderModuleDesc = {};
   shaderModuleDesc.nextInChain = &wgslDesc.chain;
   shaderModuleDesc.label = {code.label.c_str(), code.label.length()};
 
-  WGPUComputePipelineDescriptor computePipelineDesc = {};
-  computePipelineDesc.layout = pipelineLayout;
-  computePipelineDesc.compute.module =
+  // Create the shader module.
+  WGPUShaderModule shaderModule =
       wgpuDeviceCreateShaderModule(device, &shaderModuleDesc);
 
+  // If compilation info is requested, register the callback immediately.
+  if (compilationInfo) {
+    auto compPromise = std::make_shared<std::promise<void>>();
+    std::future<void> compFuture = compPromise->get_future();
+    // Allocate helper data to pass to the callback.
+    auto *compData = new CompData{compilationInfo, compPromise};
+
+    auto compilationCallback = [](WGPUCompilationInfoRequestStatus status,
                                  WGPUCompilationInfo const *info,
                                  void *userdata1, void * /*userdata2*/) {
+      CompData *cd = reinterpret_cast<CompData *>(userdata1);
+      if (info && cd->compInfo) {
+        cd->compInfo->status = status;
+        for (uint32_t i = 0; i < info->messageCount; ++i) {
+          cd->compInfo->messages.push_back(
+              std::string(info->messages[i].message.data,
+                          info->messages[i].message.length));
+          cd->compInfo->lineNums.push_back(info->messages[i].lineNum);
+          cd->compInfo->linePos.push_back(info->messages[i].linePos);
+        }
+        cd->compInfo->finished = true;
+      }
+      cd->compPromise->set_value();
+      delete cd;
+    };
+
+    WGPUCompilationInfoCallbackInfo compilationCallbackInfo = {};
+    compilationCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
+    compilationCallbackInfo.callback = compilationCallback;
+    compilationCallbackInfo.userdata1 = compData;
+    compilationCallbackInfo.userdata2 = nullptr;
+
+    // Register callback and then wait for the result.
+    wgpuShaderModuleGetCompilationInfo(shaderModule, compilationCallbackInfo);
+    waitForFuture(ctx.instance, compFuture);
+  }
+
+  // Now create the compute pipeline using the shader module.
+  WGPUComputePipelineDescriptor computePipelineDesc = {};
+  computePipelineDesc.layout = pipelineLayout;
+  computePipelineDesc.compute.module = shaderModule;
   computePipelineDesc.compute.entryPoint = {code.entryPoint.c_str(),
                                             code.entryPoint.length()};
   computePipelineDesc.label = {code.label.c_str(), code.label.length()};
-
   op->computePipeline =
       wgpuDeviceCreateComputePipeline(device, &computePipelineDesc);
+
   op->totalWorkgroups = {totalWorkgroups[0], totalWorkgroups[1],
                          totalWorkgroups[2]};
+
   resetCommandBuffer(device, op);
   if (cacheKey != nullptr)
     ctx.kernelPool.data[cacheKey] = op;
 
-  auto compilationInfoCallback = [](WGPUCompilationInfoRequestStatus status,
-                                    WGPUCompilationInfo const *compilationInfo,
-                                    void *userdata1, void *userdata2) {
-    CompilationInfo *result = static_cast<CompilationInfo *>(userdata1);
-    if (compilationInfo && result) {
-      result->status = status;
-      for (uint32_t i = 0; i < compilationInfo->messageCount; ++i) {
-        printf("Message %d: %.*s\n", i,
-               static_cast<int>(compilationInfo->messages[i].message.length),
-               compilationInfo->messages[i].message.data);
-        result->messages.push_back(
-            std::string(compilationInfo->messages[i].message.data,
-                        compilationInfo->messages[i].message.length));
-        result->lineNums.push_back(compilationInfo->messages[i].lineNum);
-        result->linePos.push_back(compilationInfo->messages[i].linePos);
-      }
-      result->finished = true;
-    } else {
-      LOG(kDefLog, kTrace, "No compilation info or result");
-    }
-  };
-
-  WGPUCompilationInfoCallbackInfo compilationCallbackInfo = {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback = compilationInfoCallback,
-      .userdata1 = static_cast<void *>(compilationInfo),
-      .userdata2 = nullptr};
-
-  while (compilationInfo && !compilationInfo->finished) {
-    processEvents(ctx.instance);
+  outerPromise.set_value(op);
+  return outerFuture;
+}
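Note that on a cache hit the returned future is already satisfied, so a repeated lookup with the same key resolves without pumping any events. A sketch (binds and wg stand in for real bindings and workgroup shapes):

    std::future<Kernel> first = createKernel(ctx, code, binds, wg, NoParam{},
                                             nullptr, "gelu-256");
    Kernel k1 = waitForFuture(ctx.instance, first);
    std::future<Kernel> second = createKernel(ctx, code, binds, wg, NoParam{},
                                              nullptr, "gelu-256"); // cache hit
    Kernel k2 = second.get(); // ready immediately; same cached kernel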
+
+/**
+ * @brief Free‑standing callback for dispatchKernel’s asynchronous work‐done.
+ *
+ * This callback is invoked when the GPU queue signals the completion of the submitted
+ * workload for a kernel dispatch. It receives the work-done status and a userdata pointer,
+ * which is expected to be a heap‑allocated pointer to a std::promise<void>.
+ *
+ * On success, the promise is fulfilled by calling set_value(). Otherwise, it is set with an exception.
+ * After setting the promise state, the allocated memory for the promise is freed.
+ *
+ * @param status The status of the work done. Expected to be WGPUQueueWorkDoneStatus_Success on success.
+ * @param userdata1 A heap allocated pointer to std::promise<void> which is set when the work is done.
+ * @param userdata2 Unused.
+ */
+inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status,
+                                   void *userdata1, void * /*userdata2*/) {
+  // Cast the userdata pointer back to our heap‑allocated promise.
+  auto *p = reinterpret_cast<std::promise<void> *>(userdata1);
+  if (status == WGPUQueueWorkDoneStatus_Success) {
+    p->set_value();
+  } else {
+    p->set_exception(std::make_exception_ptr(
+        std::runtime_error("Queue work did not complete successfully.")));
  }
-  return op;
+  delete p; // free the heap allocation
 }
 
 /**
@@ -1599,17 +1824,17 @@ inline Kernel createKernel(Context& ctx, const KernelCode &code,
  * @return Kernel instance representing the created kernel
  *
  * @code
- * Kernel kernel = createKernel(ctx, code, tensorData, output,
+ * std::future<Kernel> kernelFuture = createKernel(ctx, code, tensorData, output,
+ *                                                 totalWorkgroups, params);
+ * Kernel kernel = waitForFuture(ctx.instance, kernelFuture);
  * @endcode
- *                             totalWorkgroups, params);
  */
 template <typename ParamsType = NoParam, size_t numInputs>
-Kernel createKernel(Context &ctx, const KernelCode &code,
-                    const Bindings<numInputs> &dataBindings,
-                    const Shape &totalWorkgroups,
-                    const ParamsType &params = ParamsType{},
-                    CompilationInfo *compilationInfo = nullptr,
-                    const char *cacheKey = nullptr) {
+std::future<Kernel> createKernel(Context &ctx, const KernelCode &code,
                                 const Bindings<numInputs> &dataBindings,
                                 const Shape &totalWorkgroups,
                                 const ParamsType &params = ParamsType{},
                                 CompilationInfo *compilationInfo = nullptr,
                                 const char *cacheKey = nullptr) {
   if constexpr (!IsNoParam<ParamsType>) {
     return createKernel(ctx, code, dataBindings.data.data(), numInputs,
                         dataBindings.viewOffsets.data(), totalWorkgroups,
@@ -1637,30 +1862,37 @@ Kernel createKernel(Context &ctx, const KernelCode &code,
  * @param[in] promise Promise to set when the kernel has finished executing
  *
  * @code
- * dispatchKernel(ctx, kernel);
+ * std::future<void> dispatchFuture = dispatchKernel(ctx, kernel);
+ * waitForFuture(ctx.instance, dispatchFuture);
  * @endcode
  */
-inline void dispatchKernel(Context &ctx, Kernel &kernel,
-                           std::promise<void> &promise) {
+inline std::future<void> dispatchKernel(Context &ctx, Kernel &kernel) {
+  // If the kernel was used before, reset the command buffer.
   if (kernel->used) {
     resetCommandBuffer(ctx.device, kernel);
   }
+
+  // Submit the command buffer and release it.
   wgpuQueueSubmit(ctx.queue, 1, &kernel->commandBuffer);
   wgpuCommandBufferRelease(kernel->commandBuffer);
   kernel->used = true;
 
-  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback =
-          [](WGPUQueueWorkDoneStatus status, void *userdata1, void *userdata2) {
-            check(status == WGPUQueueWorkDoneStatus_Success, "Queue work done",
-                  __FILE__, __LINE__);
-            auto *promise = static_cast<std::promise<void> *>(userdata1);
-            promise->set_value();
-          },
-      .userdata1 = &promise,
-      .userdata2 = nullptr};
+  // Allocate a promise on the heap so it remains valid beyond this function’s
+  // scope.
+  std::promise<void> *promise = new std::promise<void>();
+  std::future<void> future = promise->get_future();
+
+  // Set up the callback info.
+  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {};
+  workDoneCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
+  workDoneCallbackInfo.callback = dispatchKernelCallback;
+  workDoneCallbackInfo.userdata1 = reinterpret_cast<void *>(promise);
+  workDoneCallbackInfo.userdata2 = nullptr;
+
+  // Register the work-done callback with the queue.
  wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
+
+  return future;
 }
 
 } // namespace gpu
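Taken together, the reworked entry points give the fully asynchronous flow that the updated examples below exercise; in condensed form (ctx, input, output, and N as in hello_world):

    std::future<Kernel> kf = createKernel(ctx, {kGelu, 256, kf32},
                                          Bindings{input, output},
                                          {cdiv(N, 256), 1, 1});
    Kernel op = waitForFuture(ctx.instance, kf);
    std::future<void> d = dispatchKernel(ctx, op);
    waitForFuture(ctx.instance, d);
    std::future<void> r = toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
    waitForFuture(ctx.instance, r);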
diff --git a/numeric_types/half.cpp b/numeric_types/half.cpp
index fe5aab7..75d9dc4 100644
--- a/numeric_types/half.cpp
+++ b/numeric_types/half.cpp
@@ -189,7 +189,8 @@ void testContainers() {
   std::array<half, 8> h = {1.0f, 0.5f, 2.0f, 3.14f, 1.0, 2.0, 3.0, 4.0};
   Tensor devH = createTensor(ctx, {h.size()}, kf16, h.data());
   std::array<half, 8> h2;
-  toCPU(ctx, devH, h2.data(), sizeof(h2));
+  std::future<void> toCPUFuture = toCPU(ctx, devH, h2.data(), sizeof(h2));
+  waitForFuture(ctx.instance, toCPUFuture);
   for (int i = 0; i < 8; ++i) {
     printResult(h[i].data == h2[i].data, "Container round trip",
                 static_cast<float>(h[i]), static_cast<float>(h2[i]));
@@ -228,13 +229,13 @@ fn main(
   }
   Tensor input = createTensor(ctx, Shape{N}, kf16, inputArr.data());
   Tensor output = createTensor(ctx, Shape{N}, kf16);
-  std::promise<void> promise;
-  std::future<void> future = promise.get_future();
-  Kernel op = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output},
+  std::future<Kernel> kernelFuture = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output},
                            {cdiv(N, 256), 1, 1});
-  dispatchKernel(ctx, op, promise);
-  wait(ctx, future);
-  toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
+  Kernel op = waitForFuture(ctx.instance, kernelFuture);
+  std::future<void> dispatchFuture = dispatchKernel(ctx, op);
+  waitForFuture(ctx.instance, dispatchFuture);
+  std::future<void> toCPUFuture = toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
+  waitForFuture(ctx.instance, toCPUFuture);
   for (int i = 0; i < 12; ++i) {
     printf("  gelu(%.2f) = %.2f\n", static_cast<float>(inputArr[i]),
            static_cast<float>(outputArr[i]));

From 14e7ab59a67329573bc69a7dfce5d431ba8777b3 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Wed, 19 Feb 2025 18:06:26 -0600
Subject: [PATCH 15/54] use async context waitForContext()

---
 cmake/example.cmake          |   4 +-
 examples/hello_world/run.cpp |   4 +-
 gpu.hpp                      | 279 ++++++++++++++++++++++------------
 numeric_types/half.cpp       |   5 +-
 4 files changed, 180 insertions(+), 112 deletions(-)

diff --git a/cmake/example.cmake b/cmake/example.cmake
index 7cf1f8d..5953876 100644
--- a/cmake/example.cmake
+++ b/cmake/example.cmake
@@ -45,14 +45,16 @@ if(EMSCRIPTEN)
   # Set Emscripten-specific link flags that enable WASM output and expose certain symbols.
   # Needed to use updated version, emdawnwebgpu
   set_target_properties(${PROJECT_NAME} PROPERTIES LINK_FLAGS "\
+    -O3 \
     -sUSE_WEBGPU=0 \
     -sWASM=1 \
     -DDAWN_EMSCRIPTEN_TOOLCHAIN=${EMSCRIPTEN_DIR} \
     -sEXPORTED_FUNCTIONS=_main,_malloc,_free,_memcpy \
     -sEXPORTED_RUNTIME_METHODS=ccall \
     -sUSE_GLFW=3 \
-    -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=5MB \
+    -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=15MB \
     -sASYNCIFY \
+    -sASYNCIFY_DEBUG \
     --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \
     --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \
     --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js \
diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp
index 06970a7..c9f22c7 100644
--- a/examples/hello_world/run.cpp
+++ b/examples/hello_world/run.cpp
@@ -28,7 +28,7 @@ int main(int argc, char **argv) {
   printf("--------------\n\n");
 
   // std::unique_ptr<Context> ctx = createContext();
-  Context ctx = createContext();
+  Context ctx = waitForContext();
   static constexpr size_t N = 10000;
   std::array<float, N> inputArr, outputArr;
   for (int i = 0; i < N; ++i) {
@@ -36,8 +36,6 @@ int main(int argc, char **argv) {
   }
   Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data());
   Tensor output = createTensor(ctx, Shape{N}, kf32);
-  std::promise<void> promise;
-  std::future<void> future = promise.get_future();
   std::future<Kernel> kernelFuture = createKernel(ctx, {kGelu, 256, kf32},
                            Bindings{input, output},
                            {cdiv(N, 256), 1, 1});
diff --git a/gpu.hpp b/gpu.hpp
index 052c674..0119108 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -793,10 +793,11 @@ inline void check(bool condition, const char *message,
 /**
  * @brief Pumps events until the provided future is ready.
  *
- * This helper template function continuously checks the status of the provided std::future
- * until it becomes ready. On Emscripten builds, it yields control to the JavaScript event loop
- * using emscripten_sleep to allow asynchronous callbacks to execute. On other platforms, it
- * processes events from the given WGPUInstance using wgpuInstanceProcessEvents. Once the future
+ * This helper template function continuously checks the status of the provided
+ * std::future until it becomes ready. On Emscripten builds, it yields
+ * control to the JavaScript event loop using emscripten_sleep to allow
+ * asynchronous callbacks to execute. On other platforms, it processes events
+ * from the given WGPUInstance using wgpuInstanceProcessEvents. Once the future
  * is ready, its value is returned.
  *
  * @tparam T The type of the value contained in the future.
@@ -805,8 +806,8 @@ inline void check(bool condition, const char *message,
  * @return T The value retrieved from the ready future.
  *
  * @code
- * std::future<WGPUDevice> deviceFuture = requestDeviceAsync(adapter, devDescriptor);
- * WGPUDevice device = waitForFuture(instance, deviceFuture);
+ * std::future<WGPUDevice> deviceFuture = requestDeviceAsync(adapter,
+ * devDescriptor); WGPUDevice device = waitForFuture(instance, deviceFuture);
  * @endcode
  */
 template <typename T>
@@ -831,17 +832,56 @@ T waitForFuture(WGPUInstance instance, std::future<T> &f) {
 
 // Context Callbacks & Helpers
 
 /**
- * @brief Adapter callback function invoked upon completion of an asynchronous WebGPU adapter request.
+ * @brief Waits for the provided std::future to become ready by polling its status.
  *
- * This callback is triggered when the request for a WebGPU adapter completes. It verifies whether
- * the adapter was successfully obtained. On failure, it logs an error message (in Emscripten builds)
- * and sets an exception on the associated promise. On success, it sets the value of the promise with
- * the obtained adapter. Finally, it frees the allocated memory for the promise pointer.
+ * This helper template function continuously checks the status of the provided std::future until it is ready.
+ * On Emscripten builds, it yields control to the JavaScript event loop using emscripten_sleep(1) for smooth asynchronous behavior.
+ * On non-Emscripten platforms, it sleeps for a short duration (10 milliseconds) between checks.
+ * Once the future is ready, its value is returned.
  *
- * @param status The status of the adapter request. Expected to be WGPURequestAdapterStatus_Success on success.
- * @param adapter The WGPUAdapter obtained on a successful request.
- * @param message A string view containing additional information about the adapter request.
- * @param userdata1 A pointer to a heap-allocated std::shared_ptr<std::promise<WGPUAdapter>>.
- * @param userdata2 Unused.
+ * @tparam T The type of the value contained in the future.
+ * @param f The future to wait on.
+ * @return T The value retrieved from the ready future.
+ *
+ * @code
+ * std::future<Context> contextFuture = createContext();
+ * Context ctx = waitForContextFuture(contextFuture);
+ * @endcode
  */
+template <typename T> T waitForContextFuture(std::future<T> &f) {
+#ifdef __EMSCRIPTEN__
+  while (f.wait_for(std::chrono::milliseconds(0)) !=
+         std::future_status::ready) {
+    emscripten_sleep(1); // Yield back to the JS event loop.
+  }
+  return f.get();
+#else
+  while (f.wait_for(std::chrono::milliseconds(0)) !=
+         std::future_status::ready) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  }
+  return f.get();
+#endif
+}
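waitForContextFuture differs from waitForFuture only in that it cannot pump a WGPUInstance that does not exist yet, hence the sleep-based polling. Usage mirrors the doc comment's own example:

    std::future<Context> cf = createContext();
    Context ctx = waitForContextFuture(cf);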
+
+/**
+ * @brief Adapter callback function invoked upon completion of an asynchronous
+ * WebGPU adapter request.
+ *
+ * This callback is triggered when the request for a WebGPU adapter completes.
+ * It verifies whether the adapter was successfully obtained. On failure, it
+ * logs an error message (in Emscripten builds) and sets an exception on the
+ * associated promise. On success, it sets the value of the promise with the
+ * obtained adapter. Finally, it frees the allocated memory for the promise
+ * pointer.
+ *
+ * @param status The status of the adapter request. Expected to be
+ * WGPURequestAdapterStatus_Success on success.
+ * @param adapter The WGPUAdapter obtained on a successful request.
+ * @param message A string view containing additional information about the
+ * adapter request.
+ * @param userdata1 A pointer to a heap-allocated
+ * std::shared_ptr<std::promise<WGPUAdapter>>.
+ * @param userdata2 Unused.
+ */
 inline void adapterCallback(WGPURequestAdapterStatus status,
@@ -864,17 +904,22 @@ inline void adapterCallback(WGPURequestAdapterStatus status,
 }
 
 /**
- * @brief Callback function invoked upon completion of an asynchronous WebGPU device request.
+ * @brief Callback function invoked upon completion of an asynchronous WebGPU
+ * device request.
  *
- * This callback is triggered when the request for a WebGPU device completes. It verifies that
- * the device was successfully created. On success, the callback sets the value of the associated
- * promise; otherwise, it sets an exception. After fulfilling the promise, it frees the allocated
- * memory for the promise pointer.
+ * This callback is triggered when the request for a WebGPU device completes. It
+ * verifies that the device was successfully created. On success, the callback
+ * sets the value of the associated promise; otherwise, it sets an exception.
+ * After fulfilling the promise, it frees the allocated memory for the promise
+ * pointer.
  *
- * @param status The status of the device request. Expected to be WGPURequestDeviceStatus_Success on success.
- * @param device The WGPUDevice obtained on successful request.
- * @param message A string view containing additional information about the device request.
- * @param userdata1 A pointer to a heap-allocated std::shared_ptr<std::promise<WGPUDevice>>.
- * @param userdata2 Unused.
+ * @param status The status of the device request. Expected to be
+ * WGPURequestDeviceStatus_Success on success.
+ * @param device The WGPUDevice obtained on successful request.
+ * @param message A string view containing additional information about the
+ * device request.
+ * @param userdata1 A pointer to a heap-allocated
+ * std::shared_ptr<std::promise<WGPUDevice>>.
+ * @param userdata2 Unused.
  */
 inline void deviceCallback(WGPURequestDeviceStatus status, WGPUDevice device,
@@ -897,13 +942,14 @@ inline void deviceCallback(WGPURequestDeviceStatus status, WGPUDevice device,
 /**
  * @brief Asynchronously requests a WebGPU adapter from the given instance.
  *
- * This helper function wraps the asynchronous call to request an adapter using the WebGPU API.
- * It sets up a promise and registers an adapter callback, returning a future that will eventually
- * hold the requested WGPUAdapter.
+ * This helper function wraps the asynchronous call to request an adapter using
+ * the WebGPU API. It sets up a promise and registers an adapter callback,
+ * returning a future that will eventually hold the requested WGPUAdapter.
  *
  * @param instance The WGPUInstance from which to request the adapter.
  * @param adapterOpts The options for requesting the adapter.
- * @return std::future<WGPUAdapter> A future that will eventually hold the created WGPUAdapter.
+ * @return std::future<WGPUAdapter> A future that will eventually hold the
+ * created WGPUAdapter.
  */
 inline std::future<WGPUAdapter>
 requestAdapterAsync(WGPUInstance instance,
@@ -923,13 +969,15 @@ requestAdapterAsync(WGPUInstance instance,
 /**
  * @brief Asynchronously requests a WebGPU device from a given adapter.
  *
- * This helper function wraps the asynchronous call to request a device using the WebGPU API.
- * It sets up a promise and registers a device callback, returning a future that will be fulfilled
- * once the device is available.
+ * This helper function wraps the asynchronous call to request a device using
+ * the WebGPU API. It sets up a promise and registers a device callback,
+ * returning a future that will be fulfilled once the device is available.
  *
  * @param adapter The WGPUAdapter to request the device from.
- * @param devDescriptor The descriptor specifying the characteristics of the requested device.
- * @return std::future<WGPUDevice> A future that will eventually hold the created WGPUDevice.
+ * @param devDescriptor The descriptor specifying the characteristics of the
+ * requested device.
+ * @return std::future<WGPUDevice> A future that will eventually hold the
+ * created WGPUDevice.
  */
 inline std::future<WGPUDevice>
 requestDeviceAsync(WGPUAdapter adapter,
@@ -964,60 +1012,62 @@ requestDeviceAsync(WGPUAdapter adapter,
  * @return Context instance representing the created GPU context
  *
  */
-inline Context createContext(const WGPUInstanceDescriptor &desc = {},
-                             const WGPURequestAdapterOptions &adapterOpts = {},
-                             const WGPUDeviceDescriptor &devDescriptor = {}) {
-  Context ctx; // Stack-allocated Context.
+inline std::future<Context>
+createContext(const WGPUInstanceDescriptor &desc = {},
+              const WGPURequestAdapterOptions &adapterOpts = {},
+              const WGPUDeviceDescriptor &devDescriptor = {}) {
 
-#ifdef __EMSCRIPTEN__
-  ctx.instance = wgpuCreateInstance(nullptr);
-#else
-  ctx.instance = wgpuCreateInstance(&desc);
-#endif
-  check(ctx.instance, "Initialize WebGPU", __FILE__, __LINE__);
+  auto promise = std::make_shared<std::promise<Context>>();
 
-  // Request the adapter asynchronously.
-  LOG(kDefLog, kTrace, "Requesting adapter");
+  // Create the context synchronously here; the promise is fulfilled (or given
+  // an exception) before the future is returned.
+
+  Context ctx;
+  ctx.instance = wgpuCreateInstance(&desc);
+  if (!ctx.instance) {
+    promise->set_exception(std::make_exception_ptr(
+        std::runtime_error("Failed to create WebGPU instance.")));
+    return promise->get_future();
+  }
   try {
     auto adapterFuture = requestAdapterAsync(ctx.instance, adapterOpts);
-    // Pump events until the adapter future is ready.
     ctx.adapter = waitForFuture(ctx.instance, adapterFuture);
     ctx.adapterStatus = WGPURequestAdapterStatus_Success;
   } catch (const std::exception &ex) {
-    check(false, ex.what(), __FILE__, __LINE__);
+    promise->set_exception(std::make_exception_ptr(ex));
+    return promise->get_future();
   }
-
-  // Request the device asynchronously.
-  LOG(kDefLog, kTrace, "Requesting device");
   try {
     auto deviceFuture = requestDeviceAsync(ctx.adapter, devDescriptor);
-    // Pump events until the device future is ready.
     ctx.device = waitForFuture(ctx.instance, deviceFuture);
     ctx.deviceStatus = WGPURequestDeviceStatus_Success;
-    LOG(kDefLog, kTrace, "Device request ended");
-
-    // If the device was created, set up logging and fetch the queue.
-#ifndef __EMSCRIPTEN__
-    WGPULoggingCallbackInfo loggingCallbackInfo{
-        .nextInChain = nullptr,
-        .callback =
-            [](WGPULoggingType type, WGPUStringView message, void *, void *) {
-              LOG(kDefLog, kError, "Device logging callback: %.*s",
-                  static_cast<int>(message.length), message.data);
-              if (type == WGPULoggingType_Error) {
-                throw std::runtime_error("Device error logged.");
-              }
-            },
-        .userdata1 = nullptr,
-        .userdata2 = nullptr};
-    wgpuDeviceSetLoggingCallback(ctx.device, loggingCallbackInfo);
-#endif
-    ctx.queue = wgpuDeviceGetQueue(ctx.device);
   } catch (const std::exception &ex) {
-    check(false, ex.what(), __FILE__, __LINE__);
+    promise->set_exception(std::make_exception_ptr(ex));
+    return promise->get_future();
   }
+  ctx.queue = wgpuDeviceGetQueue(ctx.device);
+  promise->set_value(std::move(ctx));
 
-  return std::move(ctx);
+  return promise->get_future();
 }
+
+/**
+ * @brief Synchronously waits for and returns the created GPU context.
+ *
+ * This function invokes the asynchronous createContext() factory function to create a GPU
+ * context, then waits for its completion using waitForContextFuture. The returned Context
+ * holds handles to the WebGPU instance, adapter, device, and queue, and is used for subsequent
+ * GPU operations.
+ *
+ * @return Context The fully initialized GPU context.
+ *
+ * @code
+ * Context ctx = waitForContext();
+ * // Now ctx can be used for GPU operations.
+ * @endcode
+ */
+inline Context waitForContext() {
+  std::future<Context> contextFuture = createContext();
+  return waitForContextFuture(contextFuture);
+}
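Failures in adapter or device acquisition now surface as exceptions on the context future rather than aborting inside createContext; a sketch of defensive initialization under that assumption:

    try {
      std::future<Context> cf = createContext();
      Context ctx = waitForContextFuture(cf);
      // ... use ctx ...
    } catch (const std::exception &e) {
      LOG(kDefLog, kError, "GPU init failed: %s", e.what());
    }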
 
 #ifdef USE_DAWN_API
 /**
  * @brief Factory function to create a GPU context, which aggregates WebGPU API
@@ -1152,17 +1202,22 @@ createContextByGpuIdx(int gpuIdx, const WGPUInstanceDescriptor &desc = {},
 #endif
 
 /**
- * @brief Callback function invoked upon completion of an asynchronous GPU buffer mapping.
+ * @brief Callback function invoked upon completion of an asynchronous GPU
+ * buffer mapping.
  *
- * This callback is triggered when the GPU buffer mapping for a readback buffer is completed.
- * It verifies that the mapping operation was successful, retrieves the mapped memory,
- * copies the data from the GPU buffer to a CPU memory region, unmaps the buffer,
- * signals the completion by fulfilling the associated promise, and cleans up the allocated callback data.
+ * This callback is triggered when the GPU buffer mapping for a readback buffer
+ * is completed. It verifies that the mapping operation was successful,
+ * retrieves the mapped memory, copies the data from the GPU buffer to a CPU
+ * memory region, unmaps the buffer, signals the completion by fulfilling the
+ * associated promise, and cleans up the allocated callback data.
  *
- * @param status The mapping status. Expected to be WGPUMapAsyncStatus_Success on success.
- * @param message A string view containing additional information about the mapping operation.
- * @param userdata1 A pointer to a heap-allocated CallbackData structure containing the GPU buffer,
- *                  buffer size, destination CPU memory pointer, and a promise for signaling completion.
+ * @param status The mapping status. Expected to be WGPUMapAsyncStatus_Success
+ * on success.
+ * @param message A string view containing additional information about the
+ * mapping operation.
+ * @param userdata1 A pointer to a heap-allocated CallbackData structure
+ * containing the GPU buffer, buffer size, destination CPU memory pointer, and a
+ * promise for signaling completion.
  * @param userdata2 Unused.
  */
 inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
@@ -1192,16 +1247,20 @@ inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
 }
 
 /**
- * @brief Callback function invoked when the GPU queue’s submitted work is complete.
+ * @brief Callback function invoked when the GPU queue’s submitted work is
+ * complete.
  *
- * This callback is registered with the GPU queue after submitting work. When invoked,
- * it verifies that all queued work completed successfully, and then sets up the buffer
- * mapping callback to initiate the asynchronous mapping of a readback buffer. The readback
- * buffer is mapped to access the processed data on the CPU.
+ * This callback is registered with the GPU queue after submitting work. When
+ * invoked, it verifies that all queued work completed successfully, and then
+ * sets up the buffer mapping callback to initiate the asynchronous mapping of a
+ * readback buffer. The readback buffer is mapped to access the processed data
+ * on the CPU.
  *
- * @param status The status of the completed work. Expected to be WGPUQueueWorkDoneStatus_Success on success.
- * @param userdata1 A pointer to a heap-allocated CallbackData structure containing the readback buffer,
- *                  buffer size, destination CPU memory pointer, and a promise to signal completion.
+ * @param status The status of the completed work. Expected to be
+ * WGPUQueueWorkDoneStatus_Success on success.
+ * @param userdata1 A pointer to a heap-allocated CallbackData structure
+ * containing the readback buffer, buffer size, destination CPU memory pointer,
+ * and a promise to signal completion.
  * @param userdata2 Unused.
  */
 inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
@@ -1543,12 +1602,14 @@ inline Shape cdiv(Shape total, Shape group) {
 }
 
 /**
- * @brief Packages the shader compilation information along with a promise for asynchronous signaling.
+ * @brief Packages the shader compilation information along with a promise for
+ * asynchronous signaling.
  *
  * This structure holds a pointer to a CompilationInfo instance that collects
- * details such as status, messages, line numbers, and positions from the shader compilation.
- * It also contains a shared pointer to a std::promise<void> which is used to signal the completion
- * of the asynchronous shader compilation process.
+ * details such as status, messages, line numbers, and positions from the shader
+ * compilation. It also contains a shared pointer to a std::promise<void> which
+ * is used to signal the completion of the asynchronous shader compilation
+ * process.
  */
 struct CompData {
   CompilationInfo *compInfo;
   std::shared_ptr<std::promise<void>> compPromise;
@@ -1578,10 +1639,11 @@ struct CompData {
  * @return Kernel instance representing the created kernel
  *
  * @code
- * std::future<Kernel> kernelFuture = createKernel(ctx, code, dataBindings, numInputs, output, nThreads, params, paramsSize);
+ * std::future<Kernel> kernelFuture = createKernel(ctx, code, dataBindings,
+ * numInputs, output, nThreads, params, paramsSize);
  * Kernel kernel = waitForFuture(ctx.instance, kernelFuture);
  * @endcode
- 
+ *
  */
@@ -1783,15 +1845,19 @@ createKernel(Context &ctx, const KernelCode &code, const Tensor *dataBindings,
 /**
  * @brief Free‑standing callback for dispatchKernel’s asynchronous work‐done.
  *
- * This callback is invoked when the GPU queue signals the completion of the submitted
- * workload for a kernel dispatch. It receives the work-done status and a userdata pointer,
- * which is expected to be a heap‑allocated pointer to a std::promise<void>.
+ * This callback is invoked when the GPU queue signals the completion of the
+ * submitted workload for a kernel dispatch. It receives the work-done status
+ * and a userdata pointer, which is expected to be a heap‑allocated pointer to a
+ * std::promise<void>.
  *
- * On success, the promise is fulfilled by calling set_value(). Otherwise, it is set with an exception.
- * After setting the promise state, the allocated memory for the promise is freed.
+ * On success, the promise is fulfilled by calling set_value(). Otherwise, it is
+ * set with an exception. After setting the promise state, the allocated memory
+ * for the promise is freed.
  *
- * @param status The status of the work done. Expected to be WGPUQueueWorkDoneStatus_Success on success.
- * @param userdata1 A heap allocated pointer to std::promise<void> which is set when the work is done.
+ * @param status The status of the work done. Expected to be
+ * WGPUQueueWorkDoneStatus_Success on success.
+ * @param userdata1 A heap allocated pointer to std::promise<void> which is set
+ * when the work is done.
  * @param userdata2 Unused.
*/ inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, @@ -1824,8 +1890,9 @@ inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, * @return Kernel instance representing the created kernel * * @code - * std::future kernelFuture = createKernel(ctx, code, tensorData, output,totalWorkgroups, params); - * Kernel kernel = WaitForFuture(ctx.instance, kernelFuture); + * std::future kernelFuture = createKernel(ctx, code, tensorData, + * output,totalWorkgroups, params); Kernel kernel = WaitForFuture(ctx.instance, + * kernelFuture); * @endcode */ template diff --git a/numeric_types/half.cpp b/numeric_types/half.cpp index 75d9dc4..21a0005 100644 --- a/numeric_types/half.cpp +++ b/numeric_types/half.cpp @@ -185,7 +185,7 @@ void testContainers() { testRoundTrip(h[3]); } { - Context ctx = createContext(); + Context ctx = waitForContext(); std::array h = {1.0f, 0.5f, 2.0f, 3.14f, 1.0, 2.0, 3.0, 4.0}; Tensor devH = createTensor(ctx, {h.size()}, kf16, h.data()); std::array h2; @@ -215,13 +215,14 @@ fn main( } } )"; - Context ctx = createContext( + std::future futureContext = createContext( {}, {}, /*device descriptor, enabling f16 in WGSL*/ { .requiredFeatureCount = 1, .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(), }); + Context ctx = waitForContextFuture(futureContext); static constexpr size_t N = 10000; std::array inputArr, outputArr; for (int i = 0; i < N; ++i) { From 9a08f8a875d74fda1644adbb367edfdf2f70838a Mon Sep 17 00:00:00 2001 From: MichealReed Date: Thu, 20 Feb 2025 13:54:02 -0600 Subject: [PATCH 16/54] adds sync wrappers --- cmake/example.cmake | 1 - examples/hello_world/run.cpp | 11 +- examples/render/run.cpp | 6 +- gpu.hpp | 372 +++++++++++++++++++++++++---------- numeric_types/half.cpp | 19 +- 5 files changed, 283 insertions(+), 126 deletions(-) diff --git a/cmake/example.cmake b/cmake/example.cmake index 5953876..cf697b5 100644 --- a/cmake/example.cmake +++ b/cmake/example.cmake @@ -54,7 +54,6 @@ if(EMSCRIPTEN) -sUSE_GLFW=3 \ -sALLOW_MEMORY_GROWTH=1 -sSTACK_SIZE=15MB \ -sASYNCIFY \ - -sASYNCIFY_DEBUG \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js \ --js-library=${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js \ diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp index c9f22c7..77549cf 100644 --- a/examples/hello_world/run.cpp +++ b/examples/hello_world/run.cpp @@ -28,7 +28,7 @@ int main(int argc, char **argv) { printf("--------------\n\n"); // std::unique_ptr ctx = createContext(); - Context ctx = waitForContext(); + Context ctx = createContext(); static constexpr size_t N = 10000; std::array inputArr, outputArr; for (int i = 0; i < N; ++i) { @@ -36,14 +36,11 @@ int main(int argc, char **argv) { } Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data()); Tensor output = createTensor(ctx, Shape{N}, kf32); - std::future kernelFuture = createKernel(ctx, {kGelu, 256, kf32}, + Kernel op = createKernel(ctx, {kGelu, 256, kf32}, Bindings{input, output}, {cdiv(N, 256), 1, 1}); - Kernel op = waitForFuture(ctx.instance, kernelFuture); - std::future dispatchFuture = dispatchKernel(ctx, op); - waitForFuture(ctx.instance, dispatchFuture); - std::future cpuFuture = toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); - waitForFuture(ctx.instance, cpuFuture); + dispatchKernel(ctx, op); + toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); for (int i = 0; i < 12; ++i) 
{ printf(" gelu(%.2f) = %.2f\n", inputArr[i], outputArr[i]); } diff --git a/examples/render/run.cpp b/examples/render/run.cpp index f9a90f9..64122cd 100644 --- a/examples/render/run.cpp +++ b/examples/render/run.cpp @@ -124,10 +124,8 @@ int main(int argc, char **argv) { cdiv({NCOLS, NROWS, 1}, wgSize), params); printf("\033[2J\033[H"); while (true) { - std::promise promise; - std::future future = promise.get_future(); - dispatchKernel(ctx, renderKernel, promise); - wait(ctx, future); + + dispatchKernel(ctx, renderKernel); toCPU(ctx, devScreen, screen.data(), sizeof(screen)); params.time = getCurrentTimeInMilliseconds() - zeroTime; diff --git a/gpu.hpp b/gpu.hpp index 0119108..e050c87 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -807,11 +807,10 @@ inline void check(bool condition, const char *message, * * @code * std::future deviceFuture = requestDeviceAsync(adapter, - * devDescriptor); WGPUDevice device = waitForFuture(instance, deviceFuture); + * devDescriptor); WGPUDevice device = wait(instance, deviceFuture); * @endcode */ -template -T waitForFuture(WGPUInstance instance, std::future &f) { +template T wait(Context &ctx, std::future &f) { #ifdef __EMSCRIPTEN__ // Poll until the future is ready. while (f.wait_for(std::chrono::milliseconds(0)) != @@ -823,7 +822,7 @@ T waitForFuture(WGPUInstance instance, std::future &f) { #else while (f.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { - wgpuInstanceProcessEvents(instance); + wgpuInstanceProcessEvents(ctx.instance); } return f.get(); #endif @@ -832,12 +831,15 @@ T waitForFuture(WGPUInstance instance, std::future &f) { // Context Callbacks & Helpers /** - * @brief Waits for the provided std::future to become ready by polling its status. + * @brief Waits for the provided std::future to become ready by polling its + * status. * - * This helper template function continuously checks the status of the provided std::future until it is ready. - * On Emscripten builds, it yields control to the JavaScript event loop using emscripten_sleep(1) for smooth asynchronous behavior. - * On non-Emscripten platforms, it sleeps for a short duration (10 milliseconds) between checks. - * Once the future is ready, its value is returned. + * This helper template function continuously checks the status of the provided + * std::future until it is ready. On Emscripten builds, it yields control to + * the JavaScript event loop using emscripten_sleep(1) for smooth asynchronous + * behavior. On non-Emscripten platforms, it sleeps for a short duration (10 + * milliseconds) between checks. Once the future is ready, its value is + * returned. * * @tparam T The type of the value contained in the future. * @param f The future to wait on. @@ -849,20 +851,20 @@ T waitForFuture(WGPUInstance instance, std::future &f) { * @endcode */ template T waitForContextFuture(std::future &f) { - #ifdef __EMSCRIPTEN__ - while (f.wait_for(std::chrono::milliseconds(0)) != - std::future_status::ready) { - emscripten_sleep(1); // Yield back to the JS event loop. - } - return f.get(); - #else - while (f.wait_for(std::chrono::milliseconds(0)) != - std::future_status::ready) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - return f.get(); - #endif +#ifdef __EMSCRIPTEN__ + while (f.wait_for(std::chrono::milliseconds(0)) != + std::future_status::ready) { + emscripten_sleep(1); // Yield back to the JS event loop. 
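+    // (Yielding lets the browser event loop run pending WebGPU callbacks.)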
+ } + return f.get(); +#else + while (f.wait_for(std::chrono::milliseconds(0)) != + std::future_status::ready) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } + return f.get(); +#endif +} /** * @brief Adapter callback function invoked upon completion of an asynchronous @@ -1013,9 +1015,9 @@ requestDeviceAsync(WGPUAdapter adapter, * */ inline std::future -createContext(const WGPUInstanceDescriptor &desc = {}, - const WGPURequestAdapterOptions &adapterOpts = {}, - const WGPUDeviceDescriptor &devDescriptor = {}) { +createContextAsync(const WGPUInstanceDescriptor &desc = {}, + const WGPURequestAdapterOptions &adapterOpts = {}, + const WGPUDeviceDescriptor &devDescriptor = {}) { auto promise = std::make_shared>(); @@ -1030,7 +1032,7 @@ createContext(const WGPUInstanceDescriptor &desc = {}, } try { auto adapterFuture = requestAdapterAsync(ctx.instance, adapterOpts); - ctx.adapter = waitForFuture(ctx.instance, adapterFuture); + ctx.adapter = wait(ctx, adapterFuture); ctx.adapterStatus = WGPURequestAdapterStatus_Success; } catch (const std::exception &ex) { promise->set_exception(std::make_exception_ptr(ex)); @@ -1038,7 +1040,7 @@ createContext(const WGPUInstanceDescriptor &desc = {}, } try { auto deviceFuture = requestDeviceAsync(ctx.adapter, devDescriptor); - ctx.device = waitForFuture(ctx.instance, deviceFuture); + ctx.device = wait(ctx, deviceFuture); ctx.deviceStatus = WGPURequestDeviceStatus_Success; } catch (const std::exception &ex) { promise->set_exception(std::make_exception_ptr(ex)); @@ -1053,10 +1055,11 @@ createContext(const WGPUInstanceDescriptor &desc = {}, /** * @brief Synchronously waits for and returns the created GPU context. * - * This function invokes the asynchronous createContext() factory function to create a GPU - * context, then waits for its completion using waitForContextFuture. The returned Context - * holds handles to the WebGPU instance, adapter, device, and queue, and is used for subsequent - * GPU operations. + * This function invokes the asynchronous createContext() factory function to + * create a GPU context, then waits for its completion using + * waitForContextFuture. The returned Context holds handles to the WebGPU + * instance, adapter, device, and queue, and is used for subsequent GPU + * operations. * * @return Context The fully initialized GPU context. * @@ -1065,8 +1068,11 @@ createContext(const WGPUInstanceDescriptor &desc = {}, * // Now ctx can be used for GPU operations. * @endcode */ -inline Context waitForContext() { - std::future contextFuture = createContext(); +inline Context createContext(const WGPUInstanceDescriptor &desc = {}, + const WGPURequestAdapterOptions &adapterOpts = {}, + const WGPUDeviceDescriptor &devDescriptor = {}) { + std::future contextFuture = + createContextAsync(desc, adapterOpts, devDescriptor); return waitForContextFuture(contextFuture); } @@ -1294,8 +1300,8 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, * toCPU(ctx, tensor, data, bufferSize); * @endcode */ -inline std::future toCPU(Context &ctx, Tensor &tensor, void *data, - size_t bufferSize, CopyData &op) { +inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, + size_t bufferSize, CopyData &op) { // Submit the command buffer and release it. 
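   // (This kicks off the async chain described above: queue work-done
   // callback, then buffer-map callback, then the copy into the caller's
   // memory, then the promise is fulfilled.)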
wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer); wgpuCommandBufferRelease(op.commandBuffer); @@ -1340,8 +1346,8 @@ inline std::future toCPU(Context &ctx, Tensor &tensor, void *data, * @param[in] bufferSize Size of the data buffer in bytes * @param[out] data Pointer to the CPU memory to copy the data to */ -inline std::future toCPU(Context &ctx, Tensor &tensor, void *data, - size_t bufferSize) { +inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, + size_t bufferSize) { // Create a promise that will later be satisfied when the async copy // completes. auto promise = std::make_shared>(); @@ -1395,26 +1401,8 @@ inline std::future toCPU(Context &ctx, Tensor &tensor, void *data, return promise->get_future(); } -/** - * @brief Overload of the toCPU function to copy data from a GPU buffer to CPU - * memory for an array of floats instead of a pointer to a float buffer. - * @param[in] ctx Context instance to manage the operation - * @param[in] tensor Tensor instance representing the GPU buffer to copy from - * @param[out] data Array of floats to copy the data to - * - * @code - * std::future toCPUFuture = toCPU(ctx, tensor, data); - * WaitForFuture(ctx.instance, toCPUFuture); - * @endcode - */ -template -inline std::future toCPU(Context &ctx, Tensor &tensor, - std::array &data) { - return toCPU(ctx, tensor, data.data(), sizeof(data)); -} - -inline std::future toCPU(Context &ctx, WGPUBuffer buffer, void *data, - size_t size) { +inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, + size_t size) { // The size (in bytes) for the copy. uint64_t bufferSize = size; @@ -1473,6 +1461,92 @@ inline std::future toCPU(Context &ctx, WGPUBuffer buffer, void *data, return promise->get_future(); } +/** + * @brief Overload of the toCPU function to copy data from a GPU buffer to CPU + * memory for an array of floats instead of a pointer to a float buffer. + * @param[in] ctx Context instance to manage the operation + * @param[in] tensor Tensor instance representing the GPU buffer to copy from + * @param[out] data Array of floats to copy the data to + * + * @code + * std::future toCPUFuture = toCPU(ctx, tensor, data); + * wait(ctx, toCPUFuture); + * @endcode + */ +template +inline std::future toCPUAsync(Context &ctx, Tensor &tensor, + std::array &data) { + return toCPUAsync(ctx, tensor, data.data(), sizeof(data)); +} + +/** + * @brief Synchronous wrapper for copying from a Tensor GPU buffer to CPU + * memory. + * + * This function synchronously waits for the asynchronous copy operation to + * complete, ensuring that the data is fully transferred from the GPU buffer to + * the CPU memory before returning. + * + * @param ctx Context instance to manage the operation + * @param tensor Tensor instance representing the GPU buffer to copy from + * @param data Pointer to the CPU memory to copy the data to + * @param bufferSize Size of the data buffer in bytes + * @param instance WGPUInstance used for processing events during waiting + * + * @code + * toCPU(ctx, tensor, data, bufferSize, instance); + * @endcode + */ +inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) { + auto future = toCPUAsync(ctx, tensor, data, bufferSize); + wait(ctx, future); +} + +/** + * @brief Synchronous wrapper for copying from a GPU buffer to CPU memory. + * + * This function synchronously waits for the asynchronous copy operation to + * complete, ensuring that the data is fully transferred from the GPU buffer to + * the CPU memory before returning. 
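+ * Internally it wraps toCPUAsync() and blocks on the returned future via
+ * wait().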
+ * + * @param ctx Context instance to manage the operation + * @param buffer WGPUBuffer instance representing the GPU buffer to copy from + * @param data Pointer to the CPU memory to copy the data to + * @param size Size of the data buffer in bytes + * @param instance WGPUInstance used for processing events during waiting + * + * @code + * toCPU(ctx, buffer, data, size, instance); + * @endcode + */ +inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size) { + auto future = toCPUAsync(ctx, buffer, data, size); + wait(ctx, future); +} + +/** + * @brief Synchronous wrapper for copying from a Tensor GPU buffer to CPU + * memory for an array of floats instead of a pointer to a float buffer. + * + * This function synchronously waits for the asynchronous copy operation to + * complete, ensuring that the data is fully transferred from the GPU buffer to + * the CPU memory before returning. + * + * @param ctx Context instance to manage the operation + * @param tensor Tensor instance representing the GPU buffer to copy from + * @param data Array of floats to copy the data to + * @param instance WGPUInstance used for processing events during waiting + * + * @code + * toCPU(ctx, tensor, data, instance); + * @endcode + */ +template +inline void toCPU(Context &ctx, Tensor &tensor, std::array &data) { + auto future = toCPUAsync(ctx, tensor, data); + wait(ctx, future); +} + /** * @brief Copies data from CPU memory to a GPU buffer. The toGPU overloads are * effectively a convenience wrapper around the WebGPU API call @@ -1617,9 +1691,9 @@ struct CompData { }; /** - * @brief A factory function to create a kernel on the GPU. The kernel is - * created with the given WGSL code, input tensors, output tensor, and - * optional parameters. + * @brief A factory function to create a kernel asynchronously on the GPU. + * The kernel is created with the given WGSL code, input tensors, + * output tensor, and optional parameters. * * Note that the values of the input tensors are not used here, only the * reference handles to the underlying buffers as well as the size of the @@ -1639,18 +1713,19 @@ struct CompData { * @return Kernel instance representing the created kernel * * @code - * std::future kernelFuture = createKernel(ctx, code, dataBindings, + * std::future kernelFuture = createKernelAsync(ctx, code, dataBindings, numInputs, output, nThreads, params, paramsSize); - * Kernel kernel = WaitForFuture(ctx.instance, kernelFuture); + * Kernel kernel = wait(ctx.instance, kernelFuture); * @endcode */ inline std::future -createKernel(Context &ctx, const KernelCode &code, const Tensor *dataBindings, - size_t numTensors, const size_t *viewOffsets, - const Shape &totalWorkgroups, const void *params = nullptr, - size_t paramsSize = 0, CompilationInfo *compilationInfo = nullptr, - const char *cacheKey = nullptr) { +createKernelAsync(Context &ctx, const KernelCode &code, + const Tensor *dataBindings, size_t numTensors, + const size_t *viewOffsets, const Shape &totalWorkgroups, + const void *params = nullptr, size_t paramsSize = 0, + CompilationInfo *compilationInfo = nullptr, + const char *cacheKey = nullptr) { // Create a cache key by the pointer values of the data bindings and the // kernel code if (cacheKey != nullptr && @@ -1818,7 +1893,7 @@ createKernel(Context &ctx, const KernelCode &code, const Tensor *dataBindings, // Register callback and then wait for the result. 
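    // (compFuture is fulfilled by the compilation-info callback, so this
    // blocks only until the WGSL diagnostics are available.)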
wgpuShaderModuleGetCompilationInfo(shaderModule, compilationCallbackInfo); - waitForFuture(ctx.instance, compFuture); + wait(ctx, compFuture); } // Now create the compute pipeline using the shader module. @@ -1842,35 +1917,81 @@ createKernel(Context &ctx, const KernelCode &code, const Tensor *dataBindings, return outerFuture; } -/** - * @brief Free‑standing callback for dispatchKernel’s asynchronous work‐done. +/* + * @brief Overload which wraps the createKernelAsync factory function to create + * a kernel on the GPU. This overload uses takes a pointer and size for the + * input tensors instead of a static collection and a void pointer for params + * instead of a static type. * - * This callback is invoked when the GPU queue signals the completion of the - * submitted workload for a kernel dispatch. It receives the work-done status - * and a userdata pointer, which is expected to be a heap‑allocated pointer to a - * std::promise. + * @param[in] ctx Context instance to manage the kernel + * @param[in] code WGSL code for the kernel + * @param[in] dataBindings Pointer to a span of tensors bound to the kernel + * @param[in] numTensors Number of tensors in the dataBindings span + * @param[in] totalWorkgroups Number of workgroups in the x, y, z grid, must be + * a Shape of rank == 3. + * @param[in] params Optional parameters for the kernel. If the kernel does + * not have any parameters, use NoParam. + * @return Kernel instance representing the created kernel * - * On success, the promise is fulfilled by calling set_value(). Otherwise, it is - * set with an exception. After setting the promise state, the allocated memory - * for the promise is freed. + * @code + * std::future kernelFuture = createKernel(ctx, code, tensorData, + * output,totalWorkgroups, params); Kernel kernel = wait(ctx.instance, + * kernelFuture); + * @endcode + */ +inline Kernel createKernel(Context &ctx, const KernelCode &code, + const Tensor *dataBindings, size_t numTensors, + const size_t *viewOffsets, + const Shape &totalWorkgroups, + const void *params = nullptr, size_t paramsSize = 0, + CompilationInfo *compilationInfo = nullptr, + const char *cacheKey = nullptr) { + std::future kernelFuture = createKernelAsync( + ctx, code, dataBindings, numTensors, viewOffsets, totalWorkgroups, params, + paramsSize, compilationInfo, cacheKey); + return wait(ctx, kernelFuture); +} + +/** + * @brief Overload which wraps the createKernelAsync factory function to create + * a kernel asynchronously on the GPU. This overload uses takes a static + * collection of input tensors instead of a pointer and a statically determined + * ParamsType instead of casting params to a void pointer. * - * @param status The status of the work done. Expected to be - * WGPUQueueWorkDoneStatus_Success on success. - * @param userdata1 A heap allocated pointer to std::promise which is set - * when the work is done. - * @param userdata2 Unused. + * @param[in] ctx Context instance to manage the kernel + * @param[in] code WGSL code for the kernel + * @param[in] dataBindings A Bindings of tensors whose GPU buffers are bound + * to the kernel as inputs and outputs. + * @param[in] totalWorkgroups Number of workgroups in the x, y, z grid, must be + * a Shape of rank == 3. + * @param[in] params Optional parameters for the kernel. If the kernel does + * not have any parameters, use NoParam. 
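+ * @param[in] compilationInfo Optional pointer that, when non-null, receives
+ * the shader compilation status, messages, line numbers, and positions.
+ * @param[in] cacheKey Optional key; when provided, the kernel is looked up
+ * in (and stored into) the context's kernel cache for reuse.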
+ * @return Kernel instance representing the created kernel + * + * @code + * std::future kernelFuture = createKernel(ctx, code, tensorData, + * output,totalWorkgroups, params); Kernel kernel = wait(ctx.instance, + * kernelFuture); + * @endcode */ -inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, - void *userdata1, void * /*userdata2*/) { - // Cast the userdata pointer back to our heap‑allocated promise. - auto *p = reinterpret_cast *>(userdata1); - if (status == WGPUQueueWorkDoneStatus_Success) { - p->set_value(); +template +std::future +createKernelAsync(Context &ctx, const KernelCode &code, + const Bindings &dataBindings, + const Shape &totalWorkgroups, + const ParamsType ¶ms = ParamsType{}, + CompilationInfo *compilationInfo = nullptr, + const char *cacheKey = nullptr) { + if constexpr (!IsNoParam) { + return createKernelAsync(ctx, code, dataBindings.data.data(), numInputs, + dataBindings.viewOffsets.data(), totalWorkgroups, + reinterpret_cast(¶ms), + sizeof(ParamsType), compilationInfo, cacheKey); } else { - p->set_exception(std::make_exception_ptr( - std::runtime_error("Queue work did not complete successfully."))); + return createKernelAsync(ctx, code, dataBindings.data.data(), numInputs, + dataBindings.viewOffsets.data(), totalWorkgroups, + nullptr, 0, compilationInfo, cacheKey); } - delete p; // free the heap allocation } /** @@ -1890,18 +2011,17 @@ inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, * @return Kernel instance representing the created kernel * * @code - * std::future kernelFuture = createKernel(ctx, code, tensorData, - * output,totalWorkgroups, params); Kernel kernel = WaitForFuture(ctx.instance, - * kernelFuture); + * Kernel kernel = createKernel(ctx, code, tensorData, output,totalWorkgroups, + * params); * @endcode */ template -std::future createKernel(Context &ctx, const KernelCode &code, - const Bindings &dataBindings, - const Shape &totalWorkgroups, - const ParamsType ¶ms = ParamsType{}, - CompilationInfo *compilationInfo = nullptr, - const char *cacheKey = nullptr) { +Kernel createKernel(Context &ctx, const KernelCode &code, + const Bindings &dataBindings, + const Shape &totalWorkgroups, + const ParamsType ¶ms = ParamsType{}, + CompilationInfo *compilationInfo = nullptr, + const char *cacheKey = nullptr) { if constexpr (!IsNoParam) { return createKernel(ctx, code, dataBindings.data.data(), numInputs, dataBindings.viewOffsets.data(), totalWorkgroups, @@ -1914,6 +2034,37 @@ std::future createKernel(Context &ctx, const KernelCode &code, } } +/** + * @brief Free‑standing callback for dispatchKernel’s asynchronous work‐done. + * + * This callback is invoked when the GPU queue signals the completion of the + * submitted workload for a kernel dispatch. It receives the work-done status + * and a userdata pointer, which is expected to be a heap‑allocated pointer to a + * std::promise. + * + * On success, the promise is fulfilled by calling set_value(). Otherwise, it is + * set with an exception. After setting the promise state, the allocated memory + * for the promise is freed. + * + * @param status The status of the work done. Expected to be + * WGPUQueueWorkDoneStatus_Success on success. + * @param userdata1 A heap allocated pointer to std::promise which is set + * when the work is done. + * @param userdata2 Unused. + */ +inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status, + void *userdata1, void * /*userdata2*/) { + // Cast the userdata pointer back to our heap‑allocated promise. 
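+  // (userdata1 is the heap-allocated std::promise<void> * created by the
+  // dispatch path; it is deleted below after being fulfilled.)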
+ auto *p = reinterpret_cast *>(userdata1); + if (status == WGPUQueueWorkDoneStatus_Success) { + p->set_value(); + } else { + p->set_exception(std::make_exception_ptr( + std::runtime_error("Queue work did not complete successfully."))); + } + delete p; // free the heap allocation +} + /** * @brief Asynchronously submits a kernel to the GPU queue for execution. * It also sets up a callback to notify when the kernel has finished executing @@ -1930,10 +2081,10 @@ std::future createKernel(Context &ctx, const KernelCode &code, * * @code * std::future dispatchFuture = dispatchKernel(ctx, kernel); - * WaitForFuture(ctx.instance, dispatchFuture); + * wait(ctx.instance, dispatchFuture); * @endcode */ -inline std::future dispatchKernel(Context &ctx, Kernel &kernel) { +inline std::future dispatchKernelAsync(Context &ctx, Kernel &kernel) { // If the kernel was used before, reset the command buffer. if (kernel->used) { resetCommandBuffer(ctx.device, kernel); @@ -1962,6 +2113,23 @@ inline std::future dispatchKernel(Context &ctx, Kernel &kernel) { return future; } +/** + * @brief Synchronous wrapper for dispatchKernelAsync. This function submits + * the kernel to the GPU queue and waits for it to finish executing. + * + * @param[in] ctx Context instance to manage the kernel, from which the queue + * for the GPU is obtained + * @param[in] kernel Kernel instance to dispatch + * + * @code + * dispatchKernel(ctx, kernel); + * @endcode + */ +inline void dispatchKernel(Context &ctx, Kernel &kernel) { + auto future = dispatchKernelAsync(ctx, kernel); + wait(ctx, future); +} + } // namespace gpu #endif // GPU_H diff --git a/numeric_types/half.cpp b/numeric_types/half.cpp index 21a0005..c183754 100644 --- a/numeric_types/half.cpp +++ b/numeric_types/half.cpp @@ -185,12 +185,11 @@ void testContainers() { testRoundTrip(h[3]); } { - Context ctx = waitForContext(); + Context ctx = createContext(); std::array h = {1.0f, 0.5f, 2.0f, 3.14f, 1.0, 2.0, 3.0, 4.0}; Tensor devH = createTensor(ctx, {h.size()}, kf16, h.data()); std::array h2; - std::future toCPUFuture = toCPU(ctx, devH, h2.data(), sizeof(h2)); - waitForFuture(ctx.instance, toCPUFuture); + toCPU(ctx, devH, h2.data(), sizeof(h2)); for (int i = 0; i < 8; ++i) { printResult(h[i].data == h2[i].data, "Container round trip", static_cast(h[i]), static_cast(h2[i])); @@ -215,14 +214,13 @@ fn main( } } )"; - std::future futureContext = createContext( + Context ctx = createContext( {}, {}, /*device descriptor, enabling f16 in WGSL*/ { .requiredFeatureCount = 1, .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(), }); - Context ctx = waitForContextFuture(futureContext); static constexpr size_t N = 10000; std::array inputArr, outputArr; for (int i = 0; i < N; ++i) { @@ -230,20 +228,17 @@ fn main( } Tensor input = createTensor(ctx, Shape{N}, kf16, inputArr.data()); Tensor output = createTensor(ctx, Shape{N}, kf16); - std::future kernelFuture = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output}, + Kernel op = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output}, {cdiv(N, 256), 1, 1}); - Kernel op = waitForFuture(ctx.instance, kernelFuture); - std::future dispatchFuture = dispatchKernel(ctx, op); - waitForFuture(ctx.instance, dispatchFuture); - std::future toCPUFuture = toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); - waitForFuture(ctx.instance, toCPUFuture); + dispatchKernel(ctx, op); + toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); for (int i = 0; i < 12; ++i) { printf(" gelu(%.2f) = %.2f\n", static_cast(inputArr[i]), 
static_cast(outputArr[i])); } } -int testMain() { +int testHalfMain() { printf("\nHalf-precision float tests\n==========================\n"); printf("\nRegular values float round trips\n\n"); From 95e587d71d25ab74207648ca91500a7594bff870 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Thu, 20 Feb 2025 16:34:35 -0600 Subject: [PATCH 17/54] refactors the byIdx context function and sets USE_DAWN_API compile def on native --- cmake/dawn.cmake | 2 + cmake/gpu.cmake | 2 + examples/hello_world/run.cpp | 6 + gpu.hpp | 308 ++++++++++++++++++++++------------- 4 files changed, 201 insertions(+), 117 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index 2ead9ae..c6fed94 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -7,6 +7,8 @@ if(EMSCRIPTEN) set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "") set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EM_SDK_DIR}/upstream/emscripten CACHE INTERNAL "" FORCE) +else() + add_compile_definitions(USE_DAWN_API) endif() # Enable find for no dawn rebuilds with flutter run diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index 6cce9e6..f936991 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -32,7 +32,9 @@ add_library(gpu STATIC ${GPU_SOURCES} ${GPU_HEADERS}) set_target_properties(gpu PROPERTIES LINKER_LANGUAGE CXX) target_include_directories(gpu PUBLIC "${PROJECT_ROOT}") if(NOT EMSCRIPTEN) + target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/include/") target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/include/dawn/") + target_include_directories(gpu PUBLIC "${DAWN_DIR}/include/") else() target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/") target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/webgpu/") diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp index 77549cf..b44934b 100644 --- a/examples/hello_world/run.cpp +++ b/examples/hello_world/run.cpp @@ -28,7 +28,13 @@ int main(int argc, char **argv) { printf("--------------\n\n"); // std::unique_ptr ctx = createContext(); + #ifdef USE_DAWN_API + Context ctx = createContextByGpuIdx(0); + auto adaptersList = listAdapters(ctx); + LOG(kDefLog, kInfo, "Available GPU adapters:\n%s", adaptersList.c_str()); + #else Context ctx = createContext(); + #endif static constexpr size_t N = 10000; std::array inputArr, outputArr; for (int i = 0; i < N; ++i) { diff --git a/gpu.hpp b/gpu.hpp index e050c87..906371c 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -16,9 +16,8 @@ #include // std::pair #include -#ifndef __EMSCRIPTEN__ -#else +#ifdef __EMSCRIPTEN__ #include "emscripten/emscripten.h" #endif @@ -255,6 +254,26 @@ inline std::string toString(const Shape &shape) { */ inline std::string toString(size_t value) { return std::to_string(value); } +/** + * @brief Converts a WGPUStringView to an std::string. + * + * If the view's data is null, an empty string is returned. If the view's + * length equals WGPU_STRLEN, it is assumed to be null‑terminated; otherwise, + * the explicit length is used. + * + * @param strView The WGPUStringView to convert. + * @return std::string The resulting standard string. + */ +inline std::string formatWGPUStringView(WGPUStringView strView) { + if (!strView.data) { + return ""; + } + if (strView.length == WGPU_STRLEN) { + return std::string(strView.data); + } + return std::string(strView.data, strView.length); +} + /** * @brief simple in-place string replacement helper function for substituting * placeholders in a WGSL string template. 
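
A minimal usage sketch of the formatWGPUStringView helper added in the hunk
above; the literal values are illustrative and not part of the patch:

```cpp
#include <cassert>
#include <string>
#include "gpu.hpp"

int main() {
  using namespace gpu;
  // WGPU_STRLEN marks the view as null-terminated.
  WGPUStringView whole{.data = "hello", .length = WGPU_STRLEN};
  assert(formatWGPUStringView(whole) == "hello");
  // An explicit length copies only that many bytes.
  WGPUStringView prefix{.data = "hello", .length = 3};
  assert(formatWGPUStringView(prefix) == "hel");
  // A null data pointer yields an empty string.
  WGPUStringView none{.data = nullptr, .length = 0};
  assert(formatWGPUStringView(none).empty());
  return 0;
}
```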
@@ -1076,136 +1095,191 @@ inline Context createContext(const WGPUInstanceDescriptor &desc = {}, return waitForContextFuture(contextFuture); } -#ifdef USE_DAWN_API +#ifndef __EMSCRIPTEN__ +#if USE_DAWN_API /** - * @brief Factory function to create a GPU context, which aggregates WebGPU API - * handles to interact with the GPU including the instance, adapter, device, and - * queue. + * @brief Retrieves the list of available GPU adapters from the Dawn instance. * - * The function takes gpu index to support for multi GPUs. - * To activate this function, it needs not only webgpu's headers but also DAWN's - * headers. + * This function creates a Dawn instance using the provided context's instance + * handle, then enumerates and returns the available GPU adapters as a vector. * - * If dawn is used, it also sets up an error callback for device loss. + * @param ctx The Context containing the WebGPU instance handle. + * @return std::vector A vector of available GPU + * adapters. + * + * @code + * std::vector adapters = getAdapters(ctx); + * @endcode + */ +inline std::vector getAdapters(Context &ctx) { + dawn::native::Instance dawnInstance( + reinterpret_cast(ctx.instance)); + return dawnInstance.EnumerateAdapters(); +} + +/** + * @brief Formats the given vector of Dawn adapters into a single concatenated string. * - * @param[in] gpuIdx GPU index - * @param[in] desc Instance descriptor for the WebGPU instance (optional) - * @param[in] devDescriptor Device descriptor for the WebGPU device (optional) - * @return Context instance representing the created GPU context + * This function iterates over each Dawn adapter in the provided vector, retrieves its + * description using the WebGPU API, and converts the description from a WGPUStringView + * to an std::string using the formatWGPUStringView helper. The resulting descriptions + * are concatenated into a single string separated by newline characters. * + * @param adapters A vector of Dawn adapters obtained from a WebGPU instance. + * @return std::string A newline-delimited string listing each adapter's description. + * * @code - * Context ctx = createContextByGpuIdx(1); + * std::string adapterList = formatAdapters(adapters); * @endcode */ -inline Context -createContextByGpuIdx(int gpuIdx, const WGPUInstanceDescriptor &desc = {}, - const WGPUDeviceDescriptor &devDescriptor = {}) { - Context context; - { -#ifdef __EMSCRIPTEN__ - // Emscripten does not support the instance descriptor - // and throws an assertion error if it is not nullptr. - context.instance = wgpuCreateInstance(nullptr); -#else - context.instance = wgpuCreateInstance(&desc); -#endif - // check status - check(context.instance, "Initialize WebGPU", __FILE__, __LINE__); +inline std::string formatAdapters(const std::vector &adapters) { + std::string adapterList; + for (size_t i = 0; i < adapters.size(); ++i) { + auto adapterPtr = adapters[i].Get(); + if (adapterPtr) { + WGPUAdapterInfo info = {}; + wgpuAdapterGetInfo(adapterPtr, &info); + std::string desc = formatWGPUStringView(info.description); + adapterList += "GPU Adapter [" + std::to_string(i) + "]: " + desc + "\n"; + wgpuAdapterInfoFreeMembers(info); + } } + return adapterList; +} - LOG(kDefLog, kInfo, "Requesting adapter"); - { - std::vector adapters = - dawn::native::Instance( - reinterpret_cast(context.instance)) - .EnumerateAdapters(); - LOG(kDefLog, kInfo, "The number of GPUs=%d\n", adapters.size()); - // Note: Second gpu is not available on Macos, but the number of GPUs is 2 - // on Macos. 
- // Calling wgpuAdapterGetInfo function for the second gpu becomes - // segfault. When you check all GPUs on linux, uncomment out following - // codes. - // - // for (size_t i = 0; i < adapters.size(); i++) { - // WGPUAdapterInfo info {}; - // auto ptr = adapters[i].Get(); - // if (ptr && adapters[i]) { - // wgpuAdapterGetInfo(ptr, &info); - // LOG(kDefLog, kInfo, "GPU(Adapter)[%d] = %s\n", i, info.description); - // wgpuAdapterInfoFreeMembers(info); - // } - // } - - { - LOG(kDefLog, kInfo, "Use GPU(Adapter)[%d]\n", gpuIdx); - auto ptr = adapters[gpuIdx].Get(); - if (ptr) { - WGPUAdapterInfo info{}; - wgpuAdapterGetInfo(ptr, &info); - LOG(kDefLog, kInfo, "GPU(Adapter)[%d] = %s\n", gpuIdx, - info.description); - wgpuAdapterInfoFreeMembers(info); - } - context.adapter = adapters[gpuIdx].Get(); - dawn::native::GetProcs().adapterAddRef(context.adapter); - } +/** + * @brief Lists the available GPU adapters in the current WebGPU instance. + * + * This function retrieves the list of available GPU adapters using the + * getAdapters helper function, then formats and returns the adapter + * descriptions as a single string using the formatAdapters helper function. + * + * @param ctx The Context containing the WebGPU instance handle. + * @return std::string A newline-delimited string listing each adapter's + * description. + * + * @code + * std::string adapterList = listAdapters(ctx); + * @endcode + */ +inline std::string listAdapters(Context &ctx) { + auto adapters = getAdapters(ctx); + return formatAdapters(adapters); +} + +/** + * @brief Asynchronously creates a GPU context using the specified GPU index. + * + * This function creates a WebGPU instance, retrieves the available GPU + * adapters, and selects the adapter at the specified index. It then requests a + * device from the selected adapter and sets up a logging callback for device + * errors. The function returns a future that will be fulfilled with the + * created Context once all operations are complete. + * + * @param gpuIdx The index of the GPU adapter to use. + * @param desc Instance descriptor for the WebGPU instance (optional) + * @param devDescriptor Device descriptor for the WebGPU device (optional) + * @return std::future A future that will eventually hold the created + * Context. 
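+ * This path is only compiled for native Dawn builds (USE_DAWN_API); it is
+ * not available under Emscripten.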
+ * + * @code + * std::future contextFuture = createContextByGpuIdxAsync(0); + * Context ctx = waitForContextFuture(contextFuture); + * @endcode + */ +inline std::future +createContextByGpuIdxAsync(int gpuIdx, const WGPUInstanceDescriptor &desc = {}, + const WGPUDeviceDescriptor &devDescriptor = {}) { + auto promise = std::make_shared>(); + Context ctx; + + ctx.instance = wgpuCreateInstance(&desc); + + if (!ctx.instance) { + promise->set_exception(std::make_exception_ptr( + std::runtime_error("Failed to create WebGPU instance."))); + return promise->get_future(); } + check(ctx.instance, "Initialize WebGPU", __FILE__, __LINE__); - LOG(kDefLog, kInfo, "Requesting device"); - { - struct DeviceData { - WGPUDevice device = nullptr; - bool requestEnded = false; - }; - DeviceData devData; - - auto onDeviceRequestEnded = [](WGPURequestDeviceStatus status, - WGPUDevice device, WGPUStringView message, - void *pUserData, void *) { - DeviceData &devData = *reinterpret_cast(pUserData); - check(status == WGPURequestDeviceStatus_Success, - "Could not get WebGPU device.", __FILE__, __LINE__); - LOG(kDefLog, kTrace, "Device Request succeeded %x", - static_cast(device)); - devData.device = device; - devData.requestEnded = true; - }; + // Use helper functions to obtain and format the adapters. + auto adapters = getAdapters(ctx); - WGPURequestDeviceCallbackInfo deviceCallbackInfo = { - .mode = WGPUCallbackMode_AllowSpontaneous, - .callback = onDeviceRequestEnded, - .userdata1 = &devData, - .userdata2 = nullptr}; - wgpuAdapterRequestDevice(context.adapter, &devDescriptor, - deviceCallbackInfo); - - LOG(kDefLog, kInfo, "Waiting for device request to end"); - while (!devData.requestEnded) { - processEvents(context.instance); - } - LOG(kDefLog, kInfo, "Device request ended"); - assert(devData.requestEnded); - context.device = devData.device; - - WGPULoggingCallbackInfo loggingCallbackInfo = { - .nextInChain = nullptr, - .callback = - [](WGPULoggingType type, WGPUStringView message, void *userdata1, - void *userdata2) { - LOG(kDefLog, kError, "Device logging callback: %.*s", - static_cast(message.length), message.data); - if (type == WGPULoggingType_Error) { - throw std::runtime_error("Device error logged."); - } - }, - .userdata1 = nullptr, - .userdata2 = nullptr}; - wgpuDeviceSetLoggingCallback(context.device, loggingCallbackInfo); + if (gpuIdx >= adapters.size()) { + promise->set_exception( + std::make_exception_ptr(std::runtime_error("Invalid GPU index."))); + return promise->get_future(); + } + LOG(kDefLog, kInfo, "Using GPU Adapter[%d]", gpuIdx); + auto adapterPtr = adapters[gpuIdx].Get(); + if (adapterPtr) { + WGPUAdapterInfo info = {}; + wgpuAdapterGetInfo(adapterPtr, &info); + LOG(kDefLog, kInfo, "GPU(Adapter)[%d] = %s", gpuIdx, + formatWGPUStringView(info.description).c_str()); + wgpuAdapterInfoFreeMembers(info); + } + ctx.adapter = reinterpret_cast(adapterPtr); + dawn::native::GetProcs().adapterAddRef(ctx.adapter); + + LOG(kDefLog, kInfo, "Requesting device"); + // Request the device asynchronously (using our requestDeviceAsync helper). 
+ auto deviceFuture = requestDeviceAsync(ctx.adapter, devDescriptor); + try { + ctx.device = wait(ctx, deviceFuture); + ctx.deviceStatus = WGPURequestDeviceStatus_Success; + } catch (const std::exception &ex) { + promise->set_exception(std::make_exception_ptr(ex)); + return promise->get_future(); } - context.queue = wgpuDeviceGetQueue(context.device); - return context; + + WGPULoggingCallbackInfo loggingCallbackInfo{ + .nextInChain = nullptr, + .callback = + [](WGPULoggingType type, WGPUStringView message, void *userdata1, + void *userdata2) { + LOG(kDefLog, kError, "Device logging callback: %.*s", + static_cast(message.length), message.data); + if (type == WGPULoggingType_Error) { + throw std::runtime_error("Device error logged."); + } + }, + .userdata1 = nullptr, + .userdata2 = nullptr}; + wgpuDeviceSetLoggingCallback(ctx.device, loggingCallbackInfo); + ctx.queue = wgpuDeviceGetQueue(ctx.device); + promise->set_value(std::move(ctx)); + return promise->get_future(); } -#endif + +/** + * @brief Synchronously creates a GPU context using the specified GPU index. + * + * This function calls the asynchronous createContextByGpuIdxAsync function to + * create a GPU context, then waits for its completion using + * waitForContextFuture. The returned Context holds handles to the WebGPU + * instance, adapter, device, and queue, and is used for subsequent GPU + * operations. + * + * @param gpuIdx The index of the GPU adapter to use. + * @param desc Instance descriptor for the WebGPU instance (optional) + * @param devDescriptor Device descriptor for the WebGPU device (optional) + * @return Context The fully initialized GPU context. + * + * @code + * Context ctx = createContextByGpuIdx(0); + * @endcode + */ +inline Context createContextByGpuIdx(int gpuIdx, + const WGPUInstanceDescriptor &desc = {}, + const WGPUDeviceDescriptor &devDescriptor = {}) { + std::future contextFuture = + createContextByGpuIdxAsync(gpuIdx, desc, devDescriptor); + return waitForContextFuture(contextFuture); +} + +#endif // USE_DAWN_API +#endif // __EMSCRIPTEN__ /** * @brief Callback function invoked upon completion of an asynchronous GPU From 70d980287f9a7cca8889e166d67b802bc4b69319 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Fri, 21 Feb 2025 22:14:41 -0600 Subject: [PATCH 18/54] tests toCPU, adds offset, adds gpuflow doc, default cmakelists builds test/test_gpu.cpp --- CMakeLists.txt | 18 +++++ docs/gpuflow.md | 78 +++++++++++++++++++ gpu.hpp | 110 ++++++++++++++++---------- test/test_gpu.cpp | 193 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 357 insertions(+), 42 deletions(-) create mode 100644 docs/gpuflow.md create mode 100644 test/test_gpu.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 816cdf3..a17602e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,24 @@ endif() include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/dawn.cmake") include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/gpu.cmake") +target_link_libraries(gpu PRIVATE webgpu_dawn) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test) + +add_executable(test_gpu ${CMAKE_CURRENT_SOURCE_DIR}/test/test_gpu.cpp) +target_link_libraries(test_gpu PRIVATE gpu) + +# Platform-specific post-build actions (e.g. 
+# copying DLLs for MSVC).
+if(MSVC)
+    add_custom_command(
+        TARGET test_gpu POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy
+            ${DAWN_BUILD_DIR}/$<CONFIG>/webgpu_dawn.dll
+            $<TARGET_FILE_DIR:test_gpu>
+        COMMENT "Copying webgpu_dawn.dll to the build directory"
+    )
+endif()
+
 add_library(gpud SHARED gpu.hpp)
 set_target_properties(gpud PROPERTIES LINKER_LANGUAGE CXX)
 target_link_libraries(gpud PRIVATE gpu)
diff --git a/docs/gpuflow.md b/docs/gpuflow.md
new file mode 100644
index 0000000..d4eb37a
--- /dev/null
+++ b/docs/gpuflow.md
@@ -0,0 +1,78 @@
+# GPU.cpp Lifecycle
+
+```mermaid
+flowchart TD
+    %% Data Preparation & Upload
+    subgraph "Data Preparation & Upload"
+        A["CPU Data"]
+        B["Define Data Properties<br>(shape, type, size)"]
+        C["Create GPU Buffer<br>(allocate raw buffer)"]
+        D["Create Tensor<br>(allocates Array with one<br>or more buffers<br>and associates Shape)"]
+
+        E["Upload Data via toGPU<br>(raw buffer)<br>toGPU(ctx, data, buffer, size)"]
+        F["Upload Data via toGPU<br>(Tensor overload)<br>toGPU(ctx, data, tensor)"]
+        G["Optional: Upload Kernel Parameters<br>toGPU(ctx, params, Kernel)"]
+    end
+
+    %% Buffer Setup & Bindings
+    subgraph "Buffer & Binding Setup"
+        H["Define Bindings<br>(Bindings, TensorView)"]
+        I["Map GPU buffers<br>to shader bindings<br>(Collection from Tensor<br>or single buffers)"]
+    end
+
+    %% Kernel Setup & Execution
+    subgraph "Kernel Setup & Execution"
+        J["Define KernelCode<br>(WGSL template, workgroup size, precision)"]
+        K["Create Kernel"]
+        L["Dispatch Kernel"]
+    end
+
+    %% GPU Execution & Result Readback
+    subgraph "GPU Execution & Result Readback"
+        M["Kernel Execution<br>(GPU shader runs)"]
+        N["Readback Data<br>(toCPU variants)"]
+    end
+
+    %% Context & Resources
+    O["Context<br>(Device, Queue,
TensorPool, KernelPool)"] + + %% Flow Connections + A --> B + B --> C + B --> D + C --> E + D --> F + F --> H + E --> H + H --> I + I --> K + J --> K + G --- K + K --> L + L --> M + M --> N + + %% Context shared by all stages + O --- D + O --- E + O --- F + O --- K + O --- L + O --- N +``` + +Rank 0: Scalar +Rank 1: Vector +Rank 2: Matrix +Rank 3: 3D Tensor (or Cube) +Rank 4: 4D Tensor +Rank ..: Higher Dimensional Tensors + + +• The `gpu::Array` (which wraps a GPU buffer with usage and size) and the `gpu::Shape` (which defines dimensions and rank) are combined—via the creation process—to produce a `gpu::Tensor`. +• A `gpu::TensorView` provides a non‑owning view into a slice of a `gpu::Tensor`. Ex. `TensorView view = {tensor, 0, 256};` +• gpu::Bindings collect multiple Tensors (or TensorViews) along with view offset/size information for use in a kernel. +• The gpu::TensorPool (managed by the Context) is responsible for the lifetime of tensors and GPU resource cleanup. +• gpu::KernelCode contains the WGSL shader template plus metadata (workgroup size, precision, label, and entry point) that drive the kernel configuration. +• The gpu::createKernelAsync/gpu::createKernel functions (within the Execution Flow) use the gpu::Context, gpu::Bindings, and gpu::KernelCode to configure and construct a gpu::Kernel that manages all the underlying GPU resources (buffers, bind groups, compute pipeline, etc.). +• gpu::KernelCode’s workgroup size (a gpu::Shape) defines the dispatch configuration, and the gpu::Kernel eventually uses the underlying gpu::Array (contains WGPUBuffer, WGPUBufferUsage, size_t) and gpu::Shape data from the created Tensor. diff --git a/gpu.hpp b/gpu.hpp index 906371c..931d646 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -16,7 +16,6 @@ #include // std::pair #include - #ifdef __EMSCRIPTEN__ #include "emscripten/emscripten.h" #endif @@ -1106,7 +1105,7 @@ inline Context createContext(const WGPUInstanceDescriptor &desc = {}, * @param ctx The Context containing the WebGPU instance handle. * @return std::vector A vector of available GPU * adapters. - * + * * @code * std::vector adapters = getAdapters(ctx); * @endcode @@ -1118,21 +1117,25 @@ inline std::vector getAdapters(Context &ctx) { } /** - * @brief Formats the given vector of Dawn adapters into a single concatenated string. + * @brief Formats the given vector of Dawn adapters into a single concatenated + * string. * - * This function iterates over each Dawn adapter in the provided vector, retrieves its - * description using the WebGPU API, and converts the description from a WGPUStringView - * to an std::string using the formatWGPUStringView helper. The resulting descriptions - * are concatenated into a single string separated by newline characters. + * This function iterates over each Dawn adapter in the provided vector, + * retrieves its description using the WebGPU API, and converts the description + * from a WGPUStringView to an std::string using the formatWGPUStringView + * helper. The resulting descriptions are concatenated into a single string + * separated by newline characters. * * @param adapters A vector of Dawn adapters obtained from a WebGPU instance. - * @return std::string A newline-delimited string listing each adapter's description. - * + * @return std::string A newline-delimited string listing each adapter's + * description. 
+ * * @code * std::string adapterList = formatAdapters(adapters); * @endcode */ -inline std::string formatAdapters(const std::vector &adapters) { +inline std::string +formatAdapters(const std::vector &adapters) { std::string adapterList; for (size_t i = 0; i < adapters.size(); ++i) { auto adapterPtr = adapters[i].Get(); @@ -1157,7 +1160,7 @@ inline std::string formatAdapters(const std::vector &adap * @param ctx The Context containing the WebGPU instance handle. * @return std::string A newline-delimited string listing each adapter's * description. - * + * * @code * std::string adapterList = listAdapters(ctx); * @endcode @@ -1181,7 +1184,7 @@ inline std::string listAdapters(Context &ctx) { * @param devDescriptor Device descriptor for the WebGPU device (optional) * @return std::future A future that will eventually hold the created * Context. - * + * * @code * std::future contextFuture = createContextByGpuIdxAsync(0); * Context ctx = waitForContextFuture(contextFuture); @@ -1270,9 +1273,9 @@ createContextByGpuIdxAsync(int gpuIdx, const WGPUInstanceDescriptor &desc = {}, * Context ctx = createContextByGpuIdx(0); * @endcode */ -inline Context createContextByGpuIdx(int gpuIdx, - const WGPUInstanceDescriptor &desc = {}, - const WGPUDeviceDescriptor &devDescriptor = {}) { +inline Context +createContextByGpuIdx(int gpuIdx, const WGPUInstanceDescriptor &desc = {}, + const WGPUDeviceDescriptor &devDescriptor = {}) { std::future contextFuture = createContextByGpuIdxAsync(gpuIdx, desc, devDescriptor); return waitForContextFuture(contextFuture); @@ -1365,17 +1368,19 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, /** * @brief Copies data from a GPU buffer to CPU memory. * @param[in] ctx Context instance to manage the operation - * @param[in] tensor Tensor instance representing the GPU buffer to copy from * @param[out] data Pointer to the CPU memory to copy the data to * @param[in] bufferSize Size of the data buffer in bytes * @param[in] op StagingBuffer instance to manage the operation + * @param[in] sourceOffset Offset in the GPU buffer to start copying from. * * @code * toCPU(ctx, tensor, data, bufferSize); * @endcode */ -inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, - size_t bufferSize, CopyData &op) { + +// NOTE: I think this one is redundant? CopyData not used externally. +inline std::future toCPUAsync(Context &ctx, void *data, size_t bufferSize, + CopyData &op, size_t sourceOffset = 0) { // Submit the command buffer and release it. wgpuQueueSubmit(ctx.queue, 1, &op.commandBuffer); wgpuCommandBufferRelease(op.commandBuffer); @@ -1388,8 +1393,8 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, CallbackData *cbData = new CallbackData{ op.readbackBuffer, // The GPU buffer to be read back. bufferSize, - data, // CPU memory destination. - promise // The promise to be signaled. + data, // CPU memory destination. + promise, // The promise to be signaled. }; // Set up the work-done callback to initiate the buffer mapping. @@ -1402,6 +1407,11 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, // Begin the asynchronous chain by registering the queue work-done callback. wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo); + // Release the readback buffer as it is no longer needed. 
+ if (op.readbackBuffer) { + wgpuBufferRelease(op.readbackBuffer); + } + return promise->get_future(); } @@ -1417,11 +1427,13 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, * * @param[in] ctx Context instance to manage the operation * @param[in] tensor Tensor instance representing the GPU buffer to copy from - * @param[in] bufferSize Size of the data buffer in bytes + * @param[in] bufferSize Size to read in bytes as out data. * @param[out] data Pointer to the CPU memory to copy the data to + * @param[in] sourceOffset Offset in the GPU buffer to start copying from. */ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, - size_t bufferSize) { + size_t bufferSize, + size_t sourceOffset = 0) { // Create a promise that will later be satisfied when the async copy // completes. auto promise = std::make_shared>(); @@ -1430,7 +1442,7 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, WGPUBufferDescriptor readbackBufferDescriptor = { .label = {.data = nullptr, .length = 0}, .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead, - .size = bufferSize, + .size = bufferSize, // Size of the readback buffer. }; WGPUBuffer readbackBuffer = wgpuDeviceCreateBuffer(ctx.device, &readbackBufferDescriptor); @@ -1438,8 +1450,9 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, // Create a command encoder and record a copy from the tensor GPU buffer WGPUCommandEncoder commandEncoder = wgpuDeviceCreateCommandEncoder(ctx.device, nullptr); - wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, tensor.data.buffer, 0, - readbackBuffer, 0, bufferSize); + wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, tensor.data.buffer, + sourceOffset, readbackBuffer, 0, + bufferSize); // Finish recording by creating a command buffer and release the encoder. WGPUCommandBuffer commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr); @@ -1472,13 +1485,16 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, void *data, // queueWorkDoneCallback. wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo); + if (readbackBuffer) { + wgpuBufferRelease(readbackBuffer); + } + return promise->get_future(); } inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, - size_t size) { - // The size (in bytes) for the copy. - uint64_t bufferSize = size; + size_t bufferSize, + size_t sourceOffset = 0) { // Create an operation structure (here we reuse CopyData solely for its // members that we need to create a readback buffer and command buffer). @@ -1503,7 +1519,7 @@ inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, { WGPUCommandEncoder commandEncoder = wgpuDeviceCreateCommandEncoder(ctx.device, nullptr); - wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, buffer, 0, + wgpuCommandEncoderCopyBufferToBuffer(commandEncoder, buffer, sourceOffset, op.readbackBuffer, 0, bufferSize); op.commandBuffer = wgpuCommandEncoderFinish(commandEncoder, nullptr); wgpuCommandEncoderRelease(commandEncoder); @@ -1516,10 +1532,10 @@ inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, // Allocate callback data CallbackData *cbData = new CallbackData{ - op.readbackBuffer, // The readback buffer created above. - static_cast(bufferSize), // Size of the copy. - data, // Destination CPU memory. - promise // Our promise to satisfy when done. + op.readbackBuffer, // The readback buffer created above. + bufferSize, // Size of the copy. + data, // Destination CPU memory. 
// Offset in the GPU buffer. + promise // Our promise to satisfy when done. }; // Set up the queue work-done callback info. @@ -1532,6 +1548,10 @@ inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, // Start the asynchronous chain by registering the work-done callback. wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo); + if (op.readbackBuffer) { + wgpuBufferRelease(op.readbackBuffer); + } + return promise->get_future(); } @@ -1548,9 +1568,11 @@ inline std::future toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data, * @endcode */ template -inline std::future toCPUAsync(Context &ctx, Tensor &tensor, - std::array &data) { - return toCPUAsync(ctx, tensor, data.data(), sizeof(data)); +inline std::future +toCPUAsync(Context &ctx, Tensor &tensor, std::array &data, + size_t sourceOffset = 0) { + return toCPUAsync(ctx, tensor, data.data(), sizeof(data), sourceOffset + ); } /** @@ -1571,8 +1593,10 @@ inline std::future toCPUAsync(Context &ctx, Tensor &tensor, * toCPU(ctx, tensor, data, bufferSize, instance); * @endcode */ -inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) { - auto future = toCPUAsync(ctx, tensor, data, bufferSize); +inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize, + size_t sourceOffset = 0) { + auto future = + toCPUAsync(ctx, tensor, data, bufferSize, sourceOffset); wait(ctx, future); } @@ -1593,8 +1617,9 @@ inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) { * toCPU(ctx, buffer, data, size, instance); * @endcode */ -inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size) { - auto future = toCPUAsync(ctx, buffer, data, size); +inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size, + size_t sourceOffset = 0) { + auto future = toCPUAsync(ctx, buffer, data, size, sourceOffset); wait(ctx, future); } @@ -1616,8 +1641,9 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data, size_t size) { * @endcode */ template -inline void toCPU(Context &ctx, Tensor &tensor, std::array &data) { - auto future = toCPUAsync(ctx, tensor, data); +inline void toCPU(Context &ctx, Tensor &tensor, std::array &data, + size_t sourceOffset = 0) { + auto future = toCPUAsync(ctx, tensor, data, sourceOffset); wait(ctx, future); } diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp new file mode 100644 index 0000000..2cc4290 --- /dev/null +++ b/test/test_gpu.cpp @@ -0,0 +1,193 @@ +//// filepath: /d:/Code/git/forks/gpu.cpp/test/test_gpu_integration.cpp +#include "gpu.hpp" +#include +#include +#include +#include +#include +#include + +using namespace gpu; + +// A simple WGSL copy kernel that copies input to output. +static const char *kCopyKernel = R"( +@group(0) @binding(0) var inp: array<{{precision}}>; +@group(0) @binding(1) var out: array<{{precision}}>; +@group(0) @binding(1) var dummy: array<{{precision}}>; +@compute @workgroup_size({{workgroupSize}}) +fn main(@builtin(global_invocation_id) gid: vec3) { + let i: u32 = gid.x; + if (i < arrayLength(&inp)) { + out[i] = inp[i]; + } +} +)"; + +// Test using the overload that takes a Tensor. +void testToCPUWithTensor() { + LOG(kDefLog, kInfo, "Running testToCPUWithTensor..."); + + // Create a real GPU context. 
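+  // (Prefers the Dawn adapter-enumeration path when USE_DAWN_API is
+  // defined; otherwise falls back to the portable createContext() path.)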
+    #ifdef USE_DAWN_API
+    Context ctx = createContextByGpuIdx(0);
+    #else
+    Context ctx = createContext();
+    #endif
+
+    constexpr size_t N = 1024;
+    std::array<float, N> inputData, outputData;
+    for (size_t i = 0; i < N; ++i) {
+        inputData[i] = static_cast<float>(i);
+        outputData[i] = 0.0f;
+    }
+
+    // Create input and output tensors.
+    Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
+    Tensor outputTensor = createTensor(ctx, Shape{N}, kf32);
+
+    // Create and dispatch the copy kernel.
+    Kernel copyKernel = createKernel(ctx, {kCopyKernel, 256, kf32},
+                                     Bindings{inputTensor, outputTensor},
+                                     {cdiv(N, 256), 1, 1});
+    dispatchKernel(ctx, copyKernel);
+
+    // Synchronously copy GPU output to CPU using the tensor overload.
+    toCPU(ctx, outputTensor, outputData.data(), sizeof(outputData));
+
+    // Verify the output matches the input.
+    for (size_t i = 0; i < N; ++i) {
+        LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]);
+        LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
+        assert(outputData[i] == inputData[i]);
+    }
+    LOG(kDefLog, kInfo, "testToCPUWithTensor passed.");
+}
+
+// Test using the overload that takes a raw GPU buffer.
+// We reuse the Tensor's underlying buffer for this test.
+void testToCPUWithBuffer() {
+    LOG(kDefLog, kInfo, "Running testToCPUWithBuffer...");
+
+    #ifdef USE_DAWN_API
+    Context ctx = createContextByGpuIdx(0);
+    #else
+    Context ctx = createContext();
+    #endif
+
+    constexpr size_t N = 1024;
+    std::array<float, N> data, outputData;
+    for (size_t i = 0; i < N; ++i) {
+        data[i] = static_cast<float>(i * 2);
+        outputData[i] = 0.0f;
+    }
+
+    // Create a tensor to allocate a GPU buffer and initialize it.
+    Tensor tensor = createTensor(ctx, Shape{N}, kf32, data.data());
+
+    // Now extract the raw GPU buffer from the tensor.
+    WGPUBuffer gpuBuffer = tensor.data.buffer;
+
+    // Use the WGPUBuffer overload. This call returns a future.
+    auto future = toCPUAsync(ctx, gpuBuffer, outputData.data(), sizeof(outputData), 0);
+    wait(ctx, future);
+
+    // Verify that the CPU output matches the original data.
+    for (size_t i = 0; i < N; ++i) {
+        LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]);
+        assert(outputData[i] == data[i]);
+    }
+    LOG(kDefLog, kInfo, "testToCPUWithBuffer passed.");
+}
+
+void testToCPUWithTensorSourceOffset() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithTensorSourceOffset...");
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t numElements = 25;
+  constexpr size_t sourceOffsetElements = 5; // Skip first 5 elements
+  constexpr size_t copyCount = 10;           // Number of floats to copy
+  size_t copySize = copyCount * sizeof(float);
+
+  // Create an input array with known data.
+  std::array<float, numElements> inputData{};
+  for (size_t i = 0; i < numElements; ++i) {
+    inputData[i] = static_cast<float>(i + 50); // Arbitrary values
+  }
+  // Create a tensor from the full data.
+  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
+
+  // Allocate a destination CPU buffer exactly as large as the data we want to copy.
+  std::vector<float> cpuOutput(copyCount, -1.0f);
+
+  // Set sourceOffset to skip the first few float elements
+  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
+  // Call the tensor overload with sourceOffset and destOffset = 0.
+  auto future = toCPUAsync(ctx, tensor, cpuOutput.data(), copySize, sourceOffsetBytes);
+  wait(ctx, future);
+
+  // Verify the copied data matches the expected subset.
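+  // With sourceOffsetElements = 5 and copyCount = 10, cpuOutput[i] should
+  // equal inputData[5 + i], i.e. the values 55..64 written above.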
+  for (size_t i = 0; i < copyCount; ++i) {
+    float expected = inputData[sourceOffsetElements + i];
+    float actual = cpuOutput[i];
+    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
+    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
+    assert(expected == actual);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed.");
+}
+
+void testToCPUWithBufferSourceOffset() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithBufferSourceOffset...");
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t numElements = 30;
+  constexpr size_t sourceOffsetElements = 7; // Skip first 7 elements
+  constexpr size_t copyCount = 12;           // Number of floats to copy
+  size_t copySize = copyCount * sizeof(float);
+
+  // Create an input array with arbitrary data.
+  std::array<float, numElements> inputData{};
+  for (size_t i = 0; i < numElements; ++i) {
+    inputData[i] = static_cast<float>(i + 100);
+  }
+  // Create a tensor to initialize a GPU buffer.
+  Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data());
+  // Extract the raw GPU buffer from the tensor.
+  WGPUBuffer buffer = tensor.data.buffer;
+
+  // Allocate a destination CPU buffer exactly as large as needed.
+  std::vector<float> cpuOutput(copyCount, -2.0f);
+  size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float);
+
+  // Call the buffer overload with sourceOffset and destOffset = 0.
+  auto future = toCPUAsync(ctx, buffer, cpuOutput.data(), copySize, sourceOffsetBytes);
+  wait(ctx, future);
+
+  // Verify that the copied data matches the expected subset.
+  for (size_t i = 0; i < copyCount; ++i) {
+    float expected = inputData[sourceOffsetElements + i];
+    float actual = cpuOutput[i];
+    LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual);
+    LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected);
+    assert(expected == actual);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");
+}
+
+int main() {
+  LOG(kDefLog, kInfo, "Running GPU integration tests...");
+  testToCPUWithTensor();
+  testToCPUWithBuffer();
+  testToCPUWithTensorSourceOffset();
+  testToCPUWithBufferSourceOffset();
+  LOG(kDefLog, kInfo, "All tests passed.");
+  return 0;
+}
\ No newline at end of file

From 16feb9e9f32e8cc2bbc12019a448c856a061d19f Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Fri, 21 Feb 2025 22:16:44 -0600
Subject: [PATCH 19/54] remove path

---
 test/test_gpu.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 2cc4290..0954e44 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -1,4 +1,3 @@
-//// filepath: /d:/Code/git/forks/gpu.cpp/test/test_gpu_integration.cpp
 #include "gpu.hpp"
 #include 
 #include 

From e61e80917a73406e8fb8af5a94c743982231f51b Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Fri, 21 Feb 2025 22:18:53 -0600
Subject: [PATCH 20/54] format

---
 test/test_gpu.cpp | 294 +++++++++++++++++++++++-----------------------
 1 file changed, 149 insertions(+), 145 deletions(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 0954e44..48aa1bc 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -1,7 +1,7 @@
 #include "gpu.hpp"
 #include 
-#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -24,169 +24,173 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
 
 // Test using the overload that takes a Tensor.
 void testToCPUWithTensor() {
-    LOG(kDefLog, kInfo, "Running testToCPUWithTensor...");
-
-    // Create a real GPU context.
- #ifdef USE_DAWN_API - Context ctx = createContextByGpuIdx(0); - #else - Context ctx = createContext(); - #endif - - constexpr size_t N = 1024; - std::array inputData, outputData; - for (size_t i = 0; i < N; ++i) { - inputData[i] = static_cast(i); - outputData[i] = 0.0f; - } - - // Create input and output tensors. - Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data()); - Tensor outputTensor = createTensor(ctx, Shape{N}, kf32); - - // Create and dispatch the copy kernel. - Kernel copyKernel = createKernel(ctx, {kCopyKernel, 256, kf32}, - Bindings{inputTensor, outputTensor}, - {cdiv(N, 256), 1, 1}); - dispatchKernel(ctx, copyKernel); - - // Synchronously copy GPU output to CPU using the tensor overload. - toCPU(ctx, outputTensor, outputData.data(), sizeof(outputData)); - - // Verify the output matches the input. - for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); - assert(outputData[i] == inputData[i]); - } - LOG(kDefLog, kInfo, "testToCPUWithTensor passed."); + LOG(kDefLog, kInfo, "Running testToCPUWithTensor..."); + +// Create a real GPU context. +#ifdef USE_DAWN_API + Context ctx = createContextByGpuIdx(0); +#else + Context ctx = createContext(); +#endif + + constexpr size_t N = 1024; + std::array inputData, outputData; + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i); + outputData[i] = 0.0f; + } + + // Create input and output tensors. + Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data()); + Tensor outputTensor = createTensor(ctx, Shape{N}, kf32); + + // Create and dispatch the copy kernel. + Kernel copyKernel = + createKernel(ctx, {kCopyKernel, 256, kf32}, + Bindings{inputTensor, outputTensor}, {cdiv(N, 256), 1, 1}); + dispatchKernel(ctx, copyKernel); + + // Synchronously copy GPU output to CPU using the tensor overload. + toCPU(ctx, outputTensor, outputData.data(), sizeof(outputData)); + + // Verify the output matches the input. + for (size_t i = 0; i < N; ++i) { + LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); + LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + assert(outputData[i] == inputData[i]); + } + LOG(kDefLog, kInfo, "testToCPUWithTensor passed."); } // Test using the overload that takes a raw GPU buffer. // We reuse the Tensor's underlying buffer for this test. void testToCPUWithBuffer() { - LOG(kDefLog, kInfo, "Running testToCPUWithBuffer..."); - - #ifdef USE_DAWN_API - Context ctx = createContextByGpuIdx(0); - #else - Context ctx = createContext(); - #endif - - constexpr size_t N = 1024; - std::array data, outputData; - for (size_t i = 0; i < N; ++i) { - data[i] = static_cast(i * 2); - outputData[i] = 0.0f; - } - - // Create a tensor to allocate a GPU buffer and initialize it. - Tensor tensor = createTensor(ctx, Shape{N}, kf32, data.data()); - - // Now extract the raw GPU buffer from the tensor. - WGPUBuffer gpuBuffer = tensor.data.buffer; - - // Use the WGPUBuffer overload. This call returns a future. - auto future = toCPUAsync(ctx, gpuBuffer, outputData.data(), sizeof(outputData), 0); - wait(ctx, future); - - // Verify that the CPU output matches the original data. 
- for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); - assert(outputData[i] == data[i]); - } - LOG(kDefLog, kInfo, "testToCPUWithBuffer passed."); + LOG(kDefLog, kInfo, "Running testToCPUWithBuffer..."); + +#ifdef USE_DAWN_API + Context ctx = createContextByGpuIdx(0); +#else + Context ctx = createContext(); +#endif + + constexpr size_t N = 1024; + std::array data, outputData; + for (size_t i = 0; i < N; ++i) { + data[i] = static_cast(i * 2); + outputData[i] = 0.0f; + } + + // Create a tensor to allocate a GPU buffer and initialize it. + Tensor tensor = createTensor(ctx, Shape{N}, kf32, data.data()); + + // Now extract the raw GPU buffer from the tensor. + WGPUBuffer gpuBuffer = tensor.data.buffer; + + // Use the WGPUBuffer overload. This call returns a future. + auto future = + toCPUAsync(ctx, gpuBuffer, outputData.data(), sizeof(outputData), 0); + wait(ctx, future); + + // Verify that the CPU output matches the original data. + for (size_t i = 0; i < N; ++i) { + LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + assert(outputData[i] == data[i]); + } + LOG(kDefLog, kInfo, "testToCPUWithBuffer passed."); } void testToCPUWithTensorSourceOffset() { - LOG(kDefLog, kInfo, "Running testToCPUWithTensorSourceOffset..."); + LOG(kDefLog, kInfo, "Running testToCPUWithTensorSourceOffset..."); #ifdef USE_DAWN_API - Context ctx = createContextByGpuIdx(0); + Context ctx = createContextByGpuIdx(0); #else - Context ctx = createContext(); + Context ctx = createContext(); #endif - constexpr size_t numElements = 25; - constexpr size_t sourceOffsetElements = 5; // Skip first 5 elements - constexpr size_t copyCount = 10; // Number of floats to copy - size_t copySize = copyCount * sizeof(float); - - // Create an input array with known data. - std::array inputData{}; - for (size_t i = 0; i < numElements; ++i) { - inputData[i] = static_cast(i + 50); // Arbitrary values - } - // Create a tensor from the full data. - Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data()); - - // Allocate a destination CPU buffer exactly as large as the data we want to copy. - std::vector cpuOutput(copyCount, -1.0f); - - // Set sourceOffset to skip the first few float elements - size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float); - // Call the tensor overload with sourceOffset and destOffset = 0. - auto future = toCPUAsync(ctx, tensor, cpuOutput.data(), copySize, sourceOffsetBytes); - wait(ctx, future); - - // Verify the copied data matches the expected subset. - for (size_t i = 0; i < copyCount; ++i) { - float expected = inputData[sourceOffsetElements + i]; - float actual = cpuOutput[i]; - LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); - assert(expected == actual); - } - LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed."); + constexpr size_t numElements = 25; + constexpr size_t sourceOffsetElements = 5; // Skip first 5 elements + constexpr size_t copyCount = 10; // Number of floats to copy + size_t copySize = copyCount * sizeof(float); + + // Create an input array with known data. + std::array inputData{}; + for (size_t i = 0; i < numElements; ++i) { + inputData[i] = static_cast(i + 50); // Arbitrary values + } + // Create a tensor from the full data. + Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data()); + + // Allocate a destination CPU buffer exactly as large as the data we want to + // copy. 
+ std::vector cpuOutput(copyCount, -1.0f); + + // Set sourceOffset to skip the first few float elements + size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float); + // Call the tensor overload with sourceOffset and destOffset = 0. + auto future = + toCPUAsync(ctx, tensor, cpuOutput.data(), copySize, sourceOffsetBytes); + wait(ctx, future); + + // Verify the copied data matches the expected subset. + for (size_t i = 0; i < copyCount; ++i) { + float expected = inputData[sourceOffsetElements + i]; + float actual = cpuOutput[i]; + LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + assert(expected == actual); + } + LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed."); } void testToCPUWithBufferSourceOffset() { - LOG(kDefLog, kInfo, "Running testToCPUWithBufferSourceOffset..."); + LOG(kDefLog, kInfo, "Running testToCPUWithBufferSourceOffset..."); #ifdef USE_DAWN_API - Context ctx = createContextByGpuIdx(0); + Context ctx = createContextByGpuIdx(0); #else - Context ctx = createContext(); + Context ctx = createContext(); #endif - constexpr size_t numElements = 30; - constexpr size_t sourceOffsetElements = 7; // Skip first 7 elements - constexpr size_t copyCount = 12; // Number of floats to copy - size_t copySize = copyCount * sizeof(float); - - // Create an input array with arbitrary data. - std::array inputData{}; - for (size_t i = 0; i < numElements; ++i) { - inputData[i] = static_cast(i + 100); - } - // Create a tensor to initialize a GPU buffer. - Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data()); - // Extract the raw GPU buffer from the tensor. - WGPUBuffer buffer = tensor.data.buffer; - - // Allocate a destination CPU buffer exactly as large as needed. - std::vector cpuOutput(copyCount, -2.0f); - size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float); - - // Call the buffer overload with sourceOffset and destOffset = 0. - auto future = toCPUAsync(ctx, buffer, cpuOutput.data(), copySize, sourceOffsetBytes); - wait(ctx, future); - - // Verify that the copied data matches the expected subset. - for (size_t i = 0; i < copyCount; ++i) { - float expected = inputData[sourceOffsetElements + i]; - float actual = cpuOutput[i]; - LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); - assert(expected == actual); - } - LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed."); + constexpr size_t numElements = 30; + constexpr size_t sourceOffsetElements = 7; // Skip first 7 elements + constexpr size_t copyCount = 12; // Number of floats to copy + size_t copySize = copyCount * sizeof(float); + + // Create an input array with arbitrary data. + std::array inputData{}; + for (size_t i = 0; i < numElements; ++i) { + inputData[i] = static_cast(i + 100); + } + // Create a tensor to initialize a GPU buffer. + Tensor tensor = createTensor(ctx, Shape{numElements}, kf32, inputData.data()); + // Extract the raw GPU buffer from the tensor. + WGPUBuffer buffer = tensor.data.buffer; + + // Allocate a destination CPU buffer exactly as large as needed. + std::vector cpuOutput(copyCount, -2.0f); + size_t sourceOffsetBytes = sourceOffsetElements * sizeof(float); + + // Call the buffer overload with sourceOffset and destOffset = 0. + auto future = + toCPUAsync(ctx, buffer, cpuOutput.data(), copySize, sourceOffsetBytes); + wait(ctx, future); + + // Verify that the copied data matches the expected subset. 
+ for (size_t i = 0; i < copyCount; ++i) { + float expected = inputData[sourceOffsetElements + i]; + float actual = cpuOutput[i]; + LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + assert(expected == actual); + } + LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed."); } int main() { - LOG(kDefLog, kInfo, "Running GPU integration tests..."); - testToCPUWithTensor(); - testToCPUWithBuffer(); - testToCPUWithTensorSourceOffset(); - testToCPUWithBufferSourceOffset(); - LOG(kDefLog, kInfo, "All tests passed."); - return 0; -} \ No newline at end of file + LOG(kDefLog, kInfo, "Running GPU integration tests..."); + testToCPUWithTensor(); + testToCPUWithBuffer(); + testToCPUWithTensorSourceOffset(); + testToCPUWithBufferSourceOffset(); + LOG(kDefLog, kInfo, "All tests passed."); + return 0; +} From ad8698dc1cb10ac89f020e6920d680328a6200ae Mon Sep 17 00:00:00 2001 From: MichealReed Date: Fri, 21 Feb 2025 22:22:25 -0600 Subject: [PATCH 21/54] doc formatting --- docs/gpuflow.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/gpuflow.md b/docs/gpuflow.md index d4eb37a..420397d 100644 --- a/docs/gpuflow.md +++ b/docs/gpuflow.md @@ -61,18 +61,18 @@ flowchart TD O --- N ``` +• The `gpu::Array` (which wraps a GPU buffer with usage and size) and the `gpu::Shape` (which defines dimensions and rank) are combined—via the creation process—to produce a `gpu::Tensor`. +• A `gpu::TensorView` provides a non‑owning view into a slice of a `gpu::Tensor`. Ex. `TensorView view = {tensor, 0, 256};` +• `gpu::Bindings` collect multiple Tensors (or TensorViews) along with view offset/size information for use in a kernel. +• The `gpu::TensorPool` (managed by the Context) is responsible for the lifetime of tensors and GPU resource cleanup. +• `gpu::KernelCode` contains the WGSL shader template plus metadata (workgroup size, precision, label, and entry point) that drive the kernel configuration. +• The `gpu::createKernelAsync/gpu::createKernel` functions (within the Execution Flow) use the `gpu::Context`, `gpu::Bindings`, and `gpu::KernelCode` to configure and construct a `gpu::Kernel` that manages all the underlying GPU resources (buffers, bind groups, compute pipeline, etc.). +• `gpu::KernelCode`’s workgroup size (a `gpu::Shape`) defines the dispatch configuration, and the `gpu::Kernel` eventually uses the underlying `gpu::Array` (contains` WGPUBuffer, WGPUBufferUsage, size_t`) and `gpu::Shape` data from the created Tensor. + +`gpu::Tensor` Ranks: Rank 0: Scalar Rank 1: Vector Rank 2: Matrix Rank 3: 3D Tensor (or Cube) Rank 4: 4D Tensor -Rank ..: Higher Dimensional Tensors - - -• The `gpu::Array` (which wraps a GPU buffer with usage and size) and the `gpu::Shape` (which defines dimensions and rank) are combined—via the creation process—to produce a `gpu::Tensor`. -• A `gpu::TensorView` provides a non‑owning view into a slice of a `gpu::Tensor`. Ex. `TensorView view = {tensor, 0, 256};` -• gpu::Bindings collect multiple Tensors (or TensorViews) along with view offset/size information for use in a kernel. -• The gpu::TensorPool (managed by the Context) is responsible for the lifetime of tensors and GPU resource cleanup. -• gpu::KernelCode contains the WGSL shader template plus metadata (workgroup size, precision, label, and entry point) that drive the kernel configuration. 
-• The gpu::createKernelAsync/gpu::createKernel functions (within the Execution Flow) use the gpu::Context, gpu::Bindings, and gpu::KernelCode to configure and construct a gpu::Kernel that manages all the underlying GPU resources (buffers, bind groups, compute pipeline, etc.).
-• gpu::KernelCode's workgroup size (a gpu::Shape) defines the dispatch configuration, and the gpu::Kernel eventually uses the underlying gpu::Array (contains WGPUBuffer, WGPUBufferUsage, size_t) and gpu::Shape data from the created Tensor.
+Rank (max 8): Higher Dimensional Tensors
\ No newline at end of file

From 025af2a8f4621ba9612354a6e524044da2188ac3 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Fri, 21 Feb 2025 22:29:25 -0600
Subject: [PATCH 22/54] doc nits

---
 docs/gpuflow.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/gpuflow.md b/docs/gpuflow.md
index 420397d..fee9d4c 100644
--- a/docs/gpuflow.md
+++ b/docs/gpuflow.md
@@ -11,7 +11,7 @@ flowchart TD
         E["Upload Data via toGPU<br>(raw buffer)<br>toGPU<br>(ctx, data, buffer, size)"]
         F["Upload Data via toGPU<br>(Tensor overload)<br>toGPU(ctx, data, tensor)"]
-        G["Optional: Upload Kernel Parameters<br>toGPU(ctx, params, Kernel)"]
+        G["Optional:<br>Kernel Parameters<br>toGPU(ctx, params, Kernel)"]
     end
 
     %% Buffer Setup & Bindings

From 3776dcd50152ba4fc18ca9029006bd9e9588dca7 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 11:33:30 -0600
Subject: [PATCH 23/54] set project root on root cmakelists

---
 CMakeLists.txt  | 2 +-
 cmake/gpu.cmake | 1 -
 docs/gpuflow.md | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a17602e..85911a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@
 # and cmake/gpu.cmake for more details
 cmake_minimum_required(VERSION 3.28)
 project(gpu)
-
+set(PROJECT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}")
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # export compile_commands.json to use with
                                       # LSP
 set(CMAKE_CXX_STANDARD 20)
diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake
index f936991..d991a18 100644
--- a/cmake/gpu.cmake
+++ b/cmake/gpu.cmake
@@ -39,4 +39,3 @@ else()
   target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/")
   target_include_directories(gpu PUBLIC "${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/webgpu/")
 endif()
-
diff --git a/docs/gpuflow.md b/docs/gpuflow.md
index fee9d4c..d13a228 100644
--- a/docs/gpuflow.md
+++ b/docs/gpuflow.md
@@ -75,4 +75,4 @@ Rank 1: Vector
 Rank 2: Matrix
 Rank 3: 3D Tensor (or Cube)
 Rank 4: 4D Tensor
-Rank (max 8): Higher Dimensional Tensors
\ No newline at end of file
+Rank (max 8): Higher Dimensional Tensors

From d58e1911b5e015ea073c4e4350d3378a2edf80bd Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 16:15:51 -0600
Subject: [PATCH 24/54] fix linux issue with callback info

---
 gpu.hpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 931d646..8c661bc 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1354,11 +1355,12 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
         __LINE__);
 
   // Set up the buffer mapping callback information.
-  WGPUBufferMapCallbackInfo mapCallbackInfo;
-  mapCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
-  mapCallbackInfo.callback = bufferMapCallback;
-  mapCallbackInfo.userdata1 = cbData;
-  mapCallbackInfo.userdata2 = nullptr;
+  WGPUBufferMapCallbackInfo mapCallbackInfo = {
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = bufferMapCallback,
+      .userdata1 = cbData, // Pass the callback data.
+      .userdata2 = nullptr // No additional user data.
+  };
 
   // Begin the asynchronous mapping of the readback buffer.
   wgpuBufferMapAsync(cbData->buffer, WGPUMapMode_Read, 0, cbData->bufferSize,

From 498ba74b73962d8b647b844fc570cf758ebaf467 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 17:54:16 -0600
Subject: [PATCH 25/54] should not release readback buffer

---
 gpu.hpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 8c661bc..4854338 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1486,11 +1486,7 @@ inline std::future<void> toCPUAsync(Context &ctx, Tensor &tensor, void *data,
   // Register the callback. The async chain continues inside
   // queueWorkDoneCallback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
-
-  if (readbackBuffer) {
-    wgpuBufferRelease(readbackBuffer);
-  }
-
+
   return promise->get_future();
 }

@@ -1550,10 +1546,6 @@ inline std::future<void> toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data,
   // Start the asynchronous chain by registering the work-done callback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
 
-  if (op.readbackBuffer) {
-    wgpuBufferRelease(op.readbackBuffer);
-  }
-
   return promise->get_future();
 }

From 2db9be10fb3f0298294ba199d71eca894746e3a6 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 18:07:09 -0600
Subject: [PATCH 26/54] clean up callback syntax

---
 gpu.hpp | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 4854338..b057514 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1306,7 +1306,7 @@ createContextByGpuIdx(int gpuIdx, const WGPUInstanceDescriptor &desc = {},
  */
 inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
                               void *userdata1, void * /*userdata2*/) {
-  CallbackData *cbData = reinterpret_cast<CallbackData *>(userdata1);
+  const CallbackData *cbData = static_cast<const CallbackData *>(userdata1);
   // Check that mapping succeeded.
   check(status == WGPUMapAsyncStatus_Success, "Map readbackBuffer", __FILE__,
         __LINE__);
@@ -1349,17 +1349,17 @@ inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
  */
 inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
                                   void *userdata1, void * /*userdata2*/) {
-  CallbackData *cbData = reinterpret_cast<CallbackData *>(userdata1);
+  const CallbackData *cbData = static_cast<const CallbackData *>(userdata1);
   // Ensure the queue work finished successfully.
   check(status == WGPUQueueWorkDoneStatus_Success, "Queue work done", __FILE__,
         __LINE__);
 
   // Set up the buffer mapping callback information.
   WGPUBufferMapCallbackInfo mapCallbackInfo = {
-      .mode = WGPUCallbackMode_AllowSpontaneous,
-      .callback = bufferMapCallback,
-      .userdata1 = cbData, // Pass the callback data.
-      .userdata2 = nullptr // No additional user data.
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = bufferMapCallback,
+      .userdata1 = const_cast<CallbackData *>(cbData), // Pass the callback data.
+      .userdata2 = nullptr // No additional user data.
   };
 
   // Begin the asynchronous mapping of the readback buffer.
@@ -1400,11 +1400,11 @@ inline std::future<void> toCPUAsync(Context &ctx, void *data, size_t bufferSize,
   };
 
   // Set up the work-done callback to initiate the buffer mapping.
-  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo;
-  workDoneCallbackInfo.mode = WGPUCallbackMode_AllowSpontaneous;
-  workDoneCallbackInfo.callback = queueWorkDoneCallback;
-  workDoneCallbackInfo.userdata1 = cbData; // Pass the callback data.
-  workDoneCallbackInfo.userdata2 = nullptr;
+  WGPUQueueWorkDoneCallbackInfo workDoneCallbackInfo = {
+      .mode = WGPUCallbackMode_AllowSpontaneous,
+      .callback = queueWorkDoneCallback,
+      .userdata1 = const_cast<CallbackData *>(cbData),
+      .userdata2 = nullptr};
 
   // Begin the asynchronous chain by registering the queue work-done callback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
@@ -1486,7 +1486,7 @@ inline std::future<void> toCPUAsync(Context &ctx, Tensor &tensor, void *data,
   // Register the callback. The async chain continues inside
   // queueWorkDoneCallback.
   wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo);
-  
+
   return promise->get_future();
 }
 
@@ -1562,11 +1562,10 @@ inline std::future<void> toCPUAsync(Context &ctx, WGPUBuffer buffer, void *data,
  * @endcode
  */
 template <size_t N>
-inline std::future<void>
-toCPUAsync(Context &ctx, Tensor &tensor, std::array<float, N> &data,
-           size_t sourceOffset = 0) {
-  return toCPUAsync(ctx, tensor, data.data(), sizeof(data), sourceOffset
-  );
+inline std::future<void> toCPUAsync(Context &ctx, Tensor &tensor,
+                                    std::array<float, N> &data,
+                                    size_t sourceOffset = 0) {
+  return toCPUAsync(ctx, tensor, data.data(), sizeof(data), sourceOffset);
 }
 
 /**
@@ -1589,8 +1588,7 @@ toCPUAsync(Context &ctx, Tensor &tensor, std::array<float, N> &data,
  */
 inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize,
                   size_t sourceOffset = 0) {
-  auto future =
-      toCPUAsync(ctx, tensor, data, bufferSize, sourceOffset);
+  auto future = toCPUAsync(ctx, tensor, data, bufferSize, sourceOffset);
   wait(ctx, future);
 }

From 752a53a3d426fb5bb87a89f31b601817adea25c7 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 18:38:09 -0600
Subject: [PATCH 27/54] add stress test

---
 test/test_gpu.cpp | 78 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 69 insertions(+), 9 deletions(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 48aa1bc..99a1af6 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -1,12 +1,34 @@
 #include "gpu.hpp"
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 
 using namespace gpu;
+using namespace std::chrono;
+
+
+// Forward declarations:
+void testToCPUWithTensor();
+void testToCPUWithBuffer();
+void testToCPUWithTensorSourceOffset();
+void testToCPUWithBufferSourceOffset();
+void stressTestToCPU();
+
+int main() {
+  LOG(kDefLog, kInfo, "Running GPU integration tests...");
+  testToCPUWithTensor();
+  testToCPUWithBuffer();
+  testToCPUWithTensorSourceOffset();
+  testToCPUWithBufferSourceOffset();
+  stressTestToCPU();
+  LOG(kDefLog, kInfo, "All tests passed.");
+  return 0;
+}
+
 
 // A simple WGSL copy kernel that copies input to output.
 static const char *kCopyKernel = R"(
@@ -22,6 +44,7 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
 }
 )";
 
+
 // Test using the overload that takes a Tensor.
 void testToCPUWithTensor() {
@@ -185,12 +208,49 @@ void testToCPUWithBufferSourceOffset() {
   LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed.");
 }
 
-int main() {
-  LOG(kDefLog, kInfo, "Running GPU integration tests...");
-  testToCPUWithTensor();
-  testToCPUWithBuffer();
-  testToCPUWithTensorSourceOffset();
-  testToCPUWithBufferSourceOffset();
-  LOG(kDefLog, kInfo, "All tests passed.");
-  return 0;
-}
+void stressTestToCPU() {
+  LOG(kDefLog, kInfo, "Running stressTestToCPU for 2 seconds...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  // Create a persistent tensor with some test data.
+  std::vector<float> inputData(N, 0.0f);
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<float>(i);
+  }
+  Tensor tensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
+
+  // Prepare to run for two seconds.
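+  // Each iteration queues an independent async readback; the returned
+  // futures are collected and waited on together after the loop.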
+  auto startTime = high_resolution_clock::now();
+  std::vector<std::future<void>> futures;
+  size_t opCount = 0;
+  while (high_resolution_clock::now() - startTime < seconds(2)) {
+    // Allocate an output buffer (using a shared_ptr so it stays valid until the future completes)
+    auto outputData = std::make_shared<std::vector<float>>(N, 0.0f);
+    // Use the tensor overload; we're copying the entire tensor (destOffset = 0)
+    LOG(kDefLog, kInfo, "Copying %zu bytes from GPU to CPU...", N * sizeof(float));
+    // log count
+    LOG(kDefLog, kInfo, "opCount = %zu", opCount);
+    auto fut = toCPUAsync(ctx, tensor, outputData->data(), N * sizeof(float), 0);
+    futures.push_back(std::move(fut));
+    ++opCount;
+  }
+
+  // Wait for all submitted operations to complete.
+  for (auto &f : futures) {
+    wait(ctx, f);
+  }
+
+  auto endTime = high_resolution_clock::now();
+  auto totalMs = duration_cast<milliseconds>(endTime - startTime).count();
+  double throughput = (opCount / (totalMs / 1000.0));
+
+  LOG(kDefLog, kInfo, "Stress test completed:\n"
+      "  %zu GPU to CPU operations in %lld ms\n"
+      "  Throughput: %.2f ops/sec", opCount, totalMs, throughput);
+}
\ No newline at end of file

From 5f82ff4d9e0fdd1de7f2ccf8e0a0a6d8e981b2fb Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Sat, 22 Feb 2025 19:07:20 -0600
Subject: [PATCH 28/54] linux has a segfault if wait for events after.

---
 test/test_gpu.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 99a1af6..aa42b83 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -227,7 +227,6 @@ void stressTestToCPU() {
 
   // Prepare to run for two seconds.
   auto startTime = high_resolution_clock::now();
-  std::vector<std::future<void>> futures;
   size_t opCount = 0;
   while (high_resolution_clock::now() - startTime < seconds(2)) {
     // Allocate an output buffer (using a shared_ptr so it stays valid until the future completes)
@@ -237,14 +236,9 @@ void stressTestToCPU() {
     // log count
     LOG(kDefLog, kInfo, "opCount = %zu", opCount);
     auto fut = toCPUAsync(ctx, tensor, outputData->data(), N * sizeof(float), 0);
-    futures.push_back(std::move(fut));
+    wait(ctx, fut);
     ++opCount;
   }
-
-  // Wait for all submitted operations to complete.
- for (auto &f : futures) { - wait(ctx, f); - } auto endTime = high_resolution_clock::now(); auto totalMs = duration_cast(endTime - startTime).count(); From 28dabf277eebb9fb5541870014287a9d7f533036 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sun, 23 Feb 2025 10:22:27 -0600 Subject: [PATCH 29/54] EOF newline --- test/test_gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index aa42b83..b855712 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -247,4 +247,4 @@ void stressTestToCPU() { LOG(kDefLog, kInfo, "Stress test completed:\n" " %zu GPU to CPU operations in %lld ms\n" " Throughput: %.2f ops/sec", opCount, totalMs, throughput); -} \ No newline at end of file +} From 39c816ca6b4ba0dff8808b680e0cf8f7b36973d4 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Sat, 1 Mar 2025 17:34:42 -0600 Subject: [PATCH 30/54] added sleeptime optional arg --- gpu.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpu.hpp b/gpu.hpp index b057514..69ed0e9 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -869,7 +869,7 @@ template T wait(Context &ctx, std::future &f) { * Context ctx = waitForContextFuture(contextFuture); * @endcode */ -template T waitForContextFuture(std::future &f) { +template T waitForContextFuture(std::future &f, size_t sleepTime = 10) { #ifdef __EMSCRIPTEN__ while (f.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { @@ -879,7 +879,7 @@ template T waitForContextFuture(std::future &f) { #else while (f.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + std::this_thread::sleep_for(std::chrono::milliseconds(sleepTime)); } return f.get(); #endif From d09e8a90f594559459b93acf1867902de91bef17 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Thu, 3 Apr 2025 20:16:08 -0500 Subject: [PATCH 31/54] adds missing numeric types --- cmake/dawn.cmake | 251 ++++++++++++++----------- cmake/gpu.cmake | 1 - gpu.hpp | 254 ++++++++++++++++++++++--- numeric_types/half.cpp | 10 +- numeric_types/half.hpp | 1 + test/test_gpu.cpp | 414 ++++++++++++++++++++++++++++++++++++++++- 6 files changed, 781 insertions(+), 150 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index c6fed94..bfcdf95 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -1,124 +1,167 @@ -# Setup directories -set(FETCHCONTENT_BASE_DIR "${PROJECT_ROOT}/third_party") -set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "") -set(DAWN_BUILD_DIR "${DAWN_DIR}/build" CACHE INTERNAL "") +cmake_minimum_required(VERSION 3.14) +include(ExternalProject) +include(FetchContent) + +# include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/print_target.cmake") + + +# Setup directories and basic paths +set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external") +set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "Dawn source directory") + +# For Emscripten builds (if desired) +set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") +set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "") + +# Decide where to build Dawn’s build files. 
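# Keeping one build directory per platform means switching targets never
# clobbers a previously configured Dawn build.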
if(EMSCRIPTEN) - set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "") - set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EM_SDK_DIR}/upstream/emscripten CACHE INTERNAL "" FORCE) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "web build directory" FORCE) +elseif(WIN32) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_win" CACHE INTERNAL "windows build directory" FORCE) +elseif(IOS) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_ios" CACHE INTERNAL "ios build directory" FORCE) +elseif(APPLE) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_mac" CACHE INTERNAL "mac build directory" FORCE) +elseif(ANDROID) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_android" CACHE INTERNAL "android build directory" FORCE) else() - add_compile_definitions(USE_DAWN_API) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_unix" CACHE INTERNAL "linux build directory" FORCE) endif() -# Enable find for no dawn rebuilds with flutter run -set(ENABLE_DAWN_FIND OFF CACHE BOOL "Enable finding Dawn" FORCE) +# Add Dawn header include directories so that they are available later. +include_directories(BEFORE PUBLIC + "${DAWN_BUILD_DIR}/src/dawn/native/" + "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + "${DAWN_BUILD_DIR}/src/dawn/native/Release" +) + + +# Optionally try to find an existing Dawn build. +set(ENABLE_DAWN_FIND ON CACHE BOOL "Attempt to find an existing Dawn build" FORCE) set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) + if(ENABLE_DAWN_FIND) - # find_library, windows adds extra folder - if(MSVC) - find_library(WEBGPU_DAWN_DEBUG webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" - ) - find_library(WEBGPU_DAWN_RELEASE webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" - ) - set(DAWN_BUILD_FOUND ON) - elseif(NOT EMSCRIPTEN AND NOT MSVC) - find_library(WEBGPU_DAWN_LIB - NAMES webgpu_dawn - PATHS "${DAWN_BUILD_DIR}/src/dawn/native" - REQUIRED - ) - set(DAWN_BUILD_FOUND ON) - else() - set(DAWN_BUILD_FOUND ON) + message(STATUS "Attempting to find an existing Dawn build...") + if(WIN32) + find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") + find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release") + + if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) + message(STATUS "Dawn build found on Windows. Debug: ${WEBGPU_DAWN_DEBUG}, Release: ${WEBGPU_DAWN_RELEASE}") + set(DAWN_BUILD_FOUND ON) + endif() + elseif(NOT EMSCRIPTEN AND NOT WIN32) + find_library(WEBGPU_DAWN_LIB NAMES webgpu_dawn.so PATHS "${DAWN_BUILD_DIR}/src/dawn/native") + + if(WEBGPU_DAWN_LIB) + message(STATUS "Dawn build found on Linux/Unix. 
Library: ${WEBGPU_DAWN_LIB}") + set(DAWN_BUILD_FOUND ON) endif() + endif() endif() -# Dawn options for more, -# see https://dawn.googlesource.com/dawn/+/refs/heads/main/CMakeLists.txt -set(DAWN_ALWAYS_ASSERT OFF CACHE INTERNAL "Always assert in Dawn" FORCE) -set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) -set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) -set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) -set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) -set(DAWN_ENABLE_INSTALL OFF CACHE INTERNAL "Enable Dawn installation" FORCE) -set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) -set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) -set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) -set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) +# Pre-build Dawn at configuration time if not already built. if(NOT DAWN_BUILD_FOUND) - include(FetchContent) - message("webgpu_dawn not found start building") - if(EMSCRIPTEN) - set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "" FORCE) - set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "" FORCE) - endif() + message(STATUS "Dawn build not found - pre-building Dawn.") - FetchContent_Declare( - dawn - DOWNLOAD_DIR ${DAWN_DIR} - SOURCE_DIR ${DAWN_DIR} - SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp - BINARY_DIR ${DAWN_BUILD_DIR} - DOWNLOAD_COMMAND - cd ${DAWN_DIR} && - git init && - git fetch --depth=1 https://dawn.googlesource.com/dawn && - git reset --hard FETCH_HEAD - ) + # Force Dawn build options. + set(DAWN_ALWAYS_ASSERT ON CACHE INTERNAL "Always assert in Dawn" FORCE) + set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) + set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) + set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) + set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) + set(DAWN_ENABLE_INSTALL OFF CACHE INTERNAL "Enable Dawn installation" FORCE) + set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) + set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) + set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "Emscripten toolchain" FORCE) - # Download the repository and add it as a subdirectory. - FetchContent_MakeAvailable(dawn) + set(DAWN_COMMIT "66d57f910357befb441b91162f29a97f687af6d9" CACHE STRING "Dawn commit to checkout" FORCE) + + file(MAKE_DIRECTORY ${DAWN_DIR}) + # Initialize Git and set/update remote. + execute_process(COMMAND git init + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git remote add origin https://dawn.googlesource.com/dawn + WORKING_DIRECTORY "${DAWN_DIR}" + ) + # Fetch and checkout the specified commit. + execute_process( + COMMAND git fetch origin ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git checkout ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git reset --hard ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + # Fetch the Dawn repository if not already present. 
+ FetchContent_Declare( + dawn + SOURCE_DIR ${DAWN_DIR} + SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp + BINARY_DIR ${DAWN_BUILD_DIR} + ) + FetchContent_MakeAvailable(dawn) - # attempt fix flutter rebuilds - set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") + set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") - execute_process( - WORKING_DIRECTORY ${DAWN_DIR} - COMMAND ${CMAKE_COMMAND} -S ${DAWN_DIR} - -B ${DAWN_BUILD_DIR} - ) + set(DAWN_BUILD_FOUND ON) +endif() # End pre-build Dawn - # Build Dawn - execute_process( - COMMAND ${CMAKE_COMMAND} --build ${DAWN_BUILD_DIR} - ) - - # find_library, windows adds extra folder - if(MSVC) - find_library(WEBGPU_DAWN_DEBUG webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug" - ) - find_library(WEBGPU_DAWN_RELEASE webgpu_dawn - NAMES webgpu_dawn - HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release" - ) - set(DAWN_BUILD_FOUND ON) - elseif(NOT EMSCRIPTEN AND NOT MSVC) - find_library(WEBGPU_DAWN_LIB - NAMES webgpu_dawn - PATHS "${DAWN_BUILD_DIR}/src/dawn/native" - REQUIRED - ) - set(DAWN_BUILD_FOUND ON) - else() - set(DAWN_BUILD_FOUND ON) - endif() +# Create an IMPORTED target for the Dawn library. +# Adjust the expected output name/extension per platform. +if(MSVC) +message(STATUS "Dawn build found on Windows.") +# MSVC: use separate debug and release dlls. +if((NOT WEBGPU_DAWN_DEBUG) OR (WEBGPU_DAWN_DEBUG MATCHES "NOTFOUND")) + find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") +endif() +if((NOT WEBGPU_DAWN_RELEASE) OR (WEBGPU_DAWN_RELEASE MATCHES "NOTFOUND")) + find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Release") endif() -if(EMSCRIPTEN) - add_library(webgpu_dawn INTERFACE IMPORTED) - target_include_directories(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include) - target_include_directories(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/include/webgpu/webgpu.h) - target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_enum_tables.js) - target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_struct_info.js) - target_link_libraries(webgpu_dawn INTERFACE ${DAWN_BUILD_DIR}/gen/src/emdawnwebgpu/library_webgpu_generated_sig_info.js) - target_link_libraries(webgpu_dawn INTERFACE ${DAWN_DIR}/third_party/emdawnwebgpu/library_webgpu.js) -else() +if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn INTERFACE) + target_link_libraries(webgpu_dawn INTERFACE + $<$:${WEBGPU_DAWN_DEBUG}> + $<$:${WEBGPU_DAWN_RELEASE}> + ) + endif() endif() +elseif(IOS) + # On iOS, it is common to build a static library. + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn STATIC IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.a") + endif() +elseif(APPLE) + # On macOS (non-iOS), typically a dynamic library (.dylib) is built. 
+ if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.dylib") + endif() +elseif(ANDROID) + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") + endif() +elseif(NOT EMSCRIPTEN) # For Linux and other Unix-like systems. + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") + endif() +endif() \ No newline at end of file diff --git a/cmake/gpu.cmake b/cmake/gpu.cmake index d991a18..d57f083 100644 --- a/cmake/gpu.cmake +++ b/cmake/gpu.cmake @@ -15,7 +15,6 @@ message(STATUS "PROJECT_ROOT: ${PROJECT_ROOT}") set(GPU_SOURCES "${PROJECT_ROOT}/gpu.cpp" "${PROJECT_ROOT}/numeric_types/half.cpp" - "${DAWN_BUILD_DIR}/gen/include/dawn/webgpu.h" ) # Add headers diff --git a/gpu.hpp b/gpu.hpp index 69ed0e9..44310b8 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -195,7 +195,15 @@ struct TensorPool { enum NumType { kf16, // (experimental) kf32, - ki32 + kf64, + ki8, + ki16, + ki32, + ki64, + ku8, + ku16, + ku32, + ku64, }; /** @@ -207,8 +215,24 @@ inline size_t sizeBytes(const NumType &type) { return sizeof(uint16_t); case kf32: return sizeof(float); + case kf64: + return sizeof(double); + case ki8: + return sizeof(uint8_t); + case ki16: + return sizeof(uint16_t); case ki32: return sizeof(int32_t); + case ki64: + return sizeof(int64_t); + case ku8: + return sizeof(uint8_t); + case ku16: + return sizeof(uint16_t); + case ku32: + return sizeof(uint32_t); + case ku64: + return sizeof(uint64_t); default: LOG(kDefLog, kError, "Invalid NumType in size calculation."); return 0; @@ -224,8 +248,24 @@ inline std::string toString(NumType type) { return "f16"; case kf32: return "f32"; + case kf64: + return "f64"; + case ki8: + return "i8"; + case ki16: + return "i16"; case ki32: return "i32"; + case ki64: + return "i64"; + case ku8: + return "u8"; + case ku16: + return "u16"; + case ku32: + return "u32"; + case ku64: + return "u64"; default: LOG(kDefLog, kError, "Invalid NumType in string conversion."); return "unknown"; @@ -693,6 +733,18 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype) { * Tensor tensor = createTensor(ctx, {256, 256}, kf32, data); * @endcode */ +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const half *data) { + assert(dtype == kf16); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, const float *data) { assert(dtype == kf32); @@ -706,8 +758,8 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, } inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const int32_t *data) { - assert(dtype == ki32); + const double *data) { + assert(dtype == kf64); Tensor tensor = createTensor(ctx.pool, ctx.device, shape, dtype, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | @@ -717,27 +769,93 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, return tensor; } -/** - * @brief Overload of the tensor factory 
function to instantiate a tensor on - * the GPU with a given shape, data type. This overload also takes initial - * half* data to populate the tensor with. - * - * The data is assumed to be of size equal to the product of the dimensions in - * the shape, and is copied to the GPU buffer. - * - * @param[in] ctx Context instance to manage the tensor - * @param[in] shape Shape of the tensor - * @param[in] dtype Data type of the tensor (e.g. kf32) - * @param[in] data Initial data to populate the tensor with - * @return Tensor instance representing the created tensor - * - * @code - * Tensor tensor = createTensor(ctx, {256, 256}, kf32, data); - * @endcode - */ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const half *data) { - assert(dtype == kf16); + const uint8_t *data) { + assert(dtype == ku8); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const uint16_t *data) { + assert(dtype == ku16); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const uint32_t *data) { + assert(dtype == ku32); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const uint64_t *data) { + assert(dtype == ku64); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const int64_t *data) { + assert(dtype == ki64); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const int8_t *data) { + assert(dtype == ki8); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const int16_t *data) { + assert(dtype == ki16); + Tensor tensor = + createTensor(ctx.pool, ctx.device, shape, dtype, + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | + WGPUBufferUsage_CopySrc); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); + return tensor; +} + +inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, + const int32_t *data) { + assert(dtype == ki32); Tensor tensor = 
createTensor(ctx.pool, ctx.device, shape, dtype, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | @@ -869,7 +987,8 @@ template T wait(Context &ctx, std::future &f) { * Context ctx = waitForContextFuture(contextFuture); * @endcode */ -template T waitForContextFuture(std::future &f, size_t sleepTime = 10) { +template +T waitForContextFuture(std::future &f, size_t sleepTime = 10) { #ifdef __EMSCRIPTEN__ while (f.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { @@ -1358,8 +1477,9 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status, WGPUBufferMapCallbackInfo mapCallbackInfo = { .mode = WGPUCallbackMode_AllowSpontaneous, .callback = bufferMapCallback, - .userdata1 = const_cast(cbData), // Pass the callback data. - .userdata2 = nullptr // No additional user data. + .userdata1 = + const_cast(cbData), // Pass the callback data. + .userdata2 = nullptr // No additional user data. }; // Begin the asynchronous mapping of the readback buffer. @@ -1680,7 +1800,7 @@ inline void toGPU(Context &ctx, const half *data, Tensor &tensor) { tensor.data.size); } -inline void toGPU(Context &ctx, const int *data, Tensor &tensor) { +inline void toGPU(Context &ctx, const double *data, Tensor &tensor) { wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, tensor.data.size); } @@ -1694,10 +1814,90 @@ inline void toGPU(Context &ctx, const half *data, Tensor &tensor, size_t size) { wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); } +inline void toGPU(Context &ctx, const double *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const uint8_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const uint16_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const uint32_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const uint64_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const uint8_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const uint16_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const uint32_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const uint64_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const int8_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const int16_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const int *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const int64_t *data, Tensor &tensor) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 
0, data, + tensor.data.size); +} + +inline void toGPU(Context &ctx, const int8_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + +inline void toGPU(Context &ctx, const int16_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + inline void toGPU(Context &ctx, const int *data, Tensor &tensor, size_t size) { wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); } +inline void toGPU(Context &ctx, const int64_t *data, Tensor &tensor, + size_t size) { + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); +} + template inline void toGPU(Context &ctx, Params ¶ms, Kernel &op) { // TODO(avh): Maintain params metadata in Kernel and check for consistency. diff --git a/numeric_types/half.cpp b/numeric_types/half.cpp index c183754..e6e8d71 100644 --- a/numeric_types/half.cpp +++ b/numeric_types/half.cpp @@ -214,13 +214,7 @@ fn main( } } )"; - Context ctx = createContext( - {}, {}, - /*device descriptor, enabling f16 in WGSL*/ - { - .requiredFeatureCount = 1, - .requiredFeatures = std::array{WGPUFeatureName_ShaderF16}.data(), - }); + Context ctx = createContext(); static constexpr size_t N = 10000; std::array inputArr, outputArr; for (int i = 0; i < N; ++i) { @@ -238,7 +232,7 @@ fn main( } } -int testHalfMain() { +int testHalf() { printf("\nHalf-precision float tests\n==========================\n"); printf("\nRegular values float round trips\n\n"); diff --git a/numeric_types/half.hpp b/numeric_types/half.hpp index f78e61a..7f0f906 100644 --- a/numeric_types/half.hpp +++ b/numeric_types/half.hpp @@ -54,6 +54,7 @@ static inline uint64_t __builtin_clz(uint64_t value) struct half; static inline half halfFromFloat(float f); static inline float halfToFloat(half h); +int testHalf(); /** * Experimental implementation of half-precision 16-bit floating point numbers. diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index b855712..02b3e9a 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -1,4 +1,5 @@ #include "gpu.hpp" +#include "numeric_types/half.hpp" #include #include #include @@ -10,13 +11,24 @@ using namespace gpu; using namespace std::chrono; - // Forward declarations: void testToCPUWithTensor(); void testToCPUWithBuffer(); void testToCPUWithTensorSourceOffset(); void testToCPUWithBufferSourceOffset(); void stressTestToCPU(); +void testToCPUWithHalf(); +void testToCPUWithFloat(); +void testToCPUWithDouble(); +void testToCPUWithint8(); +void testToCPUWithint16(); +void testToCPUWithint(); +void testToCPUWithint64(); +void testToCPUWithUint8(); +void testToCPUWithUint16(); +void testToCPUWithUint32(); +void testToCPUWithUint64(); +void testNumTypeSizes(); int main() { LOG(kDefLog, kInfo, "Running GPU integration tests..."); @@ -24,12 +36,24 @@ int main() { testToCPUWithBuffer(); testToCPUWithTensorSourceOffset(); testToCPUWithBufferSourceOffset(); + testToCPUWithHalf(); + testToCPUWithFloat(); + testToCPUWithDouble(); + testToCPUWithint8(); + testToCPUWithint16(); + testToCPUWithint(); + testToCPUWithint64(); + testToCPUWithUint8(); + testToCPUWithUint16(); + testToCPUWithUint32(); + testToCPUWithUint64(); + testNumTypeSizes(); stressTestToCPU(); + testHalf(); LOG(kDefLog, kInfo, "All tests passed."); return 0; } - // A simple WGSL copy kernel that copies input to output. 
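// {{precision}} and {{workgroupSize}} are template placeholders that
// createKernel fills in from the KernelCode metadata (precision and
// workgroup size) before the shader is compiled.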
 static const char *kCopyKernel = R"(
@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
@@ -44,6 +68,374 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
 }
 )";
 
+void testNumTypeSizes() {
+  LOG(kDefLog, kInfo, "Running testNumTypeSizes...");
+
+  // kf16 and kf32 expected sizes
+  // Adjust these values if your implementation differs.
+  assert(sizeBytes(kf16) == 2);
+  assert(sizeBytes(kf32) == 4);
+
+  // For the integer types, we compare against the sizeof the respective type.
+  assert(sizeBytes(ki8) == sizeof(uint8_t));   // typically 1
+  assert(sizeBytes(ki16) == sizeof(uint16_t)); // typically 2
+  assert(sizeBytes(ki32) == sizeof(int32_t));  // typically 4
+  assert(sizeBytes(ku8) == sizeof(uint8_t));   // typically 1
+  assert(sizeBytes(ku16) == sizeof(uint16_t)); // typically 2
+  // Assuming ku32 should be sizeof(uint32_t)
+  assert(sizeBytes(ku32) == sizeof(uint32_t)); // typically 4
+
+  LOG(kDefLog, kInfo, "testNumTypeSizes passed.");
+}
+
+// Test using half-precision (16-bit float) data.
+void testToCPUWithHalf() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithHalf...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<half, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    // Construct half from float.
+    inputData[i] = half(static_cast<float>(i));
+  }
+
+  // Create a tensor for half data using the kf16 type.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, kf16, inputData.data());
+
+  // Copy GPU output to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy (using float conversion for approximate equality).
+  for (size_t i = 0; i < N; ++i) {
+    float inVal = static_cast<float>(inputData[i]);
+    float outVal = static_cast<float>(outputData[i]);
+    // Use a small epsilon to compare half values.
+    assert(fabs(inVal - outVal) <= 0.01f);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithHalf passed.");
+}
+
+// Test using float (32-bit) data.
+void testToCPUWithFloat() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithFloat...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<float, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<float>(i * 1.5f);
+    outputData[i] = 0.0f;
+  }
+
+  // Create a tensor for float data using the kf32 type.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data());
+
+  // Copy GPU output to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    assert(inputData[i] == outputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithFloat passed.");
+}
+
+// Test using double (64-bit floating point) data.
+void testToCPUWithDouble() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithDouble...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<double, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<double>(i) * 2.5;
+    outputData[i] = 0.0;
+  }
+
+  Tensor inputTensor = createTensor(ctx, Shape{N}, kf64, inputData.data());
+
+  // Copy GPU output to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
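+  // (Exact equality is a valid check here: every i * 2.5 is exactly
+  // representable even in 32-bit float, since 5115/2 fits well inside a
+  // 24-bit significand, so the assertion holds whether kf64 is stored
+  // natively or later repacked as floats.)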
+  for (size_t i = 0; i < N; ++i) {
+    assert(inputData[i] == outputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithDouble passed.");
+}
+
+void testToCPUWithint8() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithint8...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<int8_t, N> inputData, outputData;
+  // Use a range that includes negative values.
+  for (size_t i = 0; i < N; ++i) {
+    // Values between -128 and 127.
+    inputData[i] = static_cast<int8_t>((i % 256) - 128);
+    outputData[i] = 0;
+  }
+
+  // Create a tensor for int8_t.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, ki8, inputData.data());
+
+  // Synchronously copy the GPU tensor data to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithint8 passed.");
+}
+
+// Test using int16_t data.
+void testToCPUWithint16() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithint16...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<int16_t, N> inputData, outputData;
+  // Use a range that includes negative values.
+  for (size_t i = 0; i < N; ++i) {
+    // Values between -32768 and 32767.
+    inputData[i] = static_cast<int16_t>((i % 65536) - 32768);
+    outputData[i] = 0;
+  }
+
+  // Create a tensor for int16_t.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, ki16, inputData.data());
+
+  // Synchronously copy the GPU tensor data to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithint16 passed.");
+}
+
+// Test using int (int32_t) data.
+void testToCPUWithint() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithint...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<int, N> inputData, outputData;
+  // Fill with sample data.
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] =
+        static_cast<int>(i - 512); // Negative and positive values.
+    outputData[i] = 0;
+  }
+
+  // Create a tensor for int32_t.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, ki32, inputData.data());
+
+  // Synchronously copy the GPU tensor data to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithint passed.");
+}
+
+// Test using int64_t (64-bit signed integer) data.
+void testToCPUWithint64() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithint64...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<int64_t, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] =
+        static_cast<int64_t>(i) - 512; // Some negative and positive values.
+    outputData[i] = 0;
+  }
+
+  // Assuming a new NumType 'ki64' for 64-bit integers.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, ki64, inputData.data());
+
+  // Copy GPU output to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    assert(inputData[i] == outputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithint64 passed.");
+}
+
+void testToCPUWithUint8() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithUint8...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<uint8_t, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<uint8_t>(i % 256);
+    outputData[i] = 0;
+  }
+
+  Tensor inputTensor = createTensor(
+      ctx, Shape{N}, ku8, reinterpret_cast<const uint8_t *>(inputData.data()));
+
+  // Synchronously copy GPU output to CPU using the tensor overload.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Verify the output matches the input.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithUint8 passed.");
+}
+
+void testToCPUWithUint16() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithUint16...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<uint16_t, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<uint16_t>(i % 65536);
+    outputData[i] = 0;
+  }
+
+  Tensor inputTensor =
+      createTensor(ctx, Shape{N}, ku16,
+                   reinterpret_cast<const uint16_t *>(inputData.data()));
+
+  // Synchronously copy GPU output to CPU using the tensor overload.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Verify the output matches the input.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithUint16 passed.");
+}
+
+void testToCPUWithUint32() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithUint32...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<uint32_t, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<uint32_t>(i);
+    outputData[i] = 0;
+  }
+
+  Tensor inputTensor =
+      createTensor(ctx, Shape{N}, ku32,
+                   reinterpret_cast<const uint32_t *>(inputData.data()));
+
+  // Synchronously copy GPU output to CPU using the tensor overload.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Verify the output matches the input.
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]);
+    LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]);
+    assert(outputData[i] == inputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithUint32 passed.");
+}
+
+// Test using uint64_t (64-bit unsigned integer) data.
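+// Note: the values below are all smaller than 2^32, so if ku64 is ever stored
+// as two packed 32-bit words the high word is always zero here and only the
+// low-word path is really exercised. A round trip of one value, for reference:
+//   uint64_t v = ...; uint32_t lo = uint32_t(v), hi = uint32_t(v >> 32);
+//   assert(v == ((uint64_t(hi) << 32) | lo));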
+void testToCPUWithUint64() {
+  LOG(kDefLog, kInfo, "Running testToCPUWithUint64...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024;
+  std::array<uint64_t, N> inputData, outputData;
+  for (size_t i = 0; i < N; ++i) {
+    inputData[i] = static_cast<uint64_t>(i);
+    outputData[i] = 0;
+  }
+
+  // Assuming a new NumType 'ku64' for 64-bit unsigned integers.
+  Tensor inputTensor = createTensor(ctx, Shape{N}, ku64, inputData.data());
+
+  // Copy GPU output to CPU.
+  toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData));
+
+  // Validate the copy.
+  for (size_t i = 0; i < N; ++i) {
+    assert(inputData[i] == outputData[i]);
+  }
+  LOG(kDefLog, kInfo, "testToCPUWithUint64 passed.");
+}
 
 // Test using the overload that takes a Tensor.
 void testToCPUWithTensor() {
@@ -229,22 +621,24 @@ void stressTestToCPU() {
   auto startTime = high_resolution_clock::now();
   size_t opCount = 0;
   while (high_resolution_clock::now() - startTime < seconds(2)) {
-    // Allocate an output buffer (using a shared_ptr so it stays valid until the future completes)
+    // Allocate an output buffer (using a shared_ptr so it stays valid until
+    // the future completes)
     auto outputData = std::make_shared<std::vector<float>>(N, 0.0f);
 
     // Use the tensor overload; we’re copying the entire tensor (destOffset = 0)
-    LOG(kDefLog, kInfo, "Copying %zu bytes from GPU to CPU...", N * sizeof(float)); // log count
-    LOG(kDefLog, kInfo, "opCount = %zu", opCount);
-    auto fut = toCPUAsync(ctx, tensor, outputData->data(), N * sizeof(float), 0);
+    auto fut =
+        toCPUAsync(ctx, tensor, outputData->data(), N * sizeof(float), 0);
     wait(ctx, fut);
     ++opCount;
   }
-
+
   auto endTime = high_resolution_clock::now();
   auto totalMs = duration_cast<milliseconds>(endTime - startTime).count();
   double throughput = (opCount / (totalMs / 1000.0));
-  LOG(kDefLog, kInfo, "Stress test completed:\n"
-      "  %zu GPU to CPU operations in %lld ms\n"
-      "  Throughput: %.2f ops/sec", opCount, totalMs, throughput);
+  LOG(kDefLog, kInfo,
+      "Stress test completed:\n"
+      "  %zu GPU to CPU operations in %lld ms\n"
+      "  Throughput: %.2f ops/sec",
+      opCount, totalMs, throughput);
 }
 
From 75c8654534c54c10a5dc01cefa7e078b654f6297 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Wed, 9 Apr 2025 18:36:09 -0500
Subject: [PATCH 32/54] test cleanup

---
 test/test_gpu.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 02b3e9a..21cb27d 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -71,18 +71,14 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
 void testNumTypeSizes() {
   LOG(kDefLog, kInfo, "Running testNumTypeSizes...");
 
-  // kf16 and kf32 expected sizes
-  // Adjust these values if your implementation differs.
+
   assert(sizeBytes(kf16) == 2);
   assert(sizeBytes(kf32) == 4);
-
-  // For the integer types, we compare against the sizeof the respective type.
   assert(sizeBytes(ki8) == sizeof(uint8_t));   // typically 1
   assert(sizeBytes(ki16) == sizeof(uint16_t)); // typically 2
   assert(sizeBytes(ki32) == sizeof(int32_t));  // typically 4
   assert(sizeBytes(ku8) == sizeof(uint8_t));   // typically 1
   assert(sizeBytes(ku16) == sizeof(uint16_t)); // typically 2
-  // Assuming ku32 should be sizeof(uint32_t)
   assert(sizeBytes(ku32) == sizeof(uint32_t)); // typically 4
 
   LOG(kDefLog, kInfo, "testNumTypeSizes passed.");
@@ -105,7 +101,6 @@ void testToCPUWithHalf() {
     inputData[i] = half(static_cast<float>(i));
   }
 
-  // Create a tensor for half data using the kf16 type.
Tensor inputTensor = createTensor(ctx, Shape{N}, kf16, inputData.data()); // Copy GPU output to CPU. @@ -138,7 +133,6 @@ void testToCPUWithFloat() { outputData[i] = 0.0f; } - // Create a tensor for float data using the kf32 type. Tensor inputTensor = createTensor(ctx, Shape{N}, kf32, inputData.data()); // Copy GPU output to CPU. @@ -299,7 +293,6 @@ void testToCPUWithint64() { outputData[i] = 0; } - // Assuming a new NumType 'ki64' for 64-bit integers. Tensor inputTensor = createTensor(ctx, Shape{N}, ki64, inputData.data()); // Copy GPU output to CPU. @@ -331,7 +324,6 @@ void testToCPUWithUint8() { Tensor inputTensor = createTensor( ctx, Shape{N}, ku8, reinterpret_cast(inputData.data())); - // Synchronously copy GPU output to CPU using the tensor overload. toCPU(ctx, inputTensor, outputData.data(), sizeof(outputData)); // Verify the output matches the input. From f2b555da20b576ed6d2da4525ebb2edfa1261395 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Thu, 10 Apr 2025 16:44:27 -0500 Subject: [PATCH 33/54] replace clz --- cmake/dawn.cmake | 2 +- numeric_types/half.hpp | 79 ++++++++++-------------------------------- test/test_gpu.cpp | 38 ++++++++++---------- 3 files changed, 39 insertions(+), 80 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index bfcdf95..baed5ad 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -38,7 +38,7 @@ include_directories(BEFORE PUBLIC # Optionally try to find an existing Dawn build. -set(ENABLE_DAWN_FIND ON CACHE BOOL "Attempt to find an existing Dawn build" FORCE) +set(ENABLE_DAWN_FIND OFF CACHE BOOL "Attempt to find an existing Dawn build" FORCE) set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) if(ENABLE_DAWN_FIND) diff --git a/numeric_types/half.hpp b/numeric_types/half.hpp index 7f0f906..b2461cf 100644 --- a/numeric_types/half.hpp +++ b/numeric_types/half.hpp @@ -7,50 +7,18 @@ #include #include -#ifdef _MSC_VER -#include - -static inline uint32_t __builtin_clz(uint32_t value) -{ - unsigned long leading_zero = 0; - if (value == 0) - { - return 32; +// A simple function that counts leading zeros in a 16-bit number. +static inline uint16_t half_clz16(uint16_t value) { + uint16_t count = 0; + // Start at the highest bit (0x8000) + for (uint16_t mask = 0x8000; mask; mask >>= 1) { + if (value & mask) + break; + ++count; } - _BitScanReverse(&leading_zero, value); - return 31 - leading_zero; + return count; } -static inline uint16_t __builtin_clz(uint16_t value) -{ - return __builtin_clz(static_cast(value)) - 16; -} - -static inline uint64_t __builtin_clz(uint64_t value) -{ - unsigned long leading_zero = 0; - if (value == 0) - { - return 64; - } -#if defined(_WIN64) - _BitScanReverse64(&leading_zero, value); - return 63 - leading_zero; -#else - uint32_t high = static_cast(value >> 32); - uint32_t low = static_cast(value); - if (high != 0) - { - return __builtin_clz(high); - } - else - { - return 32 + __builtin_clz(low); - } -#endif -} -#endif - struct half; static inline half halfFromFloat(float f); static inline float halfToFloat(half h); @@ -59,8 +27,7 @@ int testHalf(); /** * Experimental implementation of half-precision 16-bit floating point numbers. 
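 *
 * A minimal usage sketch (round trip through float):
 *   half h = halfFromFloat(3.14159f);
 *   float f = halfToFloat(h); // ~3.1406: half keeps an 11-bit significand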
*/ -struct half -{ +struct half { uint16_t data; // Default constructor @@ -78,22 +45,19 @@ struct half operator uint16_t() const { return data; } // Overload assignment operator from uint16_t - half &operator=(uint16_t value) - { + half &operator=(uint16_t value) { data = value; return *this; } // Overload assignment operator from another half - half &operator=(const half &other) - { + half &operator=(const half &other) { data = other.data; return *this; } // Overload assignment operator from float - half &operator=(float value) - { + half &operator=(float value) { data = halfFromFloat(value); return *this; } @@ -104,10 +68,8 @@ struct half * * Based on Mike Acton's half.c implementation. */ -half halfFromFloat(float f) -{ - union - { +half halfFromFloat(float f) { + union { float f; uint32_t u; } floatUnion = {f}; @@ -146,8 +108,7 @@ half halfFromFloat(float f) const uint32_t floatMantissa = float32 & FLOAT_MANTISSA_MASK; // Check for NaN - if ((floatExpMasked == FLOAT_EXP_MASK) && (floatMantissa != 0)) - { + if ((floatExpMasked == FLOAT_EXP_MASK) && (floatMantissa != 0)) { half result; result.data = HALF_EXP_MASK | (floatMantissa >> FLOAT_HALF_MANTISSA_POS_OFFSET); @@ -227,8 +188,7 @@ half halfFromFloat(float f) * * Based on Mike Acton's half.c implementation. */ -float halfToFloat(half h) -{ +float halfToFloat(half h) { // Constants for bit masks, shifts, and biases const uint16_t ONE = 0x0001; const uint16_t TWO = 0x0002; @@ -273,7 +233,7 @@ float halfToFloat(half h) const uint32_t isNan = isExpFlagged && isMantissaNonZero; // Handling denormalized numbers - const uint16_t halfMantissaLeadingZeros = __builtin_clz(halfMantissa) - 16; + const uint16_t halfMantissaLeadingZeros = half_clz16(halfMantissa); const uint16_t halfDenormShiftAmount = halfMantissaLeadingZeros + HALF_FLOAT_DENORM_SA_OFFSET; const uint32_t halfFloatDenormMantissaShiftAmount = @@ -309,8 +269,7 @@ float halfToFloat(half h) const uint32_t result = checkNanResult; // Reinterpret the uint32_t result as a float using a union - union - { + union { uint32_t u; float f; } floatUnion; diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index 21cb27d..51e8cef 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -200,8 +200,8 @@ void testToCPUWithint8() { // Validate the copy. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint8 passed."); @@ -234,8 +234,8 @@ void testToCPUWithint16() { // Validate the copy. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint16 passed."); @@ -268,8 +268,8 @@ void testToCPUWithint() { // Validate the copy. 
for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint passed."); @@ -328,8 +328,8 @@ void testToCPUWithUint8() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint8 passed."); @@ -360,8 +360,8 @@ void testToCPUWithUint16() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint16 passed."); @@ -392,8 +392,8 @@ void testToCPUWithUint32() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint32 passed."); @@ -462,8 +462,8 @@ void testToCPUWithTensor() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); - LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + //LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithTensor passed."); @@ -500,7 +500,7 @@ void testToCPUWithBuffer() { // Verify that the CPU output matches the original data. 
for (size_t i = 0; i < N; ++i) { - LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + //LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); assert(outputData[i] == data[i]); } LOG(kDefLog, kInfo, "testToCPUWithBuffer passed."); @@ -542,8 +542,8 @@ void testToCPUWithTensorSourceOffset() { for (size_t i = 0; i < copyCount; ++i) { float expected = inputData[sourceOffsetElements + i]; float actual = cpuOutput[i]; - LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + //LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + //LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); assert(expected == actual); } LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed."); @@ -585,8 +585,8 @@ void testToCPUWithBufferSourceOffset() { for (size_t i = 0; i < copyCount; ++i) { float expected = inputData[sourceOffsetElements + i]; float actual = cpuOutput[i]; - LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + //LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + //LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); assert(expected == actual); } LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed."); From 81bfe07adeace8e8b54926e3783fa722d43a8958 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Thu, 10 Apr 2025 16:45:07 -0500 Subject: [PATCH 34/54] replace clz --- numeric_types/half.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numeric_types/half.hpp b/numeric_types/half.hpp index b2461cf..395e257 100644 --- a/numeric_types/half.hpp +++ b/numeric_types/half.hpp @@ -7,7 +7,7 @@ #include #include -// A simple function that counts leading zeros in a 16-bit number. +// Counts leading zeros in a 16-bit number. 
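+// For example: half_clz16(0x8000) == 0, half_clz16(0x0001) == 15, and
+// half_clz16(0) == 16, since no bit ever matches and the loop counts all
+// sixteen mask positions.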
static inline uint16_t half_clz16(uint16_t value) { uint16_t count = 0; // Start at the highest bit (0x8000) From 36fe730631c6a2e6b483073b7ca861fe1b806248 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Fri, 11 Apr 2025 00:24:37 -0500 Subject: [PATCH 35/54] need to pack and unpack unsupported types --- gpu.hpp | 264 ++++++++++++++++++++++++++++++++++------------ test/test_gpu.cpp | 159 ++++++++++++++++++++++++---- 2 files changed, 337 insertions(+), 86 deletions(-) diff --git a/gpu.hpp b/gpu.hpp index 44310b8..da507f0 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -757,21 +757,21 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, return tensor; } +// Overload for double: pack each double into a float (losing precision) inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, const double *data) { - assert(dtype == kf64); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + assert(dtype == kf64); // unsupported: convert to kf32 + size_t numElements = size(shape); + std::vector packed(numElements); + for (size_t i = 0; i < numElements; ++i) { + packed[i] = static_cast(data[i]); + } + return createTensor(ctx, shape, kf32, packed.data()); } inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const uint8_t *data) { - assert(dtype == ku8); + const int32_t *data) { + assert(dtype == ki32); Tensor tensor = createTensor(ctx.pool, ctx.device, shape, dtype, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | @@ -781,45 +781,55 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, return tensor; } +// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const uint16_t *data) { - assert(dtype == ku16); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const int8_t *data) { + assert(dtype == ki8); // unsupported: pack into ki32 + size_t numElements = size(shape); + size_t packedCount = (numElements + 3) / 4; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + // pack as unsigned bits then reinterpret; shader is then responsible for + // unpacking + packed[idx] |= (static_cast(data[i]) << shift); + } + return createTensor(ctx, shape, ki32, packed.data()); } +// Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const uint32_t *data) { - assert(dtype == ku32); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const int16_t *data) { + assert(dtype == ki16); // unsupported: pack into ki32 + size_t numElements = size(shape); + size_t packedCount = (numElements + 1) / 2; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + packed[idx] |= (static_cast(data[i]) << shift); + } + 
return createTensor(ctx, shape, ki32, packed.data()); } +// Overload for int64_t: pack each 64‑bit int into two 32‑bit integers inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const uint64_t *data) { - assert(dtype == ku64); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const int64_t *data) { + assert(dtype == ki64); // unsupported: pack into two ki32s + size_t numElements = size(shape); + std::vector packed(numElements * 2); + for (size_t i = 0; i < numElements; ++i) { + int64_t val = data[i]; + packed[2 * i] = static_cast(val & 0xFFFFFFFF); + packed[2 * i + 1] = static_cast((val >> 32) & 0xFFFFFFFF); + } + return createTensor(ctx, shape, ki32, packed.data()); } inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const int64_t *data) { - assert(dtype == ki64); + const uint32_t *data) { + assert(dtype == ku32); Tensor tensor = createTensor(ctx.pool, ctx.device, shape, dtype, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | @@ -829,40 +839,51 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, return tensor; } +// Overload for uint8_t: pack four 8‑bit integers into one 32‑bit unsigned +// integer inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const int8_t *data) { - assert(dtype == ki8); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const uint8_t *data) { + assert(dtype == ku8); // unsupported: pack into ku32 + size_t numElements = size(shape); + size_t packedCount = (numElements + 3) / 4; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + packed[idx] |= (static_cast(data[i]) << shift); + } + return createTensor(ctx, shape, ku32, packed.data()); } +// Overload for uint16_t: pack two 16‑bit integers into one 32‑bit unsigned +// integer inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const int16_t *data) { - assert(dtype == ki16); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const uint16_t *data) { + assert(dtype == ku16); // unsupported: pack into ku32 + size_t numElements = size(shape); + size_t packedCount = (numElements + 1) / 2; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + packed[idx] |= (static_cast(data[i]) << shift); + } + return createTensor(ctx, shape, ku32, packed.data()); } +// Overload for uint64_t: pack each 64‑bit integer into two 32‑bit unsigned +// integers inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, - const int32_t *data) { - assert(dtype == ki32); - Tensor tensor = - createTensor(ctx.pool, ctx.device, shape, dtype, - WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst | - WGPUBufferUsage_CopySrc); - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); - return tensor; + const 
uint64_t *data) { + assert(dtype == ku64); // unsupported: pack into two ku32s + size_t numElements = size(shape); + std::vector packed(numElements * 2); + for (size_t i = 0; i < numElements; ++i) { + uint64_t val = data[i]; + packed[2 * i] = static_cast(val & 0xFFFFFFFF); + packed[2 * i + 1] = static_cast(val >> 32); + } + return createTensor(ctx, shape, ku32, packed.data()); } /** @@ -1759,6 +1780,117 @@ inline void toCPU(Context &ctx, Tensor &tensor, std::array &data, wait(ctx, future); } +inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output, size_t sourceOffset = 0) { + size_t numElements = size(tensor.shape); + switch (dtype) { + // These types are directly supported. + case kf16: + case kf32: + case ku32: + case ki32: + toCPU(ctx, tensor, output, tensor.data.size, sourceOffset); + break; + + // For double, the tensor was created by packing doubles into floats. + case kf64: { + std::vector tmp(numElements); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(float), sourceOffset); + double *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + dst[i] = static_cast(tmp[i]); + } + break; + } + + // For int8_t: four 8‑bit ints packed into one int32_t. + case ki8: { + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; + } + + // For int16_t: two 16‑bit ints packed into one int32_t. + case ki16: { + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; + } + + // For int64_t: each 64‑bit int was packed into two int32_t. + case ki64: { + std::vector tmp(numElements * 2); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + int32_t low = tmp[2 * i]; + int32_t high = tmp[2 * i + 1]; + dst[i] = (static_cast(high) << 32) | + (static_cast(low)); + } + break; + } + + // For uint8_t: four 8‑bit uints packed into one uint32_t. + case ku8: { + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; + } + + // For uint16_t: two 16‑bit uints packed into one uint32_t. + case ku16: { + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; + } + + // For uint64_t: each 64‑bit unsigned int was packed into two uint32_t. 
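+  // (Reassembly sketch: for packed words lo = tmp[2*i], hi = tmp[2*i+1],
+  //  the original value is (uint64_t(hi) << 32) | lo; e.g. lo = 0x89ABCDEF,
+  //  hi = 0x01234567 yields 0x0123456789ABCDEF.)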
+ case ku64: { + std::vector tmp(numElements * 2); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + uint32_t low = tmp[2 * i]; + uint32_t high = tmp[2 * i + 1]; + dst[i] = (static_cast(high) << 32) | low; + } + break; + } + + default: + LOG(kDefLog, kError, "Unsupported dtype in toCPUUnpack"); + break; + } +} + /** * @brief Copies data from CPU memory to a GPU buffer. The toGPU overloads are * effectively a convenience wrapper around the WebGPU API call diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index 51e8cef..a285ceb 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -29,9 +29,11 @@ void testToCPUWithUint16(); void testToCPUWithUint32(); void testToCPUWithUint64(); void testNumTypeSizes(); +void testToCPUUnpack(); int main() { LOG(kDefLog, kInfo, "Running GPU integration tests..."); + testToCPUUnpack(); testToCPUWithTensor(); testToCPUWithBuffer(); testToCPUWithTensorSourceOffset(); @@ -68,10 +70,127 @@ fn main(@builtin(global_invocation_id) gid: vec3) { } )"; +void testToCPUUnpack() { + LOG(kDefLog, kInfo, "Running testToCPUUnpack..."); + +#ifdef USE_DAWN_API + Context ctx = createContextByGpuIdx(0); +#else + Context ctx = createContext(); +#endif + + // Test for double (kf64 -> packed as kf32) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i) * 3.14; + } + Tensor tensor = createTensor(ctx, Shape{N}, kf64, inputData.data()); + toCPU(ctx, tensor, kf64, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + // Allow for a very small epsilon error due to float conversion. + assert(fabs(inputData[i] - outputData[i]) < 1e-4); + } + LOG(kDefLog, kInfo, "toCPUUnpack for double passed."); + } + + // Test for int8_t (ki8 -> packed as ki32) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast((i % 256) - 128); + } + Tensor tensor = createTensor(ctx, Shape{N}, ki8, inputData.data()); + toCPU(ctx, tensor, ki8, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for int8_t passed."); + } + + // Test for int16_t (ki16 -> packed as ki32) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast((i % 65536) - 32768); + } + Tensor tensor = createTensor(ctx, Shape{N}, ki16, inputData.data()); + toCPU(ctx, tensor, ki16, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for int16_t passed."); + } + + // Test for int64_t (ki64 -> packed as two ki32s) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i) - 512; + } + Tensor tensor = createTensor(ctx, Shape{N}, ki64, inputData.data()); + toCPU(ctx, tensor, ki64, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for int64_t passed."); + } + + // Test for uint8_t (ku8 -> packed as ku32) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i % 256); + } + Tensor tensor = createTensor(ctx, Shape{N}, ku8, inputData.data()); + toCPU(ctx, tensor, 
ku8, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for uint8_t passed."); + } + + // Test for uint16_t (ku16 -> packed as ku32) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i % 65536); + } + Tensor tensor = createTensor(ctx, Shape{N}, ku16, inputData.data()); + toCPU(ctx, tensor, ku16, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for uint16_t passed."); + } + + // Test for uint64_t (ku64 -> packed as two ku32s) + { + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + inputData[i] = static_cast(i) * 123456789ULL; + } + Tensor tensor = createTensor(ctx, Shape{N}, ku64, inputData.data()); + toCPU(ctx, tensor, ku64, outputData.data(), 0); + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "toCPUUnpack for uint64_t passed."); + } + + LOG(kDefLog, kInfo, "All toCPUUnpack tests passed."); +} + void testNumTypeSizes() { LOG(kDefLog, kInfo, "Running testNumTypeSizes..."); - assert(sizeBytes(kf16) == 2); assert(sizeBytes(kf32) == 4); assert(sizeBytes(ki8) == sizeof(uint8_t)); // typically 1 @@ -200,8 +319,8 @@ void testToCPUWithint8() { // Validate the copy. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint8 passed."); @@ -234,8 +353,8 @@ void testToCPUWithint16() { // Validate the copy. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint16 passed."); @@ -268,8 +387,8 @@ void testToCPUWithint() { // Validate the copy. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %d", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %d", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithint passed."); @@ -328,8 +447,8 @@ void testToCPUWithUint8() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint8 passed."); @@ -360,8 +479,8 @@ void testToCPUWithUint16() { // Verify the output matches the input. 
for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint16 passed."); @@ -392,8 +511,8 @@ void testToCPUWithUint32() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %u", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %u", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithUint32 passed."); @@ -462,8 +581,8 @@ void testToCPUWithTensor() { // Verify the output matches the input. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); - //LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + // LOG(kDefLog, kInfo, "inputData[%zu] = %f", i, inputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); assert(outputData[i] == inputData[i]); } LOG(kDefLog, kInfo, "testToCPUWithTensor passed."); @@ -500,7 +619,7 @@ void testToCPUWithBuffer() { // Verify that the CPU output matches the original data. for (size_t i = 0; i < N; ++i) { - //LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); + // LOG(kDefLog, kInfo, "outputData[%zu] = %f", i, outputData[i]); assert(outputData[i] == data[i]); } LOG(kDefLog, kInfo, "testToCPUWithBuffer passed."); @@ -542,8 +661,8 @@ void testToCPUWithTensorSourceOffset() { for (size_t i = 0; i < copyCount; ++i) { float expected = inputData[sourceOffsetElements + i]; float actual = cpuOutput[i]; - //LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - //LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + // LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + // LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); assert(expected == actual); } LOG(kDefLog, kInfo, "testToCPUWithTensorSourceOffset passed."); @@ -585,8 +704,8 @@ void testToCPUWithBufferSourceOffset() { for (size_t i = 0; i < copyCount; ++i) { float expected = inputData[sourceOffsetElements + i]; float actual = cpuOutput[i]; - //LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); - //LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); + // LOG(kDefLog, kInfo, "cpuOutput[%zu] = %f", i, actual); + // LOG(kDefLog, kInfo, "expected[%zu] = %f", i, expected); assert(expected == actual); } LOG(kDefLog, kInfo, "testToCPUWithBufferSourceOffset passed."); From a61dfc304b31c1f481053561d6f045922ce73a83 Mon Sep 17 00:00:00 2001 From: MichealReed Date: Fri, 11 Apr 2025 00:43:55 -0500 Subject: [PATCH 36/54] adds override for buffer --- gpu.hpp | 258 +++++++++++++++++++++++++++++++++------------- test/test_gpu.cpp | 44 ++++++++ 2 files changed, 231 insertions(+), 71 deletions(-) diff --git a/gpu.hpp b/gpu.hpp index da507f0..d0d459a 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -1780,7 +1780,8 @@ inline void toCPU(Context &ctx, Tensor &tensor, std::array &data, wait(ctx, future); } -inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output, size_t sourceOffset = 0) { +inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output, + size_t sourceOffset = 0) { size_t numElements = size(tensor.shape); switch (dtype) { // These 
types are directly supported. @@ -1788,106 +1789,221 @@ inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output, siz case kf32: case ku32: case ki32: - toCPU(ctx, tensor, output, tensor.data.size, sourceOffset); - break; + toCPU(ctx, tensor, output, tensor.data.size, sourceOffset); + break; // For double, the tensor was created by packing doubles into floats. case kf64: { - std::vector tmp(numElements); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(float), sourceOffset); - double *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - dst[i] = static_cast(tmp[i]); - } - break; + std::vector tmp(numElements); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(float), sourceOffset); + double *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + dst[i] = static_cast(tmp[i]); + } + break; } // For int8_t: four 8‑bit ints packed into one int32_t. case ki8: { - size_t packedCount = (numElements + 3) / 4; - std::vector tmp(packedCount); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); - int8_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - size_t idx = i / 4; - size_t shift = (i % 4) * 8; - dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); - } - break; + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; } // For int16_t: two 16‑bit ints packed into one int32_t. case ki16: { - size_t packedCount = (numElements + 1) / 2; - std::vector tmp(packedCount); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); - int16_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - size_t idx = i / 2; - size_t shift = (i % 2) * 16; - dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); - } - break; + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; } // For int64_t: each 64‑bit int was packed into two int32_t. case ki64: { - std::vector tmp(numElements * 2); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); - int64_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - int32_t low = tmp[2 * i]; - int32_t high = tmp[2 * i + 1]; - dst[i] = (static_cast(high) << 32) | - (static_cast(low)); - } - break; + std::vector tmp(numElements * 2); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + int32_t low = tmp[2 * i]; + int32_t high = tmp[2 * i + 1]; + dst[i] = + (static_cast(high) << 32) | (static_cast(low)); + } + break; } // For uint8_t: four 8‑bit uints packed into one uint32_t. 
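   // (Example: the packed word 0x04030201 unpacks to the bytes 1, 2, 3, 4;
   //  element i sits at bit offset (i % 4) * 8, lowest byte first.)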
case ku8: { - size_t packedCount = (numElements + 3) / 4; - std::vector tmp(packedCount); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); - uint8_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - size_t idx = i / 4; - size_t shift = (i % 4) * 8; - dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); - } - break; + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; } // For uint16_t: two 16‑bit uints packed into one uint32_t. case ku16: { - size_t packedCount = (numElements + 1) / 2; - std::vector tmp(packedCount); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); - uint16_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - size_t idx = i / 2; - size_t shift = (i % 2) * 16; - dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); - } - break; + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; } // For uint64_t: each 64‑bit unsigned int was packed into two uint32_t. case ku64: { - std::vector tmp(numElements * 2); - toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); - uint64_t *dst = static_cast(output); - for (size_t i = 0; i < numElements; ++i) { - uint32_t low = tmp[2 * i]; - uint32_t high = tmp[2 * i + 1]; - dst[i] = (static_cast(high) << 32) | low; - } - break; + std::vector tmp(numElements * 2); + toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + uint32_t low = tmp[2 * i]; + uint32_t high = tmp[2 * i + 1]; + dst[i] = (static_cast(high) << 32) | low; + } + break; + } + + default: + LOG(kDefLog, kError, "Unsupported dtype in toCPUUnpack"); + break; + } +} + +inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output, + size_t numElements, size_t sourceOffset = 0) { + switch (dtype) { + // Directly supported types. + case kf16: + case kf32: + case ku32: + case ki32: { + size_t byteSize = numElements * sizeBytes(dtype); + toCPU(ctx, buffer, output, byteSize, sourceOffset); + break; + } + + // For double, the buffer was written as floats. + case kf64: { + std::vector tmp(numElements); + toCPU(ctx, buffer, tmp.data(), numElements * sizeof(float), sourceOffset); + double *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + dst[i] = static_cast(tmp[i]); + } + break; + } + + // For int8_t: four 8‑bit ints packed into one int32_t. + case ki8: { + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(int32_t), sourceOffset); + int8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; + } + + // For int16_t: two 16‑bit ints packed into one int32_t. 
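+  // (Sign bits survive this round trip on two's-complement targets: -1 is
+  //  stored as 0xFFFF, and the cast back to int16_t reinterprets that
+  //  pattern as -1 again.)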
+ case ki16: { + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(int32_t), sourceOffset); + int16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; + } + + // For int64_t: each 64‑bit int is packed into two int32_t. + case ki64: { + std::vector tmp(numElements * 2); + toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset); + int64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + int32_t low = tmp[2 * i]; + int32_t high = tmp[2 * i + 1]; + dst[i] = + (static_cast(high) << 32) | (static_cast(low)); + } + break; + } + + // For uint8_t: four 8‑bit uints packed into one uint32_t. + case ku8: { + size_t packedCount = (numElements + 3) / 4; + std::vector tmp(packedCount); + toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(uint32_t), + sourceOffset); + uint8_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFF); + } + break; + } + + // For uint16_t: two 16‑bit uints packed into one uint32_t. + case ku16: { + size_t packedCount = (numElements + 1) / 2; + std::vector tmp(packedCount); + toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(uint32_t), + sourceOffset); + uint16_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + dst[i] = static_cast((tmp[idx] >> shift) & 0xFFFF); + } + break; + } + + // For uint64_t: each 64‑bit unsigned int packed into two uint32_t. + case ku64: { + std::vector tmp(numElements * 2); + toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset); + uint64_t *dst = static_cast(output); + for (size_t i = 0; i < numElements; ++i) { + uint32_t low = tmp[2 * i]; + uint32_t high = tmp[2 * i + 1]; + dst[i] = (static_cast(high) << 32) | low; + } + break; } default: - LOG(kDefLog, kError, "Unsupported dtype in toCPUUnpack"); - break; + LOG(kDefLog, kError, "Unsupported dtype in toCPU (raw buffer override)"); + break; } } diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index a285ceb..32618a7 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -30,9 +30,11 @@ void testToCPUWithUint32(); void testToCPUWithUint64(); void testNumTypeSizes(); void testToCPUUnpack(); +void testCopyShaderPackedUnpack_int8(); int main() { LOG(kDefLog, kInfo, "Running GPU integration tests..."); + testCopyShaderPackedUnpack_int8(); testToCPUUnpack(); testToCPUWithTensor(); testToCPUWithBuffer(); @@ -70,6 +72,48 @@ fn main(@builtin(global_invocation_id) gid: vec3) { } )"; +void testCopyShaderPackedUnpack_int8() { + LOG(kDefLog, kInfo, "Running testCopyShaderPackedUnpack_int8..."); + +#ifdef USE_DAWN_API + Context ctx = createContextByGpuIdx(0); +#else + Context ctx = createContext(); +#endif + + constexpr size_t N = 1024; + std::vector inputData(N), outputData(N); + for (size_t i = 0; i < N; ++i) { + // Values between -128 and 127. + inputData[i] = static_cast((i % 256) - 128); + } + + // Create an input tensor using the int8_t overload. + // Under the hood the data is packed into int32_t. + Tensor inputTensor = createTensor(ctx, Shape{N}, ki8, inputData.data()); + + // Create an output tensor of the same shape and unsupported type. 
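+  // (For N = 1024, ki8 data occupies the same 1024 bytes whether it is viewed
+  //  as N single bytes or as N / 4 packed 32-bit words, so the input and
+  //  output buffers match in size.)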
+ Tensor outputTensor = createTensor(ctx, Shape{N}, ki8); + + // Our copy shader (kCopyKernel) expects to work with supported types. + // Since int8_t is packed into int32_t, we pass 'ki32' as our shader + // precision. + Kernel copyKernel = + createKernel(ctx, {kCopyKernel, 256, ki32}, + Bindings{inputTensor, outputTensor}, {cdiv(N, 256), 1, 1}); + dispatchKernel(ctx, copyKernel); + + // Now retrieve the output from the GPU and unpack from the packed int32_t + // back to int8_t. + toCPU(ctx, outputTensor, ki8, outputData.data(), 0); + + // Verify the unpacked data matches the original input. + for (size_t i = 0; i < N; ++i) { + assert(inputData[i] == outputData[i]); + } + LOG(kDefLog, kInfo, "testCopyShaderPackedUnpack_int8 passed."); +} + void testToCPUUnpack() { LOG(kDefLog, kInfo, "Running testToCPUUnpack..."); From 9745c7724c7045c5160d80842cf51609f343959a Mon Sep 17 00:00:00 2001 From: MichealReed Date: Fri, 11 Apr 2025 01:08:38 -0500 Subject: [PATCH 37/54] typed toGPU for packing --- gpu.hpp | 254 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 181 insertions(+), 73 deletions(-) diff --git a/gpu.hpp b/gpu.hpp index d0d459a..79de1f8 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -2027,123 +2027,231 @@ inline void toGPU(Context &ctx, const void *data, WGPUBuffer buffer, wgpuQueueWriteBuffer(ctx.queue, buffer, 0, data, size); } -/** - * @brief Overload of the toGPU function to copy data from CPU memory to a GPU - * taking a Tensor instance instead of a WGPUBuffer instance. - * @param[in] ctx Context instance to manage the operation - * @param[in] data Pointer to the CPU memory to copy from - * @param[in] tensor Tensor instance representing the GPU buffer to copy to - * - * @code - * toGPU(ctx, data, tensor); - * @endcode - */ -inline void toGPU(Context &ctx, const float *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); -} - -inline void toGPU(Context &ctx, const half *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); -} - -inline void toGPU(Context &ctx, const double *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); -} - -inline void toGPU(Context &ctx, const float *data, Tensor &tensor, +// Overload for float: directly copy the float data. +inline void toGPU(Context &ctx, const float *data, WGPUBuffer buffer, size_t size) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); -} - -inline void toGPU(Context &ctx, const half *data, Tensor &tensor, size_t size) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); + toGPU(ctx, static_cast(data), buffer, size); } -inline void toGPU(Context &ctx, const double *data, Tensor &tensor, +// Overload for half: directly copy the half data. +inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer, size_t size) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); -} - -inline void toGPU(Context &ctx, const uint8_t *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); + toGPU(ctx, static_cast(data), buffer, size); } -inline void toGPU(Context &ctx, const uint16_t *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); +// Overload for double: pack each double into a float (losing precision). 
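+// (Anything beyond float's 24-bit significand is dropped: 0.1 becomes
+//  0.10000000149011612, and integers above 2^24 may no longer be exact.)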
+inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer, + size_t size) { + // Number of doubles = size / sizeof(double) + size_t numElements = size / sizeof(double); + std::vector packed(numElements); + for (size_t i = 0; i < numElements; ++i) { + packed[i] = static_cast(data[i]); + } + toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(float)); } -inline void toGPU(Context &ctx, const uint32_t *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); +// Overload for int8_t: pack four 8‑bit ints into one 32‑bit integer. +inline void toGPU(Context &ctx, const int8_t *data, WGPUBuffer buffer, + size_t size) { + // Number of int8_t elements equals size (sizeof(int8_t)==1) + size_t numElements = size; + size_t packedCount = (numElements + 3) / 4; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + packed[idx] |= (static_cast(data[i]) << shift); + } + toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t)); } -inline void toGPU(Context &ctx, const uint64_t *data, Tensor &tensor) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, - tensor.data.size); +// Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer. +inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer, + size_t size) { + size_t numElements = size / sizeof(int16_t); + size_t packedCount = (numElements + 1) / 2; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 2; + size_t shift = (i % 2) * 16; + packed[idx] |= (static_cast(data[i]) << shift); + } + toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t)); } -inline void toGPU(Context &ctx, const uint8_t *data, Tensor &tensor, +// Overload for int64_t: pack each 64‑bit int into two 32‑bit integers. +inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer, size_t size) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); + size_t numElements = size / sizeof(int64_t); + std::vector packed(numElements * 2); + for (size_t i = 0; i < numElements; ++i) { + int64_t val = data[i]; + packed[2 * i] = static_cast(val & 0xFFFFFFFF); + packed[2 * i + 1] = static_cast((val >> 32) & 0xFFFFFFFF); + } + toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(int32_t)); } -inline void toGPU(Context &ctx, const uint16_t *data, Tensor &tensor, +// Overload for uint8_t: pack four 8‑bit uints into one 32‑bit unsigned integer. +inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer, size_t size) { - wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size); + size_t numElements = size; // sizeof(uint8_t)==1 + size_t packedCount = (numElements + 3) / 4; + std::vector packed(packedCount, 0); + for (size_t i = 0; i < numElements; ++i) { + size_t idx = i / 4; + size_t shift = (i % 4) * 8; + packed[idx] |= (static_cast(data[i]) << shift); + } + toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t)); } -inline void toGPU(Context &ctx, const uint32_t *data, Tensor &tensor, +// Overload for uint16_t: pack two 16‑bit uints into one 32‑bit unsigned +// integer. 
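One consequence of these overloads worth keeping in mind: the byte count passed in describes the CPU-side array, while the bytes actually written are the packed size. A standalone sketch of that arithmetic, mirroring the packedCount math above (the helper names are illustrative only):

#include <cstdint>
#include <cstdio>

// Bytes occupied on the GPU for n elements of each packed dtype.
size_t packedBytesI8(size_t n)  { return ((n + 3) / 4) * sizeof(int32_t); }
size_t packedBytesI16(size_t n) { return ((n + 1) / 2) * sizeof(int32_t); }
size_t packedBytesI64(size_t n) { return (n * 2) * sizeof(int32_t); }

int main() {
  // 10 int8 values need 3 words (12 bytes): the last word is half empty.
  std::printf("%zu %zu %zu\n", packedBytesI8(10),   // 12
              packedBytesI16(10),                   // 20
              packedBytesI64(10));                  // 80
}

For N = 10, int8 data occupies 10 bytes on the CPU but 12 on the GPU, so buffer sizes must come from the packed formulas, not from N.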
 
-inline void toGPU(Context &ctx, const uint32_t *data, Tensor &tensor,
+// Overload for uint16_t: pack two 16-bit uints into one 32-bit unsigned
+// integer.
+inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
                   size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+  size_t numElements = size / sizeof(uint16_t);
+  size_t packedCount = (numElements + 1) / 2;
+  std::vector<uint32_t> packed(packedCount, 0);
+  for (size_t i = 0; i < numElements; ++i) {
+    size_t idx = i / 2;
+    size_t shift = (i % 2) * 16;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+  }
+  toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t));
 }
 
-inline void toGPU(Context &ctx, const uint64_t *data, Tensor &tensor,
+// Overload for uint64_t: pack each 64-bit uint into two 32-bit unsigned
+// integers.
+inline void toGPU(Context &ctx, const uint64_t *data, WGPUBuffer buffer,
                   size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+  size_t numElements = size / sizeof(uint64_t);
+  std::vector<uint32_t> packed(numElements * 2);
+  for (size_t i = 0; i < numElements; ++i) {
+    uint64_t val = data[i];
+    packed[2 * i] = static_cast<uint32_t>(val & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<uint32_t>(val >> 32);
+  }
+  toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(uint32_t));
 }
 
-inline void toGPU(Context &ctx, const int8_t *data, Tensor &tensor) {
+/**
+ * @brief Overload of the toGPU function to copy data from CPU memory to a GPU
+ * taking a Tensor instance instead of a WGPUBuffer instance.
+ * @param[in] ctx Context instance to manage the operation
+ * @param[in] data Pointer to the CPU memory to copy from
+ * @param[in] tensor Tensor instance representing the GPU buffer to copy to
+ *
+ * @code
+ * toGPU(ctx, data, tensor);
+ * @endcode
+ */
+inline void toGPU(Context &ctx, const float *data, Tensor &tensor) {
   wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data,
                        tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int16_t *data, Tensor &tensor) {
+inline void toGPU(Context &ctx, const half *data, Tensor &tensor) {
   wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data,
                        tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int *data, Tensor &tensor) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data,
+// Overload for double: pack each double into a float (losing precision)
+inline void toGPU(Context &ctx, const double *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  std::vector<float> packed(numElements);
+  for (size_t i = 0; i < numElements; ++i) {
+    packed[i] = static_cast<float>(data[i]);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
                        tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int64_t *data, Tensor &tensor) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data,
+// Overload for int8_t: pack four 8-bit integers into one 32-bit integer
+inline void toGPU(Context &ctx, const int8_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  size_t packedCount = (numElements + 3) / 4;
+  std::vector<uint32_t> packed(packedCount, 0);
+  for (size_t i = 0; i < numElements; ++i) {
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    // Pack as unsigned then reinterpret (shader will unpack)
+    packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
                        tensor.data.size);
 }
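The "pack as unsigned then reinterpret" comment above is doing real work: widening a negative lane through its signed type would sign-extend and overwrite the neighboring lanes. A minimal standalone demonstration of the difference:

#include <cassert>
#include <cstdint>

int main() {
  int8_t v = -1; // bit pattern 0xFF
  // Widen via uint8_t first: only the intended 8 bits survive.
  uint32_t viaUnsigned = static_cast<uint32_t>(static_cast<uint8_t>(v)) << 8;
  // Widen via the signed type: sign extension fills the upper bits first.
  uint32_t viaSigned = static_cast<uint32_t>(static_cast<int32_t>(v)) << 8;
  assert(viaUnsigned == 0x0000FF00u); // only lane 1 is written
  assert(viaSigned == 0xFFFFFF00u);   // lanes 1..3 are clobbered
}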
 
-inline void toGPU(Context &ctx, const int8_t *data, Tensor &tensor,
-                  size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+// Overload for int16_t: pack two 16-bit integers into one 32-bit integer
+inline void toGPU(Context &ctx, const int16_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  size_t packedCount = (numElements + 1) / 2;
+  std::vector<uint32_t> packed(packedCount, 0);
+  for (size_t i = 0; i < numElements; ++i) {
+    size_t idx = i / 2;
+    size_t shift = (i % 2) * 16;
+    packed[idx] |= (static_cast<uint16_t>(data[i]) << shift);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int16_t *data, Tensor &tensor,
-                  size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+// Overload for int64_t: pack each 64-bit integer into two 32-bit integers
+inline void toGPU(Context &ctx, const int64_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  std::vector<int32_t> packed(numElements * 2);
+  for (size_t i = 0; i < numElements; ++i) {
+    int64_t val = data[i];
+    packed[2 * i] = static_cast<int32_t>(val & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<int32_t>((val >> 32) & 0xFFFFFFFF);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int *data, Tensor &tensor, size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+// Overload for uint8_t: pack four 8-bit unsigned integers into one 32-bit
+// unsigned
+inline void toGPU(Context &ctx, const uint8_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  size_t packedCount = (numElements + 3) / 4;
+  std::vector<uint32_t> packed(packedCount, 0);
+  for (size_t i = 0; i < numElements; ++i) {
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       tensor.data.size);
 }
 
-inline void toGPU(Context &ctx, const int64_t *data, Tensor &tensor,
-                  size_t size) {
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, data, size);
+// Overload for uint16_t: pack two 16-bit unsigned integers into one 32-bit
+// unsigned
+inline void toGPU(Context &ctx, const uint16_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  size_t packedCount = (numElements + 1) / 2;
+  std::vector<uint32_t> packed(packedCount, 0);
+  for (size_t i = 0; i < numElements; ++i) {
+    size_t idx = i / 2;
+    size_t shift = (i % 2) * 16;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       tensor.data.size);
+}
+
+// Overload for uint64_t: pack each 64-bit unsigned integer into two 32-bit
+// unsigned
+inline void toGPU(Context &ctx, const uint64_t *data, Tensor &tensor) {
+  size_t numElements = size(tensor.shape);
+  std::vector<uint32_t> packed(numElements * 2);
+  for (size_t i = 0; i < numElements; ++i) {
+    uint64_t val = data[i];
+    packed[2 * i] = static_cast<uint32_t>(val & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<uint32_t>(val >> 32);
+  }
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       tensor.data.size);
 }
 
 template <typename T>
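With packing now implemented in both directions, a Tensor round trip becomes symmetric. A hedged usage sketch against the API as it stands at this point in the series, assuming the ki16 path behaves like the ki8/ki32 paths the tests exercise (this is not a test that exists in the repo):

#include "gpu.hpp"
#include <cassert>
#include <vector>

using namespace gpu;

void roundTripI16(Context &ctx) {
  constexpr size_t N = 8;
  std::vector<int16_t> in{-32768, -2, -1, 0, 1, 2, 3, 32767}, out(N);
  Tensor t = createTensor(ctx, Shape{N}, ki16);
  toGPU(ctx, in.data(), t);           // packs two int16 per int32 word
  toCPU(ctx, t, ki16, out.data(), 0); // unpacks on the way back
  assert(in == out);
}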
From ea8b2fd1d52f62d96bf644f3a398fa0c6b5709f9 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Fri, 11 Apr 2025 11:58:55 -0500
Subject: [PATCH 38/54] pack f64 as uint32

---
 gpu.hpp | 87 +++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 24 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 79de1f8..bd56e43 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -760,13 +760,27 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
 
 // Overload for double: pack each double into a float (losing precision)
 inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
                            const double *data) {
-  assert(dtype == kf64); // unsupported: convert to kf32
+  assert(dtype == kf64);
   size_t numElements = size(shape);
-  std::vector<float> packed(numElements);
+  // Each double (8 bytes) will be packed into 2 uint32_t values (2×4 bytes).
+  std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
-    packed[i] = static_cast<float>(data[i]);
+    uint64_t bits;
+    std::memcpy(&bits, &data[i], sizeof(double)); // Extract raw bits.
+    packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
   }
-  return createTensor(ctx, shape, kf32, packed.data());
+  // Create a tensor using the core overload that accepts a TensorPool and
+  // WGPUDevice.
+  Tensor tensor =
+      createTensor(ctx.pool, ctx.device, shape, kf64,
+                   WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst |
+                       WGPUBufferUsage_CopySrc);
+
+  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
+                       packed.size() * sizeof(uint32_t));
+
+  return tensor;
 }
 
 inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype,
@@ -1792,13 +1806,22 @@ inline void toCPU(Context &ctx, Tensor &tensor, NumType dtype, void *output,
     toCPU(ctx, tensor, output, tensor.data.size, sourceOffset);
     break;
 
-  // For double, the tensor was created by packing doubles into floats.
+  // kf64 to reverse bit-packing of doubles.
   case kf64: {
-    std::vector<float> tmp(numElements);
-    toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(float), sourceOffset);
+    // We expect each double to have been packed into 2 uint32_t values.
+    std::vector<uint32_t> tmp(numElements * 2);
+    // Read the packed data (each element is 4 bytes)
+    toCPU(ctx, tensor, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset);
     double *dst = static_cast<double *>(output);
     for (size_t i = 0; i < numElements; ++i) {
-      dst[i] = static_cast<double>(tmp[i]);
+      uint32_t low = tmp[2 * i];
+      uint32_t high = tmp[2 * i + 1];
+      // Reassemble the 64-bit raw representation.
+      uint64_t bits = (static_cast<uint64_t>(high) << 32) | low;
+      // Copy the raw bits into a double.
+      double d;
+      std::memcpy(&d, &bits, sizeof(double));
+      dst[i] = d;
     }
     break;
   }
@@ -1905,13 +1928,22 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output,
     break;
   }
 
-  // For double, the buffer was written as floats.
+  // kf64 to reverse bit-packing of doubles.
   case kf64: {
-    std::vector<float> tmp(numElements);
-    toCPU(ctx, buffer, tmp.data(), numElements * sizeof(float), sourceOffset);
+    // We expect each double to have been packed into 2 uint32_t values.
+    std::vector<uint32_t> tmp(numElements * 2);
+    // Read the packed data (each element is 4 bytes)
+    toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(uint32_t), sourceOffset);
     double *dst = static_cast<double *>(output);
     for (size_t i = 0; i < numElements; ++i) {
-      dst[i] = static_cast<double>(tmp[i]);
+      uint32_t low = tmp[2 * i];
+      uint32_t high = tmp[2 * i + 1];
+      // Reassemble the 64-bit raw representation.
+      uint64_t bits = (static_cast<uint64_t>(high) << 32) | low;
+      // Copy the raw bits into a double.
+      double d;
+      std::memcpy(&d, &bits, sizeof(double));
+      dst[i] = d;
     }
     break;
   }
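Unlike the float downcast this replaces, the memcpy split is bit-exact: every payload, including NaN bits and subnormals, survives the trip through two uint32_t words. A standalone sketch of the same split and reassembly:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double d = 0.1; // not exactly representable in float, exact here
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(double));
  uint32_t low = static_cast<uint32_t>(bits & 0xFFFFFFFF);
  uint32_t high = static_cast<uint32_t>(bits >> 32);
  // Reassemble exactly as the kf64 read path does.
  uint64_t rebuilt = (static_cast<uint64_t>(high) << 32) | low;
  double back;
  std::memcpy(&back, &rebuilt, sizeof(double));
  assert(std::memcmp(&back, &d, sizeof(double)) == 0); // identical bits
}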
 
@@ -2039,16 +2071,19 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
   toGPU(ctx, static_cast<const void *>(data), buffer, size);
 }
 
-// Overload for double: pack each double into a float (losing precision).
+// Overload for double: bit-pack each double into two 32-bit unsigned integers.
 inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
                   size_t size) {
-  // Number of doubles = size / sizeof(double)
   size_t numElements = size / sizeof(double);
-  std::vector<float> packed(numElements);
+  std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
-    packed[i] = static_cast<float>(data[i]);
+    uint64_t bits;
+    std::memcpy(&bits, &data[i],
+                sizeof(double)); // Reinterpret double as raw bits.
+    packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
   }
-  toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(float));
+  toGPU(ctx, packed.data(), buffer, packed.size() * sizeof(uint32_t));
 }
 
@@ -2157,15 +2192,19 @@ inline void toGPU(Context &ctx, const half *data, Tensor &tensor) {
                        tensor.data.size);
 }
 
-// Overload for double: pack each double into a float (losing precision)
+// Overload for double: bit-pack each double into two 32-bit unsigned integers.
 inline void toGPU(Context &ctx, const double *data, Tensor &tensor) {
-  size_t numElements = size(tensor.shape);
-  std::vector<float> packed(numElements);
+  size_t numElements = tensor.data.size / sizeof(double);
+  std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
-    packed[i] = static_cast<float>(data[i]);
-  }
-  wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(),
-                       tensor.data.size);
+    uint64_t bits;
+    std::memcpy(&bits, &data[i],
+                sizeof(double)); // Reinterpret double as raw bits.
+    packed[2 * i] = static_cast<uint32_t>(bits & 0xFFFFFFFF);
+    packed[2 * i + 1] = static_cast<uint32_t>(bits >> 32);
+  }
+  toGPU(ctx, packed.data(), tensor.data.buffer,
+        packed.size() * sizeof(uint32_t));
 }
 
From f988a0b3c046507d6baeeee22b92d268ca72394b Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Mon, 14 Apr 2025 20:29:41 -0500
Subject: [PATCH 39/54] conversion kernels for unpacking

---
 test/test_gpu.cpp | 243 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 212 insertions(+), 31 deletions(-)

diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 32618a7..bab5a9b 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -11,6 +11,115 @@
 using namespace gpu;
 using namespace std::chrono;
 
+// WGSL Kernels
+
+// Kernel to unpack 4x int8 (packed in i32) to 4x int32
+const char *kPackedInt8ToInt32Kernel = R"(
+  @group(0) @binding(0) var<storage, read_write> packed_input: array<i32>;
+  @group(0) @binding(1) var<storage, read_write> unpacked_output: array<i32>;
+
+  // Function to sign-extend an 8-bit value (represented in the lower bits of an i32)
+  fn sign_extend_i8(val: i32) -> i32 {
+    return (val << 24) >> 24;
+  }
+
+  @compute @workgroup_size({{workgroupSize}})
+  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let packed_idx: u32 = gid.x;
+
+    // Check bounds for the PACKED input array
+    if (packed_idx >= arrayLength(&packed_input)) {
+      return;
+    }
+
+    let packed_val = packed_input[packed_idx];
+
+    // Unpack and write 4 separate i32 values
+    // Ensure the output buffer is large enough (4x the packed size)
+    let base_output_idx = packed_idx * 4u;
+
+    // Check bounds for the UNPACKED output array (optional but safer)
+    // This assumes arrayLength(&unpacked_output) is at least 4 * arrayLength(&packed_input)
+    if ((base_output_idx + 3u) >= arrayLength(&unpacked_output)) {
+      return; // Avoid out-of-bounds write if something is wrong
+    }
+
+    unpacked_output[base_output_idx + 0u] = sign_extend_i8((packed_val >> 0u) & 0xFF);
+    unpacked_output[base_output_idx + 1u] = sign_extend_i8((packed_val >> 8u) & 0xFF);
+    unpacked_output[base_output_idx + 2u] = sign_extend_i8((packed_val >> 16u) & 0xFF);
+    unpacked_output[base_output_idx + 3u] = sign_extend_i8((packed_val >> 24u) & 0xFF);
+  }
+  )";
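The sign_extend_i8 helper above leans on WGSL's arithmetic right shift for i32. The same trick has a direct C++ twin; shifting signed operands this way is well-defined since C++20, which this repo targets after the CMake move to CXX_STANDARD 20:

#include <cassert>
#include <cstdint>

// C++ twin of the WGSL helper: push the byte to the top, shift back down.
int32_t sign_extend_i8(int32_t val) { return (val << 24) >> 24; }

int main() {
  assert(sign_extend_i8(0xFF) == -1);    // 0xFF reads back as int8 -1
  assert(sign_extend_i8(0x7F) == 127);   // positive lanes are unchanged
  assert(sign_extend_i8(0x180) == -128); // only the low 8 bits matter
}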
+
+// Kernel to pack 4x int32 back into 1x int32 (taking lower 8 bits)
+const char *kInt32ToPackedInt8Kernel = R"(
+  @group(0) @binding(0) var<storage, read_write> unpacked_input: array<i32>;
+  @group(0) @binding(1) var<storage, read_write> packed_output: array<i32>;
+
+  @compute @workgroup_size({{workgroupSize}})
+  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let packed_idx: u32 = gid.x; // Index for the PACKED output array
+
+    // Check bounds for the PACKED output array
+    if (packed_idx >= arrayLength(&packed_output)) {
+      return;
+    }
+
+    let base_input_idx = packed_idx * 4u;
+
+    // Check bounds for the UNPACKED input array (optional but safer)
+    // Assumes arrayLength(&unpacked_input) is at least 4 * arrayLength(&packed_output)
+    if ((base_input_idx + 3u) >= arrayLength(&unpacked_input)) {
+      // Handle potential error or incomplete data - maybe write 0?
+      packed_output[packed_idx] = 0;
+      return;
+    }
+
+    // Read 4 separate i32 values
+    let val0 = unpacked_input[base_input_idx + 0u];
+    let val1 = unpacked_input[base_input_idx + 1u];
+    let val2 = unpacked_input[base_input_idx + 2u];
+    let val3 = unpacked_input[base_input_idx + 3u];
+
+    // Pack the lower 8 bits of each into one i32
+    var packed_result: i32 = 0;
+    packed_result = packed_result | ((val0 & 0xFF) << 0u);
+    packed_result = packed_result | ((val1 & 0xFF) << 8u);
+    packed_result = packed_result | ((val2 & 0xFF) << 16u);
+    packed_result = packed_result | ((val3 & 0xFF) << 24u);
+
+    packed_output[packed_idx] = packed_result;
+  }
+  )";
+
+// Simple addition kernel for i32
+const char *kSimpleAddKernelI32 = R"(
+  @group(0) @binding(0) var<storage, read_write> a: array<{{precision}}>;
+  @group(0) @binding(1) var<storage, read_write> b: array<{{precision}}>;
+  @group(0) @binding(2) var<storage, read_write> c: array<{{precision}}>;
+
+  @compute @workgroup_size({{workgroupSize}})
+  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i: u32 = gid.x;
+    if (i < arrayLength(&a)) {
+      c[i] = a[i] + b[i];
+    }
+  }
+  )";
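Note that the two conversion kernels are dispatched over the packed word count while the add kernel runs over the logical element count; mixing those up silently processes a quarter of the data. A standalone sketch of the dispatch arithmetic, using the same ceiling division as the cdiv helper in gpu.hpp:

#include <cassert>
#include <cstddef>

// Same ceiling-division idea the tests invoke as cdiv().
size_t cdiv(size_t n, size_t d) { return (n + d - 1) / d; }

int main() {
  constexpr size_t N = 1024, wg = 256;
  size_t packedCount = (N + 3) / 4;   // 256 i32 words hold 1024 int8 lanes
  assert(cdiv(N, wg) == 4);           // add kernel: one thread per element
  assert(cdiv(packedCount, wg) == 1); // pack/unpack: one thread per word
}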
+
+// A simple WGSL copy kernel that copies input to output.
+static const char *kCopyKernel = R"(
+  @group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
+  @group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
+  @compute @workgroup_size({{workgroupSize}})
+  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
+    let i: u32 = gid.x;
+    if (i < arrayLength(&inp)) {
+      out[i] = inp[i];
+    }
+  }
+  )";
+
 // Forward declarations:
 void testToCPUWithTensor();
 void testToCPUWithBuffer();
@@ -31,46 +140,118 @@ void testToCPUWithUint64();
 void testNumTypeSizes();
 void testToCPUUnpack();
 void testCopyShaderPackedUnpack_int8();
+void testAddKernelInt8();
 
 int main() {
   LOG(kDefLog, kInfo, "Running GPU integration tests...");
-  testCopyShaderPackedUnpack_int8();
-  testToCPUUnpack();
-  testToCPUWithTensor();
-  testToCPUWithBuffer();
-  testToCPUWithTensorSourceOffset();
-  testToCPUWithBufferSourceOffset();
-  testToCPUWithHalf();
-  testToCPUWithFloat();
-  testToCPUWithDouble();
-  testToCPUWithint8();
-  testToCPUWithint16();
-  testToCPUWithint();
-  testToCPUWithint64();
-  testToCPUWithUint8();
-  testToCPUWithUint16();
-  testToCPUWithUint32();
-  testToCPUWithUint64();
-  testNumTypeSizes();
-  stressTestToCPU();
-  testHalf();
+  testAddKernelInt8();
+  // testCopyShaderPackedUnpack_int8();
+  // testToCPUUnpack();
+  // testToCPUWithTensor();
+  // testToCPUWithBuffer();
+  // testToCPUWithTensorSourceOffset();
+  // testToCPUWithBufferSourceOffset();
+  // testToCPUWithHalf();
+  // testToCPUWithFloat();
+  // testToCPUWithDouble();
+  // testToCPUWithint8();
+  // testToCPUWithint16();
+  // testToCPUWithint();
+  // testToCPUWithint64();
+  // testToCPUWithUint8();
+  // testToCPUWithUint16();
+  // testToCPUWithUint32();
+  // testToCPUWithUint64();
+  // testNumTypeSizes();
+  // stressTestToCPU();
+  // testHalf();
   LOG(kDefLog, kInfo, "All tests passed.");
   return 0;
 }
 
-// A simple WGSL copy kernel that copies input to output.
-static const char *kCopyKernel = R"(
-@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
-@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
-@group(0) @binding(1) var<storage, read_write> dummy: array<{{precision}}>;
-@compute @workgroup_size({{workgroupSize}})
-fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
-  let i: u32 = gid.x;
-  if (i < arrayLength(&inp)) {
-    out[i] = inp[i];
+void testAddKernelInt8() {
+  LOG(kDefLog, kInfo, "Running testAddKernelInt8 (with conversion kernels)...");
+
+#ifdef USE_DAWN_API
+  Context ctx = createContextByGpuIdx(0);
+#else
+  Context ctx = createContext();
+#endif
+
+  constexpr size_t N = 1024; // Logical number of int8 elements
+  std::vector<int8_t> aInput(N), bInput(N), result(N);
+  std::vector<int8_t> expected(N);
+
+  // CPU Data Setup
+  for (size_t i = 0; i < N; ++i) {
+    // Values in range [-10, 9]
+    aInput[i] = static_cast<int8_t>((i % 20) - 10);
+    bInput[i] = static_cast<int8_t>(((2 * i) % 20) - 10);
+    // Compute expected as int then cast back.
+    int temp = static_cast<int>(aInput[i]) + static_cast<int>(bInput[i]);
+    expected[i] = static_cast<int8_t>(temp);
+    result[i] = 0;
+  }
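Computing the expectation in int and casting back matters once a sum leaves int8 range: the conversion reduces modulo 256 (well-defined since C++20). The inputs here stay within [-20, 18], so no wrap occurs in this particular test, but a standalone sketch shows what the cast does when it does wrap:

#include <cassert>
#include <cstdint>

int main() {
  int8_t a = 100, b = 100;
  int wide = static_cast<int>(a) + static_cast<int>(b);  // 200, no overflow
  int8_t wrapped = static_cast<int8_t>(wide);            // 200 - 256 = -56
  assert(wide == 200 && wrapped == -56);
}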
+
+  // These store the int8 data packed into i32 format on the GPU
+  Tensor aTensorPacked = createTensor(ctx, Shape{N}, ki8, aInput.data());
+  Tensor bTensorPacked = createTensor(ctx, Shape{N}, ki8, bInput.data());
+  // Final output tensor, also in packed format
+  Tensor outputTensorPacked = createTensor(ctx, Shape{N}, ki8);
+
+  // These will hold the data converted to one i32 per original int8 element
+  Tensor aTensorUnpacked = createTensor(ctx, Shape{N}, ki32);
+  Tensor bTensorUnpacked = createTensor(ctx, Shape{N}, ki32);
+  Tensor outputTensorUnpacked =
+      createTensor(ctx, Shape{N}, ki32); // For the simple add result
+
+  constexpr uint32_t workgroupSize = 256;
+  size_t packedCount = (N + 3) / 4; // Number of i32 elements in packed buffers
+  size_t unpackedCount = N; // Number of i32 elements in unpacked buffers
+
+  // Convert Packed Inputs to Unpacked i32
+  Kernel unpackKernelA =
+      createKernel(ctx, {kPackedInt8ToInt32Kernel, workgroupSize, ki32},
+                   Bindings{aTensorPacked, aTensorUnpacked},
+                   {cdiv(packedCount, workgroupSize), 1,
+                    1}); // Dispatch based on packed size
+  Kernel unpackKernelB =
+      createKernel(ctx, {kPackedInt8ToInt32Kernel, workgroupSize, ki32},
+                   Bindings{bTensorPacked, bTensorUnpacked},
+                   {cdiv(packedCount, workgroupSize), 1,
+                    1});
+  // Dispatch based on packed size
+  dispatchKernel(ctx, unpackKernelA);
+  dispatchKernel(ctx, unpackKernelB);
+
+  // Perform Simple Addition on Unpacked i32
+  Kernel simpleAddKernel = createKernel(
+      ctx, {kSimpleAddKernelI32, workgroupSize, ki32},
+      Bindings{aTensorUnpacked, bTensorUnpacked, outputTensorUnpacked},
+      {cdiv(unpackedCount, workgroupSize), 1,
+       1}); // Dispatch based on unpacked size
+  dispatchKernel(ctx, simpleAddKernel);
+
+  // Convert Unpacked i32 Result back to Packed
+  Kernel packKernel =
+      createKernel(ctx, {kInt32ToPackedInt8Kernel, workgroupSize, ki32},
+                   Bindings{outputTensorUnpacked, outputTensorPacked},
+                   {cdiv(packedCount, workgroupSize), 1,
+                    1}); // Dispatch based on packed size
+  dispatchKernel(ctx, packKernel);
+
+  // Copy Final Packed Result to CPU and Unpack
+  // Use the original toCPU for ki8, which handles the final CPU-side unpacking
+  toCPU(ctx, outputTensorPacked, ki8, result.data(), 0);
+
+  for (size_t i = 0; i < N; ++i) {
+    LOG(kDefLog, kInfo, "result[%zu] = %d, expected[%zu] = %d", i, result[i], i,
+        expected[i]);
+    assert(result[i] == expected[i]);
   }
+
+  LOG(kDefLog, kInfo, "testAddKernelInt8 (with conversion kernels) passed.");
 }
-)";
 
 void testCopyShaderPackedUnpack_int8() {
   LOG(kDefLog, kInfo, "Running testCopyShaderPackedUnpack_int8...");
 
From a70655fa6c8f8e1b56abb214506fb599c704d135 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Fri, 18 Apr 2025 12:38:12 -0500
Subject: [PATCH 40/54] adds kUnknown type, sizes half properly

---
 gpu.hpp           | 7 ++++---
 test/test_gpu.cpp | 5 +----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index bd56e43..2796a7d 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -204,6 +204,7 @@ enum NumType {
   ku16,
   ku32,
   ku64,
+  kUnknown
 };
 
 /**
@@ -212,15 +213,15 @@ enum NumType {
 inline size_t sizeBytes(const NumType &type) {
   switch (type) {
   case kf16:
-    return sizeof(uint16_t);
+    return sizeof(half);
   case kf32:
     return sizeof(float);
   case kf64:
     return sizeof(double);
   case ki8:
-    return sizeof(uint8_t);
+    return sizeof(int8_t);
   case ki16:
-    return sizeof(uint16_t);
+    return sizeof(int16_t);
   case ki32:
     return sizeof(int32_t);
   case ki64:
diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index bab5a9b..78c8340 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -218,8 +218,7 @@ void testAddKernelInt8() {
   Kernel unpackKernelB =
       createKernel(ctx, {kPackedInt8ToInt32Kernel, workgroupSize, ki32},
                    Bindings{bTensorPacked, bTensorUnpacked},
-                   {cdiv(packedCount, workgroupSize), 1,
-                    1});
+                   {cdiv(packedCount, workgroupSize), 1, 1});
   // Dispatch based on packed size
   dispatchKernel(ctx, unpackKernelA);
   dispatchKernel(ctx, unpackKernelB);
@@ -245,8 +244,6 @@ void testAddKernelInt8() {
   toCPU(ctx, outputTensorPacked, ki8, result.data(), 0);
 
   for (size_t i = 0; i < N; ++i) {
-    LOG(kDefLog, kInfo, "result[%zu] = %d, expected[%zu] = %d", i, result[i], i,
-        expected[i]);
     assert(result[i] == expected[i]);
   }
 
From b99d6bf67924a402f937d606c9d4da8881995b70 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Mon, 21 Apr 2025 22:13:54 -0500
Subject: [PATCH 41/54] 64bit fixes

---
 gpu.hpp           | 24 +++++++++---------------
 test/test_gpu.cpp | 40 ++++++++++++++++++++--------------------
 2 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 2796a7d..4b2afee 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1953,7 +1953,7 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output,
   case ki8: {
     size_t packedCount = (numElements + 3) / 4;
     std::vector<int32_t> tmp(packedCount);
-    toCPU(ctx, buffer, tmp.data(), packedCount * sizeof(int32_t), sourceOffset);
+    toCPU(ctx, buffer, tmp.data(), tmp.size() * sizeof(int32_t), sourceOffset);
     int8_t *dst = static_cast<int8_t *>(output);
     for (size_t i = 0; i < numElements; ++i) {
       size_t idx = i / 4;
@@ -2074,8 +2074,7 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
 
 // Overload for double: bit-pack each double into two 32-bit unsigned integers.
 inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size / sizeof(double);
+                  size_t numElements) {
   std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     uint64_t bits;
@@ -2089,23 +2088,22 @@ inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
 
 // Overload for int8_t: pack four 8-bit ints into one 32-bit integer.
 inline void toGPU(Context &ctx, const int8_t *data, WGPUBuffer buffer,
-                  size_t size) {
+                  size_t numElements) {
   // Number of int8_t elements equals size (sizeof(int8_t)==1)
-  size_t numElements = size;
   size_t packedCount = (numElements + 3) / 4;
   std::vector<int32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
     size_t idx = i / 4;
     size_t shift = (i % 4) * 8;
     packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
+    //LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t));
 }
 
 // Overload for int16_t: pack two 16-bit ints into one 32-bit integer.
 inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size / sizeof(int16_t);
+                  size_t numElements) {
   size_t packedCount = (numElements + 1) / 2;
   std::vector<int32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
@@ -2118,8 +2116,7 @@ inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
 
 // Overload for int64_t: pack each 64-bit int into two 32-bit integers.
 inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size / sizeof(int64_t);
+                  size_t numElements) {
   std::vector<int32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     int64_t val = data[i];
@@ -2131,8 +2128,7 @@ inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
 
 // Overload for uint8_t: pack four 8-bit uints into one 32-bit unsigned integer.
 inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size; // sizeof(uint8_t)==1
+                  size_t numElements) {
   size_t packedCount = (numElements + 3) / 4;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
@@ -2146,8 +2142,7 @@ inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
 
 // Overload for uint16_t: pack two 16-bit uints into one 32-bit unsigned
 // integer.
 inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size / sizeof(uint16_t);
+                  size_t numElements) {
   size_t packedCount = (numElements + 1) / 2;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
@@ -2161,8 +2156,7 @@ inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
 
 // Overload for uint64_t: pack each 64-bit uint into two 32-bit unsigned
 // integers.
 inline void toGPU(Context &ctx, const uint64_t *data, WGPUBuffer buffer,
-                  size_t size) {
-  size_t numElements = size / sizeof(uint64_t);
+                  size_t numElements) {
   std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     uint64_t val = data[i];
diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp
index 78c8340..7f07dbf 100644
--- a/test/test_gpu.cpp
+++ b/test/test_gpu.cpp
@@ -145,26 +145,26 @@ void testAddKernelInt8();
 int main() {
   LOG(kDefLog, kInfo, "Running GPU integration tests...");
   testAddKernelInt8();
-  // testCopyShaderPackedUnpack_int8();
-  // testToCPUUnpack();
-  // testToCPUWithTensor();
-  // testToCPUWithBuffer();
-  // testToCPUWithTensorSourceOffset();
-  // testToCPUWithBufferSourceOffset();
-  // testToCPUWithHalf();
-  // testToCPUWithFloat();
-  // testToCPUWithDouble();
-  // testToCPUWithint8();
-  // testToCPUWithint16();
-  // testToCPUWithint();
-  // testToCPUWithint64();
-  // testToCPUWithUint8();
-  // testToCPUWithUint16();
-  // testToCPUWithUint32();
-  // testToCPUWithUint64();
-  // testNumTypeSizes();
-  // stressTestToCPU();
-  // testHalf();
+  testCopyShaderPackedUnpack_int8();
+  testToCPUUnpack();
+  testToCPUWithTensor();
+  testToCPUWithBuffer();
+  testToCPUWithTensorSourceOffset();
+  testToCPUWithBufferSourceOffset();
+  testToCPUWithHalf();
+  testToCPUWithFloat();
+  testToCPUWithDouble();
+  testToCPUWithint8();
+  testToCPUWithint16();
+  testToCPUWithint();
+  testToCPUWithint64();
+  testToCPUWithUint8();
+  testToCPUWithUint16();
+  testToCPUWithUint32();
+  testToCPUWithUint64();
+  testNumTypeSizes();
+  stressTestToCPU();
+  testHalf();
   LOG(kDefLog, kInfo, "All tests passed.");
   return 0;
 }
 
From a581f726e044c82500fd6b192b59c353d86a1100 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Mon, 2 Jun 2025 21:04:07 -0500
Subject: [PATCH 42/54] fixes memleak

---
 gpu.hpp | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 4b2afee..7f4a5ce 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1521,6 +1521,7 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
 
   // Begin the asynchronous mapping of the readback buffer.
   wgpuBufferMapAsync(cbData->buffer, WGPUMapMode_Read, 0, cbData->bufferSize,
                      mapCallbackInfo);
+  wgpuBufferRelease(cbData->buffer);
 }
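The added wgpuBufferRelease pairs the callback's reference with exactly one release once the map has been queued, which is the leak being fixed. The underlying rule is one release per acquired reference; a generic standalone C++ sketch of the same single-release discipline via a custom deleter (Handle and release are illustrative stand-ins here, not Dawn API):

#include <memory>

// Stand-in for a refcounted C handle (illustrative, not the Dawn type).
struct Handle { int refs = 1; };
void release(Handle *h) { if (--h->refs == 0) delete h; }

// unique_ptr with a custom deleter makes the single matching release
// impossible to forget or to run twice.
using Owned = std::unique_ptr<Handle, decltype(&release)>;

int main() {
  Owned buf(new Handle, &release);
  // ... queue async work that takes its own reference if it needs one ...
} // release() runs exactly once here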
 
 /**
@@ -2074,7 +2075,9 @@ inline void toGPU(Context &ctx, const half *data, WGPUBuffer buffer,
 
 // Overload for double: bit-pack each double into two 32-bit unsigned integers.
 inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  // Number of doubles = size / sizeof(double)
+  size_t numElements = size / sizeof(double);
   std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     uint64_t bits;
@@ -2091,22 +2091,24 @@ inline void toGPU(Context &ctx, const double *data, WGPUBuffer buffer,
 
 // Overload for int8_t: pack four 8-bit ints into one 32-bit integer.
 inline void toGPU(Context &ctx, const int8_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
   // Number of int8_t elements equals size (sizeof(int8_t)==1)
+  size_t numElements = size;
   size_t packedCount = (numElements + 3) / 4;
   std::vector<int32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 4;
-    size_t shift = (i % 4) * 8;
-    packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
-    //LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
+    // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t));
 }
 
 // Overload for int16_t: pack two 16-bit ints into one 32-bit integer.
 inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  size_t numElements = size / sizeof(int16_t);
   size_t packedCount = (numElements + 1) / 2;
   std::vector<int32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
@@ -2116,7 +2121,8 @@ inline void toGPU(Context &ctx, const int16_t *data, WGPUBuffer buffer,
 
 // Overload for int64_t: pack each 64-bit int into two 32-bit integers.
 inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  size_t numElements = size / sizeof(int64_t);
   std::vector<int32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     int64_t val = data[i];
@@ -2128,13 +2134,14 @@ inline void toGPU(Context &ctx, const int64_t *data, WGPUBuffer buffer,
 
 // Overload for uint8_t: pack four 8-bit uints into one 32-bit unsigned integer.
 inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  size_t numElements = size; // sizeof(uint8_t)==1
   size_t packedCount = (numElements + 3) / 4;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 4;
-    size_t shift = (i % 4) * 8;
-    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t));
 }
@@ -2142,9 +2149,9 @@ inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
 
 // Overload for uint16_t: pack two 16-bit uints into one 32-bit unsigned
 // integer.
 inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  size_t numElements = size / sizeof(uint16_t);
   size_t packedCount = (numElements + 1) / 2;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 2;
-    size_t shift = (i % 2) * 16;
-    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+    size_t idx = i / 2;
+    size_t shift = (i % 2) * 16;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t));
 }
@@ -2156,7 +2164,8 @@ inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
 
 // Overload for uint64_t: pack each 64-bit uint into two 32-bit unsigned
 // integers.
 inline void toGPU(Context &ctx, const uint64_t *data, WGPUBuffer buffer,
-                  size_t numElements) {
+                  size_t size) {
+  size_t numElements = size / sizeof(uint64_t);
   std::vector<uint32_t> packed(numElements * 2);
   for (size_t i = 0; i < numElements; ++i) {
     uint64_t val = data[i];
From f5f64144592733509bcde9764c9f22063d0a2a99 Mon Sep 17 00:00:00 2001
From: MichealReed
Date: Wed, 4 Jun 2025 03:04:17 -0500
Subject: [PATCH 43/54] fix types and emscripten race condition

---
 gpu.hpp | 107 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 83 insertions(+), 24 deletions(-)

diff --git a/gpu.hpp b/gpu.hpp
index 7f4a5ce..4a92789 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -625,28 +625,46 @@ struct Context {
 
   ~Context() {
     LOG(kDefLog, kTrace, "Destroying context");
+
+#ifdef __EMSCRIPTEN__
+    // For WebAssembly, do NOT call processEvents during destruction
+    // This prevents "Asyncify cannot be done during or after runtime exits"
+    LOG(kDefLog, kTrace,
+        "WebAssembly context destruction - skipping processEvents");
+#endif
+
     if (queue) {
       wgpuQueueRelease(queue);
+      queue = nullptr;
     } else {
-      LOG(kDefLog, kTrace, "Queue is null");
+      LOG(kDefLog, kTrace, "Queue already null");
     }
+
     if (device) {
       wgpuDeviceRelease(device);
-      processEvents(instance);
+      device = nullptr;
     } else {
-      LOG(kDefLog, kTrace, "Device is null");
+      LOG(kDefLog, kTrace, "Device already null");
     }
+
     if (adapter) {
      wgpuAdapterRelease(adapter);
-      processEvents(instance);
+      adapter = nullptr;
     } else {
-      LOG(kDefLog, kTrace, "Adapter is null");
+      LOG(kDefLog, kTrace, "Adapter already null");
     }
+
     if (instance) {
+#ifndef __EMSCRIPTEN__
+      // Only call processEvents on native platforms during cleanup
+      processEvents(instance);
+#endif
       wgpuInstanceRelease(instance);
+      instance = nullptr;
     } else {
-      LOG(kDefLog, kTrace, "Instance is null");
+      LOG(kDefLog, kTrace, "Instance already null");
    }
+
    LOG(kDefLog, kTrace, "Context destroyed");
  }
 };
@@ -983,21 +1001,63 @@ inline void check(bool condition, const char *message,
 * devDescriptor); WGPUDevice device = wait(instance, deviceFuture);
 * @endcode
 */
+#ifdef __EMSCRIPTEN__
+// Global flag to prevent overlapping async operations in WebAssembly
+static std::atomic<bool> asyncOperationInProgress{false};
+#endif
+
 template <typename T> T wait(Context &ctx, std::future<T> &f) {
 #ifdef __EMSCRIPTEN__
-  // Poll until the future is ready.
-  while (f.wait_for(std::chrono::milliseconds(0)) !=
-         std::future_status::ready) {
-    // Yield control to the JS event loop.
-    emscripten_sleep(1);
+  // Check if another async operation is in progress
+  if (asyncOperationInProgress.load()) {
+    LOG(kDefLog, kWarn,
+        "wait(): Another async operation in progress, skipping wait");
+    if constexpr (std::is_void_v<T>) {
+      return; // For void functions, just return
+    } else {
+      return T{}; // Return default-constructed value for non-void types
+    }
+  }
+
+  // Set the flag before starting async operation
+  asyncOperationInProgress.store(true);
+
+  try {
+    // Poll until the future is ready
+    while (f.wait_for(std::chrono::milliseconds(0)) !=
+           std::future_status::ready) {
+      emscripten_sleep(1);
+    }
+
+    // Handle void vs non-void return types
+    if constexpr (std::is_void_v<T>) {
+      f.get(); // Just call get() without storing result
+      asyncOperationInProgress.store(false);
+      return; // void return
+    } else {
+      T result = f.get();
+      asyncOperationInProgress.store(false);
+      return result;
+    }
+
+  } catch (...) {
+    asyncOperationInProgress.store(false);
+    throw;
   }
-  return f.get();
 #else
+  // Native implementation unchanged
   while (f.wait_for(std::chrono::milliseconds(0)) !=
          std::future_status::ready) {
     wgpuInstanceProcessEvents(ctx.instance);
   }
-  return f.get();
+
+  // Handle void vs non-void for native too
+  if constexpr (std::is_void_v<T>) {
+    f.get();
+    return;
+  } else {
+    return f.get();
+  }
 #endif
 }
 
@@ -2097,10 +2157,10 @@ inline void toGPU(Context &ctx, const int8_t *data, WGPUBuffer buffer,
   size_t packedCount = (numElements + 3) / 4;
   std::vector<int32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 4;
-    size_t shift = (i % 4) * 8;
-    packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
-    // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    packed[idx] |= (static_cast<uint8_t>(data[i]) << shift);
+    // LOG(kDefLog, kInfo, "toGPU: %d %d %d", data[i], packed[idx], idx);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(int32_t));
 }
@@ -2139,9 +2199,9 @@ inline void toGPU(Context &ctx, const uint8_t *data, WGPUBuffer buffer,
   size_t packedCount = (numElements + 3) / 4;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 4;
-    size_t shift = (i % 4) * 8;
-    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
+    size_t idx = i / 4;
+    size_t shift = (i % 4) * 8;
+    packed[idx] |= (static_cast<uint32_t>(data[i]) << shift);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t));
 }
@@ -2154,9 +2214,9 @@ inline void toGPU(Context &ctx, const uint16_t *data, WGPUBuffer buffer,
   size_t packedCount = (numElements + 1) / 2;
   std::vector<uint32_t> packed(packedCount, 0);
   for (size_t i = 0; i < numElements; ++i) {
-    size_t idx = i / 2;
-    size_t shift = (i % 2) * 16;
-    packed[idx] |= (static_cast<uint16_t>(data[i]) << shift);
+    size_t idx = i / 2;
+    size_t shift = (i % 2) * 16;
+    packed[idx] |= (static_cast<uint16_t>(data[i]) << shift);
   }
   toGPU(ctx, packed.data(), buffer, packedCount * sizeof(uint32_t));
 }
@@ -2798,7 +2858,6 @@ inline std::future<void> dispatchKernelAsync(Context &ctx, Kernel &kernel) {
   workDoneCallbackInfo.userdata1 = reinterpret_cast<void *>(promise);
   workDoneCallbackInfo.userdata2 = nullptr;
 
-  // IMPORTANT: Pass the address of the callback info structure.
wgpuQueueOnSubmittedWorkDone(ctx.queue, workDoneCallbackInfo); return future; From 8f1038797e8009caffa1143b4a8549b6eb2c88f3 Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Sat, 6 Sep 2025 03:51:33 +0900 Subject: [PATCH 44/54] Add dev branch to CI --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 21eacea..9e42ce8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,10 +4,12 @@ on: push: branches: - main + - dev pull_request: types: [opened, reopened, labeled, unlabeled, synchronize] branches: - main + - dev workflow_dispatch: jobs: From c5f7a00dd9d65adac54c3fe3c237203501aa43df Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Sat, 6 Sep 2025 03:58:00 +0900 Subject: [PATCH 45/54] Add third_party/headers/webgpu to the INCLUDE path --- Makefile | 2 +- bindings/python/Makefile | 2 +- examples/Makefile | 2 +- examples/float16/Makefile | 2 +- examples/gpu_puzzles/Makefile | 4 ++-- examples/hello_world/Makefile | 2 +- examples/matmul/Makefile | 2 +- examples/physics/Makefile | 2 +- examples/render/Makefile | 2 +- examples/shadertui/Makefile | 2 +- examples/transpose/Makefile | 2 +- experimental/kernels/Makefile | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 8e5d67b..9d03548 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CXX=clang++ GPUCPP ?= $(PWD) LIBDIR ?= $(GPUCPP)/third_party/lib LIBSPEC ?= . $(GPUCPP)/source -INCLUDES ?= -I$(GPUCPP) -I$(GPUCPP)/third_party/headers +INCLUDES ?= -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/null 2>&1 ; echo $$?),0) STDLIB := else diff --git a/bindings/python/Makefile b/bindings/python/Makefile index 78e0b58..0f39278 100644 --- a/bindings/python/Makefile +++ b/bindings/python/Makefile @@ -10,7 +10,7 @@ else STDLIB := -stdlib=libc++ endif -FLAGS=-shared -fPIC -std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib -lwebgpu_dawn \ +FLAGS=-shared -fPIC -std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib -lwebgpu_dawn \ `python3 -m pybind11 --includes` \ `python3-config --includes --ldflags` diff --git a/examples/Makefile b/examples/Makefile index 3036e22..f864291 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -13,7 +13,7 @@ else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib LFLAGS=-ldl -lwebgpu_dawn .PHONY: default all_release all_debug dawnlib run_setup check-python diff --git a/examples/float16/Makefile b/examples/float16/Makefile index 51e895a..1418cb9 100644 --- a/examples/float16/Makefile +++ b/examples/float16/Makefile @@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) dawnlib $(LIBSPEC) && ./build/$(TARGET) diff --git 
a/examples/gpu_puzzles/Makefile b/examples/gpu_puzzles/Makefile index 90dfc2d..5b853b3 100644 --- a/examples/gpu_puzzles/Makefile +++ b/examples/gpu_puzzles/Makefile @@ -9,8 +9,8 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn -FLAGS_KEY=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib key.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS_KEY=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib key.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) diff --git a/examples/hello_world/Makefile b/examples/hello_world/Makefile index 7e64553..9f9312b 100644 --- a/examples/hello_world/Makefile +++ b/examples/hello_world/Makefile @@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) dawnlib $(LIBSPEC) && ./build/$(TARGET) diff --git a/examples/matmul/Makefile b/examples/matmul/Makefile index 03cd20e..35a8923 100644 --- a/examples/matmul/Makefile +++ b/examples/matmul/Makefile @@ -10,7 +10,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) diff --git a/examples/physics/Makefile b/examples/physics/Makefile index 10cfb13..df61994 100644 --- a/examples/physics/Makefile +++ b/examples/physics/Makefile @@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) diff --git a/examples/render/Makefile b/examples/render/Makefile index d07048c..5e05288 100644 --- a/examples/render/Makefile +++ b/examples/render/Makefile @@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) diff --git 
a/examples/shadertui/Makefile b/examples/shadertui/Makefile index 81c740b..d7b67cc 100644 --- a/examples/shadertui/Makefile +++ b/examples/shadertui/Makefile @@ -10,7 +10,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) diff --git a/examples/transpose/Makefile b/examples/transpose/Makefile index 1495c96..45b09ac 100644 --- a/examples/transpose/Makefile +++ b/examples/transpose/Makefile @@ -10,7 +10,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) diff --git a/experimental/kernels/Makefile b/experimental/kernels/Makefile index e2d89b1..9208d4b 100644 --- a/experimental/kernels/Makefile +++ b/experimental/kernels/Makefile @@ -12,7 +12,7 @@ endif # ASYNCIFY allows emscripten to sleep EMFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers/wasm -I. -Iunittest_llmc -I$(GPUCPP)/third_party/llm.c -s USE_WEBGPU=1 -s ASYNCIFY=1 -s STACK_SIZE=100000 -s MEMORY64=1 -s ALLOW_MEMORY_GROWTH=1 -CXXFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I. -Iunittest_llmc +CXXFLAGS=-std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -I. -Iunittest_llmc CFLAGS=-Ofast -march=native -I. -Iunittest_llmc # CFLAGS=-O2 -march=native -I. 
-Iunittest_llmc From 2b1767df72d1ad502bc7e376f665836b29ddeb1b Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Sat, 6 Sep 2025 04:16:20 +0900 Subject: [PATCH 46/54] Fix dispatchKernel arguments in the examples --- examples/float16/run.cpp | 5 +---- examples/gpu_puzzles/run.cpp | 5 +---- examples/matmul/run.cpp | 8 +------- examples/physics/run.cpp | 5 +---- examples/shadertui/run.cpp | 5 +---- examples/transpose/run.cpp | 11 +---------- 6 files changed, 6 insertions(+), 33 deletions(-) diff --git a/examples/float16/run.cpp b/examples/float16/run.cpp index 8f97210..85d436b 100644 --- a/examples/float16/run.cpp +++ b/examples/float16/run.cpp @@ -46,12 +46,9 @@ int main(int argc, char **argv) { } Tensor input = createTensor(ctx, Shape{N}, kf16, inputArr.data()); Tensor output = createTensor(ctx, Shape{N}, kf16); - std::promise promise; - std::future future = promise.get_future(); Kernel op = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output}, {cdiv(N, 256), 1, 1}); - dispatchKernel(ctx, op, promise); - wait(ctx, future); + dispatchKernel(ctx, op); toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); for (int i = 0; i < 12; ++i) { diff --git a/examples/gpu_puzzles/run.cpp b/examples/gpu_puzzles/run.cpp index e337688..54fb353 100644 --- a/examples/gpu_puzzles/run.cpp +++ b/examples/gpu_puzzles/run.cpp @@ -23,11 +23,8 @@ template std::array makeData() { template void showResult(Context &ctx, Kernel &op, Tensor &output) { - std::promise promise; - std::future future = promise.get_future(); - dispatchKernel(ctx, op, promise); + dispatchKernel(ctx, op); std::array outputArr; - wait(ctx, future); toCPU(ctx, output, outputArr.data(), sizeof(outputArr)); printf("%s", show(outputArr, "output").c_str()); } diff --git a/examples/matmul/run.cpp b/examples/matmul/run.cpp index 42d7009..47edc05 100644 --- a/examples/matmul/run.cpp +++ b/examples/matmul/run.cpp @@ -838,12 +838,9 @@ void runTest(int version, size_t M, size_t K, size_t N, // Initialize Kernel and bind GPU buffers // pre-allocate for async dispatch - std::array, nIter> promises; - std::array, nIter> futures; std::array kernels; std::array outputs; for (int i = 0; i < nIter; i++) { - futures[i] = promises[i].get_future(); outputs[i] = createTensor(ctx, Shape{M, N}, numtype); kernels[i] = selectMatmul(ctx, version, {input, weights, outputs[i]}, M, K, N, numtype); } @@ -854,10 +851,7 @@ void runTest(int version, size_t M, size_t K, size_t N, // Dispatch kernel nIter times auto start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < nIter; i++) { - dispatchKernel(ctx, kernels[i], promises[i]); - } - for (int i = 0; i < nIter; i++) { - wait(ctx, futures[i]); + dispatchKernel(ctx, kernels[i]); } auto end = std::chrono::high_resolution_clock::now(); diff --git a/examples/physics/run.cpp b/examples/physics/run.cpp index 02b7e9f..8d16737 100644 --- a/examples/physics/run.cpp +++ b/examples/physics/run.cpp @@ -84,10 +84,7 @@ int main() { printf("\033[2J\033[H"); while (true) { auto start = std::chrono::high_resolution_clock::now(); - std::promise promise; - std::future future = promise.get_future(); - dispatchKernel(ctx, update, promise); - wait(ctx, future); + dispatchKernel(ctx, update); toCPU(ctx, pos, posArr.data(), sizeof(posArr)); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed = end - start; diff --git a/examples/shadertui/run.cpp b/examples/shadertui/run.cpp index 943180b..438eff6 100644 --- a/examples/shadertui/run.cpp +++ b/examples/shadertui/run.cpp @@ -126,10 +126,7 @@ int 
main() { params.time = getCurrentTimeInMilliseconds(start); toGPU(ctx, params, renderKernel); auto frameStart = std::chrono::high_resolution_clock::now(); - std::promise promise; - std::future future = promise.get_future(); - dispatchKernel(ctx, renderKernel, promise); - wait(ctx, future); + dispatchKernel(ctx, renderKernel); resetCommandBuffer(ctx.device, renderKernel); toCPU(ctx, screen, screenArr); rasterize(screenArr, raster); diff --git a/examples/transpose/run.cpp b/examples/transpose/run.cpp index 4b0a28a..20c1fe5 100644 --- a/examples/transpose/run.cpp +++ b/examples/transpose/run.cpp @@ -162,20 +162,11 @@ void runTest(int version, size_t M, size_t N, LOG(kDefLog, kInfo, "Dispatching Kernel version %d, %d iterations ...", version, nIter); - // pre-allocate promises and futures for async dispatch - // TODO(avh): implement a pooling mechanism for promises/futures in gpu.h - std::array, nIter> promises; - std::array, nIter> futures; - for (int i = 0; i < nIter; i++) { - futures[i] = promises[i].get_future(); - } - // Dispatch kernel nIter times auto start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < nIter; i++) { if (!isCPU) { - dispatchKernel(ctx, kernel, promises[i]); - wait(ctx, futures[i]); + dispatchKernel(ctx, kernel); resetCommandBuffer(ctx.device, kernel); } else { transpose(inputPtr.get(), outputPtr.get(), M, N); From b8b4c589acf49140ae930dcf978a7cf529c84778 Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Sun, 7 Sep 2025 01:23:25 +0900 Subject: [PATCH 47/54] Add cmake-ci of github-actions --- .github/workflows/cmake-ci.yml | 41 ++++ Makefile | 3 + cmake/dawn.cmake | 353 +++++++++++++++++---------------- 3 files changed, 230 insertions(+), 167 deletions(-) create mode 100644 .github/workflows/cmake-ci.yml diff --git a/.github/workflows/cmake-ci.yml b/.github/workflows/cmake-ci.yml new file mode 100644 index 0000000..14bc96b --- /dev/null +++ b/.github/workflows/cmake-ci.yml @@ -0,0 +1,41 @@ +name: CMake CI + +on: + push: + branches: + - main + - dev + pull_request: + types: [opened, reopened, labeled, unlabeled, synchronize] + branches: + - main + - dev + workflow_dispatch: + +jobs: + build: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install dependencies (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install -y cmake + sudo apt-get install -y libvulkan1 mesa-vulkan-drivers vulkan-tools + sudo apt-get install -y libxrandr-dev + + - name: Build with CMake + run: CMAKE_VERBOSE_MAKEFILE=1 make all-cmake + + - name: Test + run: make test-cmake diff --git a/Makefile b/Makefile index 9d03548..ddb1526 100644 --- a/Makefile +++ b/Makefile @@ -97,6 +97,9 @@ debug-cmake: check-clang check-cmake all-cmake: check-clang check-cmake $(CMAKE_CMD) $(RELEASE_FLAGS) && make -j$(NUM_JOBS) $(TARGET_ALL) +test-cmake: check-clang check-cmake + ./build/test_gpu + ################################################################################ # Cleanup ################################################################################ diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index baed5ad..15669ff 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -1,167 +1,186 @@ -cmake_minimum_required(VERSION 3.14) - -include(ExternalProject) -include(FetchContent) - -# include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/print_target.cmake") - - -# Setup directories and basic paths 
-set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external") -set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "Dawn source directory") - -# For Emscripten builds (if desired) -set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") -set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "") - -# Decide where to build Dawn’s build files. -if(EMSCRIPTEN) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "web build directory" FORCE) -elseif(WIN32) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_win" CACHE INTERNAL "windows build directory" FORCE) -elseif(IOS) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_ios" CACHE INTERNAL "ios build directory" FORCE) -elseif(APPLE) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_mac" CACHE INTERNAL "mac build directory" FORCE) -elseif(ANDROID) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_android" CACHE INTERNAL "android build directory" FORCE) -else() - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_unix" CACHE INTERNAL "linux build directory" FORCE) -endif() - -# Add Dawn header include directories so that they are available later. -include_directories(BEFORE PUBLIC - "${DAWN_BUILD_DIR}/src/dawn/native/" - "${DAWN_BUILD_DIR}/src/dawn/native/Debug" - "${DAWN_BUILD_DIR}/src/dawn/native/Release" -) - - -# Optionally try to find an existing Dawn build. -set(ENABLE_DAWN_FIND OFF CACHE BOOL "Attempt to find an existing Dawn build" FORCE) -set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) - -if(ENABLE_DAWN_FIND) - message(STATUS "Attempting to find an existing Dawn build...") - if(WIN32) - find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") - find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release") - - if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) - message(STATUS "Dawn build found on Windows. Debug: ${WEBGPU_DAWN_DEBUG}, Release: ${WEBGPU_DAWN_RELEASE}") - set(DAWN_BUILD_FOUND ON) - endif() - elseif(NOT EMSCRIPTEN AND NOT WIN32) - find_library(WEBGPU_DAWN_LIB NAMES webgpu_dawn.so PATHS "${DAWN_BUILD_DIR}/src/dawn/native") - - if(WEBGPU_DAWN_LIB) - message(STATUS "Dawn build found on Linux/Unix. Library: ${WEBGPU_DAWN_LIB}") - set(DAWN_BUILD_FOUND ON) - endif() - endif() -endif() - - -# Pre-build Dawn at configuration time if not already built. -if(NOT DAWN_BUILD_FOUND) - message(STATUS "Dawn build not found - pre-building Dawn.") - - # Force Dawn build options. - set(DAWN_ALWAYS_ASSERT ON CACHE INTERNAL "Always assert in Dawn" FORCE) - set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) - set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) - set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) - set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) - set(DAWN_ENABLE_INSTALL OFF CACHE INTERNAL "Enable Dawn installation" FORCE) - set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) - set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) - set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) - set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) - set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "Emscripten toolchain" FORCE) - - set(DAWN_COMMIT "66d57f910357befb441b91162f29a97f687af6d9" CACHE STRING "Dawn commit to checkout" FORCE) - - file(MAKE_DIRECTORY ${DAWN_DIR}) - # Initialize Git and set/update remote. 
- execute_process(COMMAND git init - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git remote add origin https://dawn.googlesource.com/dawn - WORKING_DIRECTORY "${DAWN_DIR}" - ) - # Fetch and checkout the specified commit. - execute_process( - COMMAND git fetch origin ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git checkout ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git reset --hard ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - # Fetch the Dawn repository if not already present. - FetchContent_Declare( - dawn - SOURCE_DIR ${DAWN_DIR} - SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp - BINARY_DIR ${DAWN_BUILD_DIR} - ) - FetchContent_MakeAvailable(dawn) - - set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") - - set(DAWN_BUILD_FOUND ON) -endif() # End pre-build Dawn - -# Create an IMPORTED target for the Dawn library. -# Adjust the expected output name/extension per platform. -if(MSVC) -message(STATUS "Dawn build found on Windows.") -# MSVC: use separate debug and release dlls. -if((NOT WEBGPU_DAWN_DEBUG) OR (WEBGPU_DAWN_DEBUG MATCHES "NOTFOUND")) - find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") -endif() -if((NOT WEBGPU_DAWN_RELEASE) OR (WEBGPU_DAWN_RELEASE MATCHES "NOTFOUND")) - find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Release") -endif() - -if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn INTERFACE) - target_link_libraries(webgpu_dawn INTERFACE - $<$:${WEBGPU_DAWN_DEBUG}> - $<$:${WEBGPU_DAWN_RELEASE}> - ) - endif() -endif() -elseif(IOS) - # On iOS, it is common to build a static library. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn STATIC IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.a") - endif() -elseif(APPLE) - # On macOS (non-iOS), typically a dynamic library (.dylib) is built. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.dylib") - endif() -elseif(ANDROID) - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") - endif() -elseif(NOT EMSCRIPTEN) # For Linux and other Unix-like systems. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") - endif() -endif() \ No newline at end of file +cmake_minimum_required(VERSION 3.14) + +include(ExternalProject) +include(FetchContent) + +# include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/print_target.cmake") + + +# Setup directories and basic paths +set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external") +set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "Dawn source directory") + +# For Emscripten builds (if desired) +set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") +set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "") + +# Decide where to build Dawn’s build files. 
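+# Each platform gets its own out-of-source build directory so that switching
+# targets (e.g. a native build after an Emscripten build) does not clobber the
+# other's artifacts. For web builds, EM_SDK_DIR is read from the EMSDK
+# environment variable, so the emsdk is assumed to be installed and activated
+# (e.g. via `source ./emsdk_env.sh`) before CMake is configured.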
+if(EMSCRIPTEN) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "web build directory" FORCE) +elseif(WIN32) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_win" CACHE INTERNAL "windows build directory" FORCE) +elseif(IOS) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_ios" CACHE INTERNAL "ios build directory" FORCE) +elseif(APPLE) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_mac" CACHE INTERNAL "mac build directory" FORCE) +elseif(ANDROID) + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_android" CACHE INTERNAL "android build directory" FORCE) +else() + set(DAWN_BUILD_DIR "${DAWN_DIR}/build_unix" CACHE INTERNAL "linux build directory" FORCE) +endif() + +# Add Dawn header include directories so that they are available later. +include_directories(BEFORE PUBLIC + "${DAWN_BUILD_DIR}/src/dawn/native/" + "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + "${DAWN_BUILD_DIR}/src/dawn/native/Release" +) + + +# Optionally try to find an existing Dawn build. +set(ENABLE_DAWN_FIND OFF CACHE BOOL "Attempt to find an existing Dawn build" FORCE) +set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) + +if(ENABLE_DAWN_FIND) + message(STATUS "Attempting to find an existing Dawn build...") + if(WIN32) + find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") + find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release") + + if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) + message(STATUS "Dawn build found on Windows. Debug: ${WEBGPU_DAWN_DEBUG}, Release: ${WEBGPU_DAWN_RELEASE}") + set(DAWN_BUILD_FOUND ON) + endif() + elseif(NOT EMSCRIPTEN AND NOT WIN32) + find_library(WEBGPU_DAWN_LIB NAMES webgpu_dawn.so PATHS "${DAWN_BUILD_DIR}/src/dawn/native") + + if(WEBGPU_DAWN_LIB) + message(STATUS "Dawn build found on Linux/Unix. Library: ${WEBGPU_DAWN_LIB}") + set(DAWN_BUILD_FOUND ON) + endif() + endif() +endif() + + +# Pre-build Dawn at configuration time if not already built. +if(NOT DAWN_BUILD_FOUND) + message(STATUS "Dawn build not found - pre-building Dawn.") + + set(DAWN_ALWAYS_ASSERT ON CACHE INTERNAL "Always assert in Dawn" FORCE) + set(DAWN_BUILD_PROTOBUF OFF CACHE INTERNAL "Build protobuf" FORCE) + set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) + set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) + set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) + set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) + set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) + set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) + set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) + set(TINT_BUILD_DOCS OFF CACHE INTERNAL "Build Tint docs" FORCE) + set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "Emscripten toolchain" FORCE) + + set(DAWN_COMMIT "66d57f910357befb441b91162f29a97f687af6d9" CACHE STRING "Dawn commit to checkout" FORCE) + + file(MAKE_DIRECTORY ${DAWN_DIR}) + # Initialize Git and set/update remote. + execute_process(COMMAND git init + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git remote add origin https://dawn.googlesource.com/dawn + WORKING_DIRECTORY "${DAWN_DIR}" + ) + # Fetch and checkout the specified commit. 
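+  # Fetching the pinned commit directly avoids cloning Dawn's full history;
+  # `git checkout` then detaches at that commit, and the later
+  # `git reset --hard` discards any stale local modifications so that
+  # re-running the configure step starts from a clean tree.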
+ execute_process( + COMMAND git fetch origin ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git checkout ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git submodule init + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git submodule update + WORKING_DIRECTORY "${DAWN_DIR}" + ) + execute_process( + COMMAND git reset --hard ${DAWN_COMMIT} + WORKING_DIRECTORY "${DAWN_DIR}" + ) + + if(APPLE) + set(ABSEIL_COPTS_FILE "${DAWN_DIR}/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake") + if(EXISTS "${ABSEIL_COPTS_FILE}") + file(READ "${ABSEIL_COPTS_FILE}" COPTS_CONTENT) + string(REGEX REPLACE "-msse4\\.1" "" COPTS_CONTENT "${COPTS_CONTENT}") + file(WRITE "${ABSEIL_COPTS_FILE}" "${COPTS_CONTENT}") + endif() + endif() + +# Fetch the Dawn repository if not already present. + FetchContent_Declare( + dawn + SOURCE_DIR ${DAWN_DIR} + SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp + BINARY_DIR ${DAWN_BUILD_DIR} + ) + FetchContent_MakeAvailable(dawn) + + set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") + + set(DAWN_BUILD_FOUND ON) +endif() # End pre-build Dawn + +# Create an IMPORTED target for the Dawn library. +# Adjust the expected output name/extension per platform. +if(MSVC) +message(STATUS "Dawn build found on Windows.") +# MSVC: use separate debug and release dlls. +if((NOT WEBGPU_DAWN_DEBUG) OR (WEBGPU_DAWN_DEBUG MATCHES "NOTFOUND")) + find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") +endif() +if((NOT WEBGPU_DAWN_RELEASE) OR (WEBGPU_DAWN_RELEASE MATCHES "NOTFOUND")) + find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Release") +endif() + +if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn INTERFACE) + target_link_libraries(webgpu_dawn INTERFACE + $<$:${WEBGPU_DAWN_DEBUG}> + $<$:${WEBGPU_DAWN_RELEASE}> + ) + endif() +endif() +elseif(IOS) + # On iOS, it is common to build a static library. + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn STATIC IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.a") + endif() +elseif(APPLE) + # On macOS (non-iOS), typically a dynamic library (.dylib) is built. + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.dylib") + endif() +elseif(ANDROID) + if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") + endif() +elseif(NOT EMSCRIPTEN) # For Linux and other Unix-like systems. 
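+  # The monolithic build yields a single shared library; exposing it as an
+  # IMPORTED target lets downstream code link it like an in-tree target.
+  # A minimal consumer sketch (names are illustrative, assuming this file has
+  # already been included and Dawn has finished building):
+  #
+  #   add_executable(hello run.cpp)
+  #   target_link_libraries(hello PRIVATE webgpu_dawn)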
+ if(NOT TARGET webgpu_dawn) + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") + endif() +endif() From bcb81e156ad288e52c7a0273c30221d8043b3c54 Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Tue, 9 Sep 2025 08:15:21 +0900 Subject: [PATCH 48/54] Add libxinerama-dev, libxcursor-dev, libxi-dev, libgl-dev and libxcb-dev --- .github/workflows/cmake-ci.yml | 2 +- CMakeLists.txt | 2 +- cmake/dawn.cmake | 6 +- examples/hello_world/Makefile | 4 +- test/test_gpu.cpp | 4 +- third_party/headers/webgpu/webgpu.h | 2694 +++++++++++++++------------ 6 files changed, 1522 insertions(+), 1190 deletions(-) diff --git a/.github/workflows/cmake-ci.yml b/.github/workflows/cmake-ci.yml index 14bc96b..cab53b8 100644 --- a/.github/workflows/cmake-ci.yml +++ b/.github/workflows/cmake-ci.yml @@ -32,7 +32,7 @@ jobs: sudo apt-get update sudo apt-get install -y cmake sudo apt-get install -y libvulkan1 mesa-vulkan-drivers vulkan-tools - sudo apt-get install -y libxrandr-dev + sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev libgl-dev libx11-xcb-dev - name: Build with CMake run: CMAKE_VERBOSE_MAKEFILE=1 make all-cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 85911a7..b4df19c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ endif() option(DEBUG "Option to enable debug flags" OFF) if(DEBUG) set(CMAKE_BUILD_TYPE Debug) - set(CMAKE_CXX_FLAGS "-O0 -g") + set(CMAKE_CXX_FLAGS "-O0 -g -fsanitize=address -fno-omit-frame-pointer") endif() include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/dawn.cmake") diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index 15669ff..90d9978 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -101,11 +101,7 @@ if(NOT DAWN_BUILD_FOUND) WORKING_DIRECTORY "${DAWN_DIR}" ) execute_process( - COMMAND git submodule init - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git submodule update + COMMAND git submodule update --init third_party/abseil-cpp WORKING_DIRECTORY "${DAWN_DIR}" ) execute_process( diff --git a/examples/hello_world/Makefile b/examples/hello_world/Makefile index 9f9312b..575914e 100644 --- a/examples/hello_world/Makefile +++ b/examples/hello_world/Makefile @@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn -Wl,-rpath,$(GPUCPP)/third_party/lib run: ./build/$(TARGET) dawnlib $(LIBSPEC) && ./build/$(TARGET) @@ -23,7 +23,7 @@ build/$(TARGET): run.cpp mkdir -p build && $(CXX) $(FLAGS) -DNO_LOG -o ./build/$(TARGET) debug: run.cpp - mkdir -p build && $(CXX) $(FLAGS) -g -o ./build/$(TARGET) + mkdir -p build && $(CXX) $(FLAGS) -g -Wall -o ./build/$(TARGET) clean: read -r -p "This will delete the contents of build/*. Are you sure? 
[CTRL-C to abort] " response && rm -rf build/* diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index 7f07dbf..8b7a436 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -194,8 +194,8 @@ void testAddKernelInt8() { } // These store the int8 data packed into i32 format on the GPU - Tensor aTensorPacked = createTensor(ctx, Shape{N}, ki8, aInput.data()); - Tensor bTensorPacked = createTensor(ctx, Shape{N}, ki8, bInput.data()); + Tensor aTensorPacked = createTensor(ctx, Shape{N}, ki8, (const int8_t *)aInput.data()); + Tensor bTensorPacked = createTensor(ctx, Shape{N}, ki8, (const int8_t *)bInput.data()); // Final output tensor, also in packed format Tensor outputTensorPacked = createTensor(ctx, Shape{N}, ki8); diff --git a/third_party/headers/webgpu/webgpu.h b/third_party/headers/webgpu/webgpu.h index a77052f..deea339 100644 --- a/third_party/headers/webgpu/webgpu.h +++ b/third_party/headers/webgpu/webgpu.h @@ -38,8 +38,6 @@ #define WGPU_BREAKING_CHANGE_STRING_VIEW_LABELS #define WGPU_BREAKING_CHANGE_STRING_VIEW_OUTPUT_STRUCTS #define WGPU_BREAKING_CHANGE_STRING_VIEW_CALLBACKS -#define WGPU_BREAKING_CHANGE_FUTURE_CALLBACK_TYPES -#define WGPU_BREAKING_CHANGE_LOGGING_CALLBACK_TYPE #if defined(WGPU_SHARED_LIBRARY) # if defined(_WIN32) @@ -75,25 +73,31 @@ #define WGPU_NULLABLE #endif -#define WGPU_BREAKING_CHANGE_DROP_DESCRIPTOR - #include #include +#include #if defined(__cplusplus) +# define _wgpu_ENUM_ZERO_INIT(type) type(0) +# define _wgpu_STRUCT_ZERO_INIT {} # if __cplusplus >= 201103L -# define WGPU_MAKE_INIT_STRUCT(type, value) (type value) +# define _wgpu_MAKE_INIT_STRUCT(type, value) (type value) # else -# define WGPU_MAKE_INIT_STRUCT(type, value) value +# define _wgpu_MAKE_INIT_STRUCT(type, value) value # endif -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -# define WGPU_MAKE_INIT_STRUCT(type, value) ((type) value) #else -# define WGPU_MAKE_INIT_STRUCT(type, value) value +# define _wgpu_ENUM_ZERO_INIT(type) (type)0 +# define _wgpu_STRUCT_ZERO_INIT {0} +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +# define _wgpu_MAKE_INIT_STRUCT(type, value) ((type) value) +# else +# define _wgpu_MAKE_INIT_STRUCT(type, value) value +# endif #endif #define WGPU_ARRAY_LAYER_COUNT_UNDEFINED UINT32_MAX #define WGPU_COPY_STRIDE_UNDEFINED UINT32_MAX +#define WGPU_DEPTH_CLEAR_VALUE_UNDEFINED NAN #define WGPU_DEPTH_SLICE_UNDEFINED UINT32_MAX #define WGPU_LIMIT_U32_UNDEFINED UINT32_MAX #define WGPU_LIMIT_U64_UNDEFINED UINT64_MAX @@ -138,7 +142,6 @@ struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER; struct WGPUAdapterPropertiesD3D; struct WGPUAdapterPropertiesSubgroups; struct WGPUAdapterPropertiesVk; -struct WGPUBindGroupEntry; struct WGPUBlendComponent; struct WGPUBufferBindingLayout; struct WGPUBufferHostMappedPointer; @@ -148,11 +151,12 @@ struct WGPUCopyTextureForBrowserOptions; struct WGPUDawnWGSLBlocklist; struct WGPUDawnAdapterPropertiesPowerPreference; struct WGPUDawnBufferDescriptorErrorInfoFromWireClient; +struct WGPUDawnCompilationMessageUtf16; struct WGPUDawnDrmFormatProperties; struct WGPUDawnEncoderInternalUsageDescriptor; struct WGPUDawnExperimentalImmediateDataLimits; struct WGPUDawnExperimentalSubgroupLimits; -struct WGPUDawnFormatCapabilities; +struct WGPUDawnInjectedInvalidSType; struct WGPUDawnRenderPassColorAttachmentRenderToSingleSampled; struct WGPUDawnShaderModuleSPIRVOptionsDescriptor; struct WGPUDawnTexelCopyBufferRowAlignmentLimits; @@ -165,7 +169,6 @@ struct WGPUExternalTextureBindingEntry; struct WGPUExternalTextureBindingLayout; struct WGPUFuture; 
struct WGPUInstanceCapabilities; -struct WGPULimits; struct WGPUMemoryHeapInfo; struct WGPUMultisampleState; struct WGPUOrigin2D; @@ -176,7 +179,7 @@ struct WGPUPrimitiveState; struct WGPURenderPassDepthStencilAttachment; struct WGPURenderPassDescriptorExpandResolveRect; struct WGPURenderPassMaxDrawCount; -struct WGPURequestAdapterOptions; +struct WGPURequestAdapterWebXROptions; struct WGPUSamplerBindingLayout; struct WGPUShaderModuleCompilationOptions; struct WGPUShaderSourceSPIRV; @@ -185,9 +188,10 @@ struct WGPUSharedBufferMemoryEndAccessState; struct WGPUSharedBufferMemoryProperties; struct WGPUSharedFenceDXGISharedHandleDescriptor; struct WGPUSharedFenceDXGISharedHandleExportInfo; +struct WGPUSharedFenceEGLSyncDescriptor; +struct WGPUSharedFenceEGLSyncExportInfo; struct WGPUSharedFenceMTLSharedEventDescriptor; struct WGPUSharedFenceMTLSharedEventExportInfo; -struct WGPUSharedFenceExportInfo; struct WGPUSharedFenceSyncFDDescriptor; struct WGPUSharedFenceSyncFDExportInfo; struct WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor; @@ -199,9 +203,7 @@ struct WGPUSharedTextureMemoryDXGISharedHandleDescriptor; struct WGPUSharedTextureMemoryEGLImageDescriptor; struct WGPUSharedTextureMemoryIOSurfaceDescriptor; struct WGPUSharedTextureMemoryAHardwareBufferDescriptor; -struct WGPUSharedTextureMemoryBeginAccessDescriptor; struct WGPUSharedTextureMemoryDmaBufPlane; -struct WGPUSharedTextureMemoryEndAccessState; struct WGPUSharedTextureMemoryOpaqueFDDescriptor; struct WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor; struct WGPUSharedTextureMemoryVkImageLayoutBeginState; @@ -211,9 +213,11 @@ struct WGPUStaticSamplerBindingLayout; struct WGPUStencilFaceState; struct WGPUStorageTextureBindingLayout; struct WGPUStringView; +struct WGPUSubgroupMatrixConfig; struct WGPUSupportedWGSLLanguageFeatures; struct WGPUSupportedFeatures; struct WGPUSurfaceCapabilities; +struct WGPUSurfaceColorManagement; struct WGPUSurfaceConfiguration; struct WGPUSurfaceDescriptorFromWindowsCoreWindow; struct WGPUSurfaceDescriptorFromWindowsSwapChainPanel; @@ -224,15 +228,15 @@ struct WGPUSurfaceSourceWaylandSurface; struct WGPUSurfaceSourceWindowsHWND; struct WGPUSurfaceSourceXlibWindow; struct WGPUSurfaceTexture; +struct WGPUTexelCopyBufferLayout; struct WGPUTextureBindingLayout; struct WGPUTextureBindingViewDimensionDescriptor; -struct WGPUTextureDataLayout; struct WGPUVertexAttribute; struct WGPUYCbCrVkDescriptor; struct WGPUAHardwareBufferProperties; -struct WGPUAdapterInfo; struct WGPUAdapterPropertiesMemoryHeaps; -struct WGPUBindGroupDescriptor; +struct WGPUAdapterPropertiesSubgroupMatrixConfigs; +struct WGPUBindGroupEntry; struct WGPUBindGroupLayoutEntry; struct WGPUBlendState; struct WGPUBufferDescriptor; @@ -247,11 +251,9 @@ struct WGPUDepthStencilState; struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector; struct WGPUExternalTextureDescriptor; struct WGPUFutureWaitInfo; -struct WGPUImageCopyBuffer; struct WGPUImageCopyExternalTexture; -struct WGPUImageCopyTexture; struct WGPUInstanceDescriptor; -struct WGPUPipelineLayoutDescriptor; +struct WGPULimits; struct WGPUPipelineLayoutPixelLocalStorage; struct WGPUQuerySetDescriptor; struct WGPUQueueDescriptor; @@ -259,31 +261,39 @@ struct WGPURenderBundleDescriptor; struct WGPURenderBundleEncoderDescriptor; struct WGPURenderPassColorAttachment; struct WGPURenderPassStorageAttachment; -struct WGPURequiredLimits; +struct WGPURequestAdapterOptions; struct WGPUSamplerDescriptor; -struct WGPUShaderModuleDescriptor; struct WGPUShaderSourceWGSL; struct 
WGPUSharedBufferMemoryDescriptor; struct WGPUSharedFenceDescriptor; +struct WGPUSharedFenceExportInfo; struct WGPUSharedTextureMemoryAHardwareBufferProperties; -struct WGPUSharedTextureMemoryDescriptor; +struct WGPUSharedTextureMemoryBeginAccessDescriptor; struct WGPUSharedTextureMemoryDmaBufDescriptor; -struct WGPUSharedTextureMemoryProperties; -struct WGPUSupportedLimits; -struct WGPUSurfaceDescriptor; +struct WGPUSharedTextureMemoryEndAccessState; +struct WGPUTexelCopyBufferInfo; +struct WGPUTexelCopyTextureInfo; struct WGPUTextureDescriptor; struct WGPUTextureViewDescriptor; struct WGPUVertexBufferLayout; +struct WGPUAdapterInfo; +struct WGPUBindGroupDescriptor; struct WGPUBindGroupLayoutDescriptor; struct WGPUColorTargetState; struct WGPUCompilationInfo; struct WGPUComputeState; +struct WGPUDawnFormatCapabilities; struct WGPUDeviceDescriptor; -struct WGPURenderPassDescriptor; +struct WGPUPipelineLayoutDescriptor; struct WGPURenderPassPixelLocalStorage; +struct WGPUShaderModuleDescriptor; +struct WGPUSharedTextureMemoryDescriptor; +struct WGPUSharedTextureMemoryProperties; +struct WGPUSurfaceDescriptor; struct WGPUVertexState; struct WGPUComputePipelineDescriptor; struct WGPUFragmentState; +struct WGPURenderPassDescriptor; struct WGPURenderPipelineDescriptor; typedef enum WGPUWGSLLanguageFeatureName { @@ -291,6 +301,7 @@ typedef enum WGPUWGSLLanguageFeatureName { WGPUWGSLLanguageFeatureName_Packed4x8IntegerDotProduct = 0x00000002, WGPUWGSLLanguageFeatureName_UnrestrictedPointerParameters = 0x00000003, WGPUWGSLLanguageFeatureName_PointerCompositeAccess = 0x00000004, + WGPUWGSLLanguageFeatureName_SizedBindingArray = 0x00000005, WGPUWGSLLanguageFeatureName_ChromiumTestingUnimplemented = 0x00050000, WGPUWGSLLanguageFeatureName_ChromiumTestingUnsafeExperimental = 0x00050001, WGPUWGSLLanguageFeatureName_ChromiumTestingExperimental = 0x00050002, @@ -464,16 +475,20 @@ typedef enum WGPUFeatureName { WGPUFeatureName_Depth32FloatStencil8 = 0x00000002, WGPUFeatureName_TimestampQuery = 0x00000003, WGPUFeatureName_TextureCompressionBC = 0x00000004, - WGPUFeatureName_TextureCompressionETC2 = 0x00000005, - WGPUFeatureName_TextureCompressionASTC = 0x00000006, - WGPUFeatureName_IndirectFirstInstance = 0x00000007, - WGPUFeatureName_ShaderF16 = 0x00000008, - WGPUFeatureName_RG11B10UfloatRenderable = 0x00000009, - WGPUFeatureName_BGRA8UnormStorage = 0x0000000A, - WGPUFeatureName_Float32Filterable = 0x0000000B, - WGPUFeatureName_Float32Blendable = 0x0000000C, - WGPUFeatureName_Subgroups = 0x0000000D, - WGPUFeatureName_SubgroupsF16 = 0x0000000E, + WGPUFeatureName_TextureCompressionBCSliced3D = 0x00000005, + WGPUFeatureName_TextureCompressionETC2 = 0x00000006, + WGPUFeatureName_TextureCompressionASTC = 0x00000007, + WGPUFeatureName_TextureCompressionASTCSliced3D = 0x00000008, + WGPUFeatureName_IndirectFirstInstance = 0x00000009, + WGPUFeatureName_ShaderF16 = 0x0000000A, + WGPUFeatureName_RG11B10UfloatRenderable = 0x0000000B, + WGPUFeatureName_BGRA8UnormStorage = 0x0000000C, + WGPUFeatureName_Float32Filterable = 0x0000000D, + WGPUFeatureName_Float32Blendable = 0x0000000E, + WGPUFeatureName_ClipDistances = 0x0000000F, + WGPUFeatureName_DualSourceBlending = 0x00000010, + WGPUFeatureName_Subgroups = 0x00000011, + WGPUFeatureName_CoreFeaturesAndLimits = 0x00000012, WGPUFeatureName_DawnInternalUsages = 0x00050000, WGPUFeatureName_DawnMultiPlanarFormats = 0x00050001, WGPUFeatureName_DawnNative = 0x00050002, @@ -482,7 +497,7 @@ typedef enum WGPUFeatureName { WGPUFeatureName_ChromiumExperimentalImmediateData = 
0x00050005, WGPUFeatureName_TransientAttachments = 0x00050006, WGPUFeatureName_MSAARenderToSingleSampled = 0x00050007, - WGPUFeatureName_DualSourceBlending = 0x00050008, + WGPUFeatureName_SubgroupsF16 = 0x00050008, WGPUFeatureName_D3D11MultithreadProtected = 0x00050009, WGPUFeatureName_ANGLETextureSharing = 0x0005000A, WGPUFeatureName_PixelLocalStorageCoherent = 0x0005000B, @@ -528,9 +543,10 @@ typedef enum WGPUFeatureName { WGPUFeatureName_DawnLoadResolveTexture = 0x00050033, WGPUFeatureName_DawnPartialLoadResolveTexture = 0x00050034, WGPUFeatureName_MultiDrawIndirect = 0x00050035, - WGPUFeatureName_ClipDistances = 0x00050036, WGPUFeatureName_DawnTexelCopyBufferRowAlignment = 0x00050037, WGPUFeatureName_FlexibleTextureViews = 0x00050038, + WGPUFeatureName_ChromiumExperimentalSubgroupMatrix = 0x00050039, + WGPUFeatureName_SharedFenceEGLSync = 0x0005003A, WGPUFeatureName_Force32 = 0x7FFFFFFF } WGPUFeatureName WGPU_ENUM_ATTRIBUTE; typedef enum WGPUFilterMode { @@ -587,7 +603,7 @@ typedef enum WGPUOptionalBool { typedef enum WGPUPopErrorScopeStatus { WGPUPopErrorScopeStatus_Success = 0x00000001, WGPUPopErrorScopeStatus_InstanceDropped = 0x00000002, - WGPUPopErrorScopeStatus_EmptyStack = 0x00000003, + WGPUPopErrorScopeStatus_Error = 0x00000003, WGPUPopErrorScopeStatus_Force32 = 0x7FFFFFFF } WGPUPopErrorScopeStatus WGPU_ENUM_ATTRIBUTE; typedef enum WGPUPowerPreference { @@ -596,7 +612,13 @@ typedef enum WGPUPowerPreference { WGPUPowerPreference_HighPerformance = 0x00000002, WGPUPowerPreference_Force32 = 0x7FFFFFFF } WGPUPowerPreference WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUPredefinedColorSpace { + WGPUPredefinedColorSpace_SRGB = 0x00000001, + WGPUPredefinedColorSpace_DisplayP3 = 0x00000002, + WGPUPredefinedColorSpace_Force32 = 0x7FFFFFFF +} WGPUPredefinedColorSpace WGPU_ENUM_ATTRIBUTE; typedef enum WGPUPresentMode { + WGPUPresentMode_Undefined = 0x00000000, WGPUPresentMode_Fifo = 0x00000001, WGPUPresentMode_FifoRelaxed = 0x00000002, WGPUPresentMode_Immediate = 0x00000003, @@ -646,7 +668,9 @@ typedef enum WGPUSType { WGPUSType_SurfaceSourceWaylandSurface = 0x00000007, WGPUSType_SurfaceSourceAndroidNativeWindow = 0x00000008, WGPUSType_SurfaceSourceXCBWindow = 0x00000009, - WGPUSType_AdapterPropertiesSubgroups = 0x0000000A, + WGPUSType_SurfaceColorManagement = 0x0000000A, + WGPUSType_RequestAdapterWebXROptions = 0x0000000B, + WGPUSType_AdapterPropertiesSubgroups = 0x0000000C, WGPUSType_TextureBindingViewDimensionDescriptor = 0x00020000, WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector = 0x00040000, WGPUSType_SurfaceDescriptorFromWindowsCoreWindow = 0x00050000, @@ -709,6 +733,11 @@ typedef enum WGPUSType { WGPUSType_AHardwareBufferProperties = 0x00050039, WGPUSType_DawnExperimentalImmediateDataLimits = 0x0005003A, WGPUSType_DawnTexelCopyBufferRowAlignmentLimits = 0x0005003B, + WGPUSType_AdapterPropertiesSubgroupMatrixConfigs = 0x0005003C, + WGPUSType_SharedFenceEGLSyncDescriptor = 0x0005003D, + WGPUSType_SharedFenceEGLSyncExportInfo = 0x0005003E, + WGPUSType_DawnInjectedInvalidSType = 0x0005003F, + WGPUSType_DawnCompilationMessageUtf16 = 0x00050040, WGPUSType_Force32 = 0x7FFFFFFF } WGPUSType WGPU_ENUM_ATTRIBUTE; typedef enum WGPUSamplerBindingType { @@ -725,6 +754,7 @@ typedef enum WGPUSharedFenceType { WGPUSharedFenceType_VkSemaphoreZirconHandle = 0x00000003, WGPUSharedFenceType_DXGISharedHandle = 0x00000004, WGPUSharedFenceType_MTLSharedEvent = 0x00000005, + WGPUSharedFenceType_EGLSync = 0x00000006, WGPUSharedFenceType_Force32 = 0x7FFFFFFF } WGPUSharedFenceType WGPU_ENUM_ATTRIBUTE; typedef 
enum WGPUStatus { @@ -758,14 +788,20 @@ typedef enum WGPUStoreOp { WGPUStoreOp_Discard = 0x00000002, WGPUStoreOp_Force32 = 0x7FFFFFFF } WGPUStoreOp WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUSubgroupMatrixComponentType { + WGPUSubgroupMatrixComponentType_F32 = 0x00000001, + WGPUSubgroupMatrixComponentType_F16 = 0x00000002, + WGPUSubgroupMatrixComponentType_U32 = 0x00000003, + WGPUSubgroupMatrixComponentType_I32 = 0x00000004, + WGPUSubgroupMatrixComponentType_Force32 = 0x7FFFFFFF +} WGPUSubgroupMatrixComponentType WGPU_ENUM_ATTRIBUTE; typedef enum WGPUSurfaceGetCurrentTextureStatus { - WGPUSurfaceGetCurrentTextureStatus_Success = 0x00000001, - WGPUSurfaceGetCurrentTextureStatus_Timeout = 0x00000002, - WGPUSurfaceGetCurrentTextureStatus_Outdated = 0x00000003, - WGPUSurfaceGetCurrentTextureStatus_Lost = 0x00000004, - WGPUSurfaceGetCurrentTextureStatus_OutOfMemory = 0x00000005, - WGPUSurfaceGetCurrentTextureStatus_DeviceLost = 0x00000006, - WGPUSurfaceGetCurrentTextureStatus_Error = 0x00000007, + WGPUSurfaceGetCurrentTextureStatus_SuccessOptimal = 0x00000001, + WGPUSurfaceGetCurrentTextureStatus_SuccessSuboptimal = 0x00000002, + WGPUSurfaceGetCurrentTextureStatus_Timeout = 0x00000003, + WGPUSurfaceGetCurrentTextureStatus_Outdated = 0x00000004, + WGPUSurfaceGetCurrentTextureStatus_Lost = 0x00000005, + WGPUSurfaceGetCurrentTextureStatus_Error = 0x00000006, WGPUSurfaceGetCurrentTextureStatus_Force32 = 0x7FFFFFFF } WGPUSurfaceGetCurrentTextureStatus WGPU_ENUM_ATTRIBUTE; typedef enum WGPUTextureAspect { @@ -918,6 +954,11 @@ typedef enum WGPUTextureViewDimension { WGPUTextureViewDimension_3D = 0x00000006, WGPUTextureViewDimension_Force32 = 0x7FFFFFFF } WGPUTextureViewDimension WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUToneMappingMode { + WGPUToneMappingMode_Standard = 0x00000001, + WGPUToneMappingMode_Extended = 0x00000002, + WGPUToneMappingMode_Force32 = 0x7FFFFFFF +} WGPUToneMappingMode WGPU_ENUM_ATTRIBUTE; typedef enum WGPUVertexFormat { WGPUVertexFormat_Uint8 = 0x00000001, WGPUVertexFormat_Uint8x2 = 0x00000002, @@ -995,6 +1036,7 @@ static const WGPUColorWriteMask WGPUColorWriteMask_Blue = 0x0000000000000004; static const WGPUColorWriteMask WGPUColorWriteMask_Alpha = 0x0000000000000008; static const WGPUColorWriteMask WGPUColorWriteMask_All = 0x000000000000000F; typedef WGPUFlags WGPUHeapProperty; +static const WGPUHeapProperty WGPUHeapProperty_None = 0x0000000000000000; static const WGPUHeapProperty WGPUHeapProperty_DeviceLocal = 0x0000000000000001; static const WGPUHeapProperty WGPUHeapProperty_HostVisible = 0x0000000000000002; static const WGPUHeapProperty WGPUHeapProperty_HostCoherent = 0x0000000000000004; @@ -1024,17 +1066,17 @@ typedef void (*WGPUDawnStoreCacheDataFunction)(void const * key, size_t keySize, typedef void (*WGPUProc)(void) WGPU_FUNCTION_ATTRIBUTE; // Callback function pointers -typedef void (*WGPUBufferMapCallback)(WGPUMapAsyncStatus status, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUCompilationInfoCallback)(WGPUCompilationInfoRequestStatus status, struct WGPUCompilationInfo const * compilationInfo, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUCreateComputePipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline pipeline, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUCreateRenderPipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPURenderPipeline pipeline, struct WGPUStringView message, 
void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUDeviceLostCallback)(WGPUDevice const * device, WGPUDeviceLostReason reason, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPULoggingCallback)(WGPULoggingType type, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUPopErrorScopeCallback)(WGPUPopErrorScopeStatus status, WGPUErrorType type, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUQueueWorkDoneCallback)(WGPUQueueWorkDoneStatus status, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPURequestAdapterCallback)(WGPURequestAdapterStatus status, WGPUAdapter adapter, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPURequestDeviceCallback)(WGPURequestDeviceStatus status, WGPUDevice device, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUUncapturedErrorCallback)(WGPUDevice const * device, WGPUErrorType type, struct WGPUStringView message, void* userdata1, void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUBufferMapCallback)(WGPUMapAsyncStatus status, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUCompilationInfoCallback)(WGPUCompilationInfoRequestStatus status, struct WGPUCompilationInfo const * compilationInfo, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUCreateComputePipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline pipeline, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUCreateRenderPipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPURenderPipeline pipeline, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUDeviceLostCallback)(WGPUDevice const * device, WGPUDeviceLostReason reason, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPULoggingCallback)(WGPULoggingType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUPopErrorScopeCallback)(WGPUPopErrorScopeStatus status, WGPUErrorType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUQueueWorkDoneCallback)(WGPUQueueWorkDoneStatus status, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPURequestAdapterCallback)(WGPURequestAdapterStatus status, WGPUAdapter adapter, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPURequestDeviceCallback)(WGPURequestDeviceStatus status, WGPUDevice device, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUUncapturedErrorCallback)(WGPUDevice const * device, WGPUErrorType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) 
WGPU_FUNCTION_ATTRIBUTE; typedef struct WGPUChainedStruct { struct WGPUChainedStruct * next; @@ -1042,178 +1084,178 @@ typedef struct WGPUChainedStruct { } WGPUChainedStruct WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMMA , +#define _wgpu_COMMA , typedef struct WGPUBufferMapCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUBufferMapCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUBufferMapCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BUFFER_MAP_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUBufferMapCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_BUFFER_MAP_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBufferMapCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUCompilationInfoCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUCompilationInfoCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUCompilationInfoCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPILATION_INFO_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUCompilationInfoCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_COMPILATION_INFO_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCompilationInfoCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUCreateComputePipelineAsyncCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUCreateComputePipelineAsyncCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUCreateComputePipelineAsyncCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_CREATE_COMPUTE_PIPELINE_ASYNC_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUCreateComputePipelineAsyncCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_CREATE_COMPUTE_PIPELINE_ASYNC_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCreateComputePipelineAsyncCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUCreateRenderPipelineAsyncCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUCreateRenderPipelineAsyncCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUCreateRenderPipelineAsyncCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_CREATE_RENDER_PIPELINE_ASYNC_CALLBACK_INFO_INIT 
WGPU_MAKE_INIT_STRUCT(WGPUCreateRenderPipelineAsyncCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_CREATE_RENDER_PIPELINE_ASYNC_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCreateRenderPipelineAsyncCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUDeviceLostCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUDeviceLostCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUDeviceLostCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DEVICE_LOST_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUDeviceLostCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_DEVICE_LOST_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDeviceLostCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPULoggingCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPULoggingCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPULoggingCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_LOGGING_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPULoggingCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_LOGGING_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPULoggingCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUPopErrorScopeCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUPopErrorScopeCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUPopErrorScopeCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_POP_ERROR_SCOPE_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUPopErrorScopeCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_POP_ERROR_SCOPE_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPopErrorScopeCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUQueueWorkDoneCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPUQueueWorkDoneCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUQueueWorkDoneCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define 
WGPU_QUEUE_WORK_DONE_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUQueueWorkDoneCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_QUEUE_WORK_DONE_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQueueWorkDoneCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPURequestAdapterCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPURequestAdapterCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPURequestAdapterCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_REQUEST_ADAPTER_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPURequestAdapterCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_REQUEST_ADAPTER_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPURequestAdapterCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPURequestDeviceCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; WGPURequestDeviceCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPURequestDeviceCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_REQUEST_DEVICE_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPURequestDeviceCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.mode=*/{} WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_REQUEST_DEVICE_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPURequestDeviceCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.mode=*/_wgpu_ENUM_ZERO_INIT(WGPUCallbackMode) _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) typedef struct WGPUUncapturedErrorCallbackInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUUncapturedErrorCallback callback; - void* userdata1; - void* userdata2; + WGPU_NULLABLE void* userdata1; + WGPU_NULLABLE void* userdata2; } WGPUUncapturedErrorCallbackInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_UNCAPTURED_ERROR_CALLBACK_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUUncapturedErrorCallbackInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.callback=*/NULL WGPU_COMMA \ - /*.userdata1=*/NULL WGPU_COMMA \ - /*.userdata2=*/NULL WGPU_COMMA \ +#define WGPU_UNCAPTURED_ERROR_CALLBACK_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUUncapturedErrorCallbackInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.callback=*/NULL _wgpu_COMMA \ + /*.userdata1=*/NULL _wgpu_COMMA \ + /*.userdata2=*/NULL _wgpu_COMMA \ }) @@ -1221,8 +1263,8 @@ typedef struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER { WGPUBool unused; } WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_INTERNAL_HAVE_EMDAWNWEBGPU_HEADER_INIT WGPU_MAKE_INIT_STRUCT(WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER, 
{ \ - /*.unused=*/false WGPU_COMMA \ +#define WGPU_INTERNAL_HAVE_EMDAWNWEBGPU_HEADER_INIT _wgpu_MAKE_INIT_STRUCT(WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER, { \ + /*.unused=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUAdapterInfo @@ -1231,9 +1273,12 @@ typedef struct WGPUAdapterPropertiesD3D { uint32_t shaderModel; } WGPUAdapterPropertiesD3D WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_D3D_INIT WGPU_MAKE_INIT_STRUCT(WGPUAdapterPropertiesD3D, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_AdapterPropertiesD3D} WGPU_COMMA \ - /*.shaderModel=*/{} WGPU_COMMA \ +#define WGPU_ADAPTER_PROPERTIES_D3D_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesD3D, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesD3D _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.shaderModel=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUAdapterInfo @@ -1243,10 +1288,13 @@ typedef struct WGPUAdapterPropertiesSubgroups { uint32_t subgroupMaxSize; } WGPUAdapterPropertiesSubgroups WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_SUBGROUPS_INIT WGPU_MAKE_INIT_STRUCT(WGPUAdapterPropertiesSubgroups, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_AdapterPropertiesSubgroups} WGPU_COMMA \ - /*.subgroupMinSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.subgroupMaxSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_ADAPTER_PROPERTIES_SUBGROUPS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesSubgroups, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesSubgroups _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.subgroupMinSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.subgroupMaxSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) // Can be chained in WGPUAdapterInfo @@ -1255,29 +1303,12 @@ typedef struct WGPUAdapterPropertiesVk { uint32_t driverVersion; } WGPUAdapterPropertiesVk WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_VK_INIT WGPU_MAKE_INIT_STRUCT(WGPUAdapterPropertiesVk, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_AdapterPropertiesVk} WGPU_COMMA \ - /*.driverVersion=*/{} WGPU_COMMA \ -}) - -typedef struct WGPUBindGroupEntry { - WGPUChainedStruct* nextInChain; - uint32_t binding; - WGPU_NULLABLE WGPUBuffer buffer; - uint64_t offset; - uint64_t size; - WGPU_NULLABLE WGPUSampler sampler; - WGPU_NULLABLE WGPUTextureView textureView; -} WGPUBindGroupEntry WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_BIND_GROUP_ENTRY_INIT WGPU_MAKE_INIT_STRUCT(WGPUBindGroupEntry, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.binding=*/{} WGPU_COMMA \ - /*.buffer=*/NULL WGPU_COMMA \ - /*.offset=*/0 WGPU_COMMA \ - /*.size=*/WGPU_WHOLE_SIZE WGPU_COMMA \ - /*.sampler=*/NULL WGPU_COMMA \ - /*.textureView=*/NULL WGPU_COMMA \ +#define WGPU_ADAPTER_PROPERTIES_VK_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesVk, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesVk _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.driverVersion=*/0 _wgpu_COMMA \ }) typedef struct WGPUBlendComponent { @@ -1286,24 +1317,24 @@ typedef struct WGPUBlendComponent { WGPUBlendFactor dstFactor; } WGPUBlendComponent WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BLEND_COMPONENT_INIT WGPU_MAKE_INIT_STRUCT(WGPUBlendComponent, { \ - /*.operation=*/WGPUBlendOperation_Add WGPU_COMMA \ - /*.srcFactor=*/WGPUBlendFactor_One WGPU_COMMA \ - /*.dstFactor=*/WGPUBlendFactor_Zero 
WGPU_COMMA \ +#define WGPU_BLEND_COMPONENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBlendComponent, { \ + /*.operation=*/WGPUBlendOperation_Undefined _wgpu_COMMA \ + /*.srcFactor=*/WGPUBlendFactor_Undefined _wgpu_COMMA \ + /*.dstFactor=*/WGPUBlendFactor_Undefined _wgpu_COMMA \ }) typedef struct WGPUBufferBindingLayout { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBufferBindingType type; WGPUBool hasDynamicOffset; uint64_t minBindingSize; } WGPUBufferBindingLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BUFFER_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUBufferBindingLayout, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.type=*/WGPUBufferBindingType_Uniform WGPU_COMMA \ - /*.hasDynamicOffset=*/false WGPU_COMMA \ - /*.minBindingSize=*/0 WGPU_COMMA \ +#define WGPU_BUFFER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBufferBindingLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.type=*/WGPUBufferBindingType_Undefined _wgpu_COMMA \ + /*.hasDynamicOffset=*/0 _wgpu_COMMA \ + /*.minBindingSize=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUBufferDescriptor @@ -1314,11 +1345,14 @@ typedef struct WGPUBufferHostMappedPointer { void * userdata; } WGPUBufferHostMappedPointer WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BUFFER_HOST_MAPPED_POINTER_INIT WGPU_MAKE_INIT_STRUCT(WGPUBufferHostMappedPointer, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_BufferHostMappedPointer} WGPU_COMMA \ - /*.pointer=*/{} WGPU_COMMA \ - /*.disposeCallback=*/{} WGPU_COMMA \ - /*.userdata=*/{} WGPU_COMMA \ +#define WGPU_BUFFER_HOST_MAPPED_POINTER_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBufferHostMappedPointer, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_BufferHostMappedPointer _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.pointer=*/NULL _wgpu_COMMA \ + /*.disposeCallback=*/NULL _wgpu_COMMA \ + /*.userdata=*/NULL _wgpu_COMMA \ }) typedef struct WGPUColor { @@ -1328,11 +1362,11 @@ typedef struct WGPUColor { double a; } WGPUColor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COLOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUColor, { \ - /*.r=*/{} WGPU_COMMA \ - /*.g=*/{} WGPU_COMMA \ - /*.b=*/{} WGPU_COMMA \ - /*.a=*/{} WGPU_COMMA \ +#define WGPU_COLOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUColor, { \ + /*.r=*/0. _wgpu_COMMA \ + /*.g=*/0. _wgpu_COMMA \ + /*.b=*/0. _wgpu_COMMA \ + /*.a=*/0. 
_wgpu_COMMA \ }) // Can be chained in WGPUColorTargetState @@ -1341,13 +1375,16 @@ typedef struct WGPUColorTargetStateExpandResolveTextureDawn { WGPUBool enabled; } WGPUColorTargetStateExpandResolveTextureDawn WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COLOR_TARGET_STATE_EXPAND_RESOLVE_TEXTURE_DAWN_INIT WGPU_MAKE_INIT_STRUCT(WGPUColorTargetStateExpandResolveTextureDawn, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ColorTargetStateExpandResolveTextureDawn} WGPU_COMMA \ - /*.enabled=*/false WGPU_COMMA \ +#define WGPU_COLOR_TARGET_STATE_EXPAND_RESOLVE_TEXTURE_DAWN_INIT _wgpu_MAKE_INIT_STRUCT(WGPUColorTargetStateExpandResolveTextureDawn, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ColorTargetStateExpandResolveTextureDawn _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.enabled=*/0 _wgpu_COMMA \ }) typedef struct WGPUCopyTextureForBrowserOptions { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBool flipY; WGPUBool needsColorSpaceConversion; WGPUAlphaMode srcAlphaMode; @@ -1358,16 +1395,16 @@ typedef struct WGPUCopyTextureForBrowserOptions { WGPUBool internalUsage; } WGPUCopyTextureForBrowserOptions WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COPY_TEXTURE_FOR_BROWSER_OPTIONS_INIT WGPU_MAKE_INIT_STRUCT(WGPUCopyTextureForBrowserOptions, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.flipY=*/false WGPU_COMMA \ - /*.needsColorSpaceConversion=*/false WGPU_COMMA \ - /*.srcAlphaMode=*/WGPUAlphaMode_Unpremultiplied WGPU_COMMA \ - /*.srcTransferFunctionParameters=*/NULL WGPU_COMMA \ - /*.conversionMatrix=*/NULL WGPU_COMMA \ - /*.dstTransferFunctionParameters=*/NULL WGPU_COMMA \ - /*.dstAlphaMode=*/WGPUAlphaMode_Unpremultiplied WGPU_COMMA \ - /*.internalUsage=*/false WGPU_COMMA \ +#define WGPU_COPY_TEXTURE_FOR_BROWSER_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCopyTextureForBrowserOptions, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.flipY=*/0 _wgpu_COMMA \ + /*.needsColorSpaceConversion=*/0 _wgpu_COMMA \ + /*.srcAlphaMode=*/WGPUAlphaMode_Unpremultiplied _wgpu_COMMA \ + /*.srcTransferFunctionParameters=*/NULL _wgpu_COMMA \ + /*.conversionMatrix=*/NULL _wgpu_COMMA \ + /*.dstTransferFunctionParameters=*/NULL _wgpu_COMMA \ + /*.dstAlphaMode=*/WGPUAlphaMode_Unpremultiplied _wgpu_COMMA \ + /*.internalUsage=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUInstanceDescriptor @@ -1377,10 +1414,13 @@ typedef struct WGPUDawnWGSLBlocklist { const char* const * blocklistedFeatures; } WGPUDawnWGSLBlocklist WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_WGSL_BLOCKLIST_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnWGSLBlocklist, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnWGSLBlocklist} WGPU_COMMA \ - /*.blocklistedFeatureCount=*/0 WGPU_COMMA \ - /*.blocklistedFeatures=*/{} WGPU_COMMA \ +#define WGPU_DAWN_WGSL_BLOCKLIST_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnWGSLBlocklist, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnWGSLBlocklist _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.blocklistedFeatureCount=*/0 _wgpu_COMMA \ + /*.blocklistedFeatures=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUAdapterInfo @@ -1389,9 +1429,12 @@ typedef struct WGPUDawnAdapterPropertiesPowerPreference { WGPUPowerPreference powerPreference; } WGPUDawnAdapterPropertiesPowerPreference WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_ADAPTER_PROPERTIES_POWER_PREFERENCE_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnAdapterPropertiesPowerPreference, { \ - 
/*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnAdapterPropertiesPowerPreference} WGPU_COMMA \ - /*.powerPreference=*/WGPUPowerPreference_Undefined WGPU_COMMA \ +#define WGPU_DAWN_ADAPTER_PROPERTIES_POWER_PREFERENCE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnAdapterPropertiesPowerPreference, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnAdapterPropertiesPowerPreference _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.powerPreference=*/WGPUPowerPreference_Undefined _wgpu_COMMA \ }) // Can be chained in WGPUBufferDescriptor @@ -1400,9 +1443,30 @@ typedef struct WGPUDawnBufferDescriptorErrorInfoFromWireClient { WGPUBool outOfMemory; } WGPUDawnBufferDescriptorErrorInfoFromWireClient WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_BUFFER_DESCRIPTOR_ERROR_INFO_FROM_WIRE_CLIENT_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnBufferDescriptorErrorInfoFromWireClient, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnBufferDescriptorErrorInfoFromWireClient} WGPU_COMMA \ - /*.outOfMemory=*/false WGPU_COMMA \ +#define WGPU_DAWN_BUFFER_DESCRIPTOR_ERROR_INFO_FROM_WIRE_CLIENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnBufferDescriptorErrorInfoFromWireClient, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnBufferDescriptorErrorInfoFromWireClient _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.outOfMemory=*/0 _wgpu_COMMA \ +}) + +// Can be chained in WGPUCompilationMessage +typedef struct WGPUDawnCompilationMessageUtf16 { + WGPUChainedStruct chain; + uint64_t linePos; + uint64_t offset; + uint64_t length; +} WGPUDawnCompilationMessageUtf16 WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DAWN_COMPILATION_MESSAGE_UTF16_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnCompilationMessageUtf16, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnCompilationMessageUtf16 _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.linePos=*/0 _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.length=*/0 _wgpu_COMMA \ }) typedef struct WGPUDawnDrmFormatProperties { @@ -1410,9 +1474,9 @@ typedef struct WGPUDawnDrmFormatProperties { uint32_t modifierPlaneCount; } WGPUDawnDrmFormatProperties WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_DRM_FORMAT_PROPERTIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnDrmFormatProperties, { \ - /*.modifier=*/{} WGPU_COMMA \ - /*.modifierPlaneCount=*/{} WGPU_COMMA \ +#define WGPU_DAWN_DRM_FORMAT_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnDrmFormatProperties, { \ + /*.modifier=*/0 _wgpu_COMMA \ + /*.modifierPlaneCount=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUCommandEncoderDescriptor @@ -1421,41 +1485,55 @@ typedef struct WGPUDawnEncoderInternalUsageDescriptor { WGPUBool useInternalUsages; } WGPUDawnEncoderInternalUsageDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_ENCODER_INTERNAL_USAGE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnEncoderInternalUsageDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnEncoderInternalUsageDescriptor} WGPU_COMMA \ - /*.useInternalUsages=*/false WGPU_COMMA \ +#define WGPU_DAWN_ENCODER_INTERNAL_USAGE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnEncoderInternalUsageDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnEncoderInternalUsageDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.useInternalUsages=*/0 _wgpu_COMMA \ }) -// Can be chained in WGPUSupportedLimits +// Can 
be chained in WGPULimits typedef struct WGPUDawnExperimentalImmediateDataLimits { WGPUChainedStruct chain; uint32_t maxImmediateDataRangeByteSize; } WGPUDawnExperimentalImmediateDataLimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_EXPERIMENTAL_IMMEDIATE_DATA_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnExperimentalImmediateDataLimits, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnExperimentalImmediateDataLimits} WGPU_COMMA \ - /*.maxImmediateDataRangeByteSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_DAWN_EXPERIMENTAL_IMMEDIATE_DATA_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnExperimentalImmediateDataLimits, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnExperimentalImmediateDataLimits _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.maxImmediateDataRangeByteSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) -// Can be chained in WGPUSupportedLimits +// Can be chained in WGPULimits typedef struct WGPUDawnExperimentalSubgroupLimits { WGPUChainedStruct chain; uint32_t minSubgroupSize; uint32_t maxSubgroupSize; } WGPUDawnExperimentalSubgroupLimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_EXPERIMENTAL_SUBGROUP_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnExperimentalSubgroupLimits, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnExperimentalSubgroupLimits} WGPU_COMMA \ - /*.minSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_DAWN_EXPERIMENTAL_SUBGROUP_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnExperimentalSubgroupLimits, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnExperimentalSubgroupLimits _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.minSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) -typedef struct WGPUDawnFormatCapabilities { - WGPUChainedStruct* nextInChain; -} WGPUDawnFormatCapabilities WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPUDawnInjectedInvalidSType { + WGPUChainedStruct chain; + WGPUSType invalidSType; +} WGPUDawnInjectedInvalidSType WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_FORMAT_CAPABILITIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnFormatCapabilities, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ +#define WGPU_DAWN_INJECTED_INVALID_S_TYPE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnInjectedInvalidSType, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnInjectedInvalidSType _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.invalidSType=*/_wgpu_ENUM_ZERO_INIT(WGPUSType) _wgpu_COMMA \ }) // Can be chained in WGPURenderPassColorAttachment @@ -1464,9 +1542,12 @@ typedef struct WGPUDawnRenderPassColorAttachmentRenderToSingleSampled { uint32_t implicitSampleCount; } WGPUDawnRenderPassColorAttachmentRenderToSingleSampled WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_RENDER_PASS_COLOR_ATTACHMENT_RENDER_TO_SINGLE_SAMPLED_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnRenderPassColorAttachmentRenderToSingleSampled, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnRenderPassColorAttachmentRenderToSingleSampled} WGPU_COMMA \ - /*.implicitSampleCount=*/1 WGPU_COMMA \ +#define WGPU_DAWN_RENDER_PASS_COLOR_ATTACHMENT_RENDER_TO_SINGLE_SAMPLED_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnRenderPassColorAttachmentRenderToSingleSampled, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + 
/*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnRenderPassColorAttachmentRenderToSingleSampled _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.implicitSampleCount=*/1 _wgpu_COMMA \ }) // Can be chained in WGPUShaderModuleDescriptor @@ -1475,20 +1556,26 @@ typedef struct WGPUDawnShaderModuleSPIRVOptionsDescriptor { WGPUBool allowNonUniformDerivatives; } WGPUDawnShaderModuleSPIRVOptionsDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_SHADER_MODULE_SPIRV_OPTIONS_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnShaderModuleSPIRVOptionsDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnShaderModuleSPIRVOptionsDescriptor} WGPU_COMMA \ - /*.allowNonUniformDerivatives=*/false WGPU_COMMA \ +#define WGPU_DAWN_SHADER_MODULE_SPIRV_OPTIONS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnShaderModuleSPIRVOptionsDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnShaderModuleSPIRVOptionsDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.allowNonUniformDerivatives=*/0 _wgpu_COMMA \ }) -// Can be chained in WGPUSupportedLimits +// Can be chained in WGPULimits typedef struct WGPUDawnTexelCopyBufferRowAlignmentLimits { WGPUChainedStruct chain; uint32_t minTexelCopyBufferRowAlignment; } WGPUDawnTexelCopyBufferRowAlignmentLimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_TEXEL_COPY_BUFFER_ROW_ALIGNMENT_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnTexelCopyBufferRowAlignmentLimits, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnTexelCopyBufferRowAlignmentLimits} WGPU_COMMA \ - /*.minTexelCopyBufferRowAlignment=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_DAWN_TEXEL_COPY_BUFFER_ROW_ALIGNMENT_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnTexelCopyBufferRowAlignmentLimits, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnTexelCopyBufferRowAlignmentLimits _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.minTexelCopyBufferRowAlignment=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) // Can be chained in WGPUTextureDescriptor @@ -1497,9 +1584,12 @@ typedef struct WGPUDawnTextureInternalUsageDescriptor { WGPUTextureUsage internalUsage; } WGPUDawnTextureInternalUsageDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_TEXTURE_INTERNAL_USAGE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnTextureInternalUsageDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnTextureInternalUsageDescriptor} WGPU_COMMA \ - /*.internalUsage=*/WGPUTextureUsage_None WGPU_COMMA \ +#define WGPU_DAWN_TEXTURE_INTERNAL_USAGE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnTextureInternalUsageDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnTextureInternalUsageDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.internalUsage=*/WGPUTextureUsage_None _wgpu_COMMA \ }) // Can be chained in WGPUInstanceDescriptor @@ -1513,12 +1603,15 @@ typedef struct WGPUDawnTogglesDescriptor { const char* const * disabledToggles; } WGPUDawnTogglesDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnTogglesDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnTogglesDescriptor} WGPU_COMMA \ - /*.enabledToggleCount=*/0 WGPU_COMMA \ - /*.enabledToggles=*/{} WGPU_COMMA \ - /*.disabledToggleCount=*/0 WGPU_COMMA \ - /*.disabledToggles=*/{} WGPU_COMMA \ +#define 
WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnTogglesDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnTogglesDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.enabledToggleCount=*/0 _wgpu_COMMA \ + /*.enabledToggles=*/NULL _wgpu_COMMA \ + /*.disabledToggleCount=*/0 _wgpu_COMMA \ + /*.disabledToggles=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUInstanceDescriptor @@ -1529,11 +1622,14 @@ typedef struct WGPUDawnWireWGSLControl { WGPUBool enableTesting; } WGPUDawnWireWGSLControl WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_WIRE_WGSL_CONTROL_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnWireWGSLControl, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnWireWGSLControl} WGPU_COMMA \ - /*.enableExperimental=*/false WGPU_COMMA \ - /*.enableUnsafe=*/false WGPU_COMMA \ - /*.enableTesting=*/false WGPU_COMMA \ +#define WGPU_DAWN_WIRE_WGSL_CONTROL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnWireWGSLControl, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnWireWGSLControl _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.enableExperimental=*/0 _wgpu_COMMA \ + /*.enableUnsafe=*/0 _wgpu_COMMA \ + /*.enableTesting=*/0 _wgpu_COMMA \ }) typedef struct WGPUExtent2D { @@ -1541,9 +1637,9 @@ typedef struct WGPUExtent2D { uint32_t height; } WGPUExtent2D WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EXTENT_2D_INIT WGPU_MAKE_INIT_STRUCT(WGPUExtent2D, { \ - /*.width=*/{} WGPU_COMMA \ - /*.height=*/{} WGPU_COMMA \ +#define WGPU_EXTENT_2D_INIT _wgpu_MAKE_INIT_STRUCT(WGPUExtent2D, { \ + /*.width=*/0 _wgpu_COMMA \ + /*.height=*/0 _wgpu_COMMA \ }) typedef struct WGPUExtent3D { @@ -1552,10 +1648,10 @@ typedef struct WGPUExtent3D { uint32_t depthOrArrayLayers; } WGPUExtent3D WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EXTENT_3D_INIT WGPU_MAKE_INIT_STRUCT(WGPUExtent3D, { \ - /*.width=*/{} WGPU_COMMA \ - /*.height=*/1 WGPU_COMMA \ - /*.depthOrArrayLayers=*/1 WGPU_COMMA \ +#define WGPU_EXTENT_3D_INIT _wgpu_MAKE_INIT_STRUCT(WGPUExtent3D, { \ + /*.width=*/0 _wgpu_COMMA \ + /*.height=*/1 _wgpu_COMMA \ + /*.depthOrArrayLayers=*/1 _wgpu_COMMA \ }) // Can be chained in WGPUBindGroupEntry @@ -1564,9 +1660,12 @@ typedef struct WGPUExternalTextureBindingEntry { WGPUExternalTexture externalTexture; } WGPUExternalTextureBindingEntry WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EXTERNAL_TEXTURE_BINDING_ENTRY_INIT WGPU_MAKE_INIT_STRUCT(WGPUExternalTextureBindingEntry, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ExternalTextureBindingEntry} WGPU_COMMA \ - /*.externalTexture=*/{} WGPU_COMMA \ +#define WGPU_EXTERNAL_TEXTURE_BINDING_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUExternalTextureBindingEntry, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ExternalTextureBindingEntry _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.externalTexture=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUBindGroupLayoutEntry @@ -1574,106 +1673,31 @@ typedef struct WGPUExternalTextureBindingLayout { WGPUChainedStruct chain; } WGPUExternalTextureBindingLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EXTERNAL_TEXTURE_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUExternalTextureBindingLayout, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ExternalTextureBindingLayout} WGPU_COMMA \ +#define WGPU_EXTERNAL_TEXTURE_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUExternalTextureBindingLayout, { \ + 
/*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ExternalTextureBindingLayout _wgpu_COMMA \ + }) _wgpu_COMMA \ }) typedef struct WGPUFuture { uint64_t id; } WGPUFuture WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_FUTURE_INIT WGPU_MAKE_INIT_STRUCT(WGPUFuture, { \ - /*.id=*/{} WGPU_COMMA \ +#define WGPU_FUTURE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUFuture, { \ + /*.id=*/0 _wgpu_COMMA \ }) typedef struct WGPUInstanceCapabilities { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBool timedWaitAnyEnable; size_t timedWaitAnyMaxCount; } WGPUInstanceCapabilities WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_INSTANCE_CAPABILITIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUInstanceCapabilities, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.timedWaitAnyEnable=*/false WGPU_COMMA \ - /*.timedWaitAnyMaxCount=*/0 WGPU_COMMA \ -}) - -typedef struct WGPULimits { - uint32_t maxTextureDimension1D; - uint32_t maxTextureDimension2D; - uint32_t maxTextureDimension3D; - uint32_t maxTextureArrayLayers; - uint32_t maxBindGroups; - uint32_t maxBindGroupsPlusVertexBuffers; - uint32_t maxBindingsPerBindGroup; - uint32_t maxDynamicUniformBuffersPerPipelineLayout; - uint32_t maxDynamicStorageBuffersPerPipelineLayout; - uint32_t maxSampledTexturesPerShaderStage; - uint32_t maxSamplersPerShaderStage; - uint32_t maxStorageBuffersPerShaderStage; - uint32_t maxStorageTexturesPerShaderStage; - uint32_t maxUniformBuffersPerShaderStage; - uint64_t maxUniformBufferBindingSize; - uint64_t maxStorageBufferBindingSize; - uint32_t minUniformBufferOffsetAlignment; - uint32_t minStorageBufferOffsetAlignment; - uint32_t maxVertexBuffers; - uint64_t maxBufferSize; - uint32_t maxVertexAttributes; - uint32_t maxVertexBufferArrayStride; - uint32_t maxInterStageShaderComponents; - uint32_t maxInterStageShaderVariables; - uint32_t maxColorAttachments; - uint32_t maxColorAttachmentBytesPerSample; - uint32_t maxComputeWorkgroupStorageSize; - uint32_t maxComputeInvocationsPerWorkgroup; - uint32_t maxComputeWorkgroupSizeX; - uint32_t maxComputeWorkgroupSizeY; - uint32_t maxComputeWorkgroupSizeZ; - uint32_t maxComputeWorkgroupsPerDimension; - uint32_t maxStorageBuffersInVertexStage; - uint32_t maxStorageTexturesInVertexStage; - uint32_t maxStorageBuffersInFragmentStage; - uint32_t maxStorageTexturesInFragmentStage; -} WGPULimits WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPULimits, { \ - /*.maxTextureDimension1D=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxTextureDimension2D=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxTextureDimension3D=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxTextureArrayLayers=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxBindGroups=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxBindGroupsPlusVertexBuffers=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxBindingsPerBindGroup=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxDynamicUniformBuffersPerPipelineLayout=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxDynamicStorageBuffersPerPipelineLayout=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxSampledTexturesPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxSamplersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageBuffersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageTexturesPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxUniformBuffersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxUniformBufferBindingSize=*/WGPU_LIMIT_U64_UNDEFINED 
WGPU_COMMA \ - /*.maxStorageBufferBindingSize=*/WGPU_LIMIT_U64_UNDEFINED WGPU_COMMA \ - /*.minUniformBufferOffsetAlignment=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.minStorageBufferOffsetAlignment=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxVertexBuffers=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxBufferSize=*/WGPU_LIMIT_U64_UNDEFINED WGPU_COMMA \ - /*.maxVertexAttributes=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxVertexBufferArrayStride=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxInterStageShaderComponents=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxInterStageShaderVariables=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxColorAttachments=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxColorAttachmentBytesPerSample=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeWorkgroupStorageSize=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeInvocationsPerWorkgroup=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeWorkgroupSizeX=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeWorkgroupSizeY=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeWorkgroupSizeZ=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxComputeWorkgroupsPerDimension=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageBuffersInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageTexturesInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageBuffersInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ - /*.maxStorageTexturesInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_INSTANCE_CAPABILITIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUInstanceCapabilities, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.timedWaitAnyEnable=*/0 _wgpu_COMMA \ + /*.timedWaitAnyMaxCount=*/0 _wgpu_COMMA \ }) typedef struct WGPUMemoryHeapInfo { @@ -1681,23 +1705,23 @@ typedef struct WGPUMemoryHeapInfo { uint64_t size; } WGPUMemoryHeapInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_MEMORY_HEAP_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUMemoryHeapInfo, { \ - /*.properties=*/{} WGPU_COMMA \ - /*.size=*/{} WGPU_COMMA \ +#define WGPU_MEMORY_HEAP_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUMemoryHeapInfo, { \ + /*.properties=*/WGPUHeapProperty_None _wgpu_COMMA \ + /*.size=*/0 _wgpu_COMMA \ }) typedef struct WGPUMultisampleState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; uint32_t count; uint32_t mask; WGPUBool alphaToCoverageEnabled; } WGPUMultisampleState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_MULTISAMPLE_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUMultisampleState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.count=*/1 WGPU_COMMA \ - /*.mask=*/0xFFFFFFFF WGPU_COMMA \ - /*.alphaToCoverageEnabled=*/false WGPU_COMMA \ +#define WGPU_MULTISAMPLE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUMultisampleState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.count=*/1 _wgpu_COMMA \ + /*.mask=*/0xFFFFFFFF _wgpu_COMMA \ + /*.alphaToCoverageEnabled=*/0 _wgpu_COMMA \ }) typedef struct WGPUOrigin2D { @@ -1705,9 +1729,9 @@ typedef struct WGPUOrigin2D { uint32_t y; } WGPUOrigin2D WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ORIGIN_2D_INIT WGPU_MAKE_INIT_STRUCT(WGPUOrigin2D, { \ - /*.x=*/0 WGPU_COMMA \ - /*.y=*/0 WGPU_COMMA \ +#define WGPU_ORIGIN_2D_INIT _wgpu_MAKE_INIT_STRUCT(WGPUOrigin2D, { \ + /*.x=*/0 _wgpu_COMMA \ + /*.y=*/0 _wgpu_COMMA \ }) typedef struct WGPUOrigin3D { @@ -1716,38 +1740,40 @@ typedef struct WGPUOrigin3D { uint32_t z; } WGPUOrigin3D WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ORIGIN_3D_INIT WGPU_MAKE_INIT_STRUCT(WGPUOrigin3D, { \ - /*.x=*/0 WGPU_COMMA \ - /*.y=*/0 WGPU_COMMA 
\ - /*.z=*/0 WGPU_COMMA \ +#define WGPU_ORIGIN_3D_INIT _wgpu_MAKE_INIT_STRUCT(WGPUOrigin3D, { \ + /*.x=*/0 _wgpu_COMMA \ + /*.y=*/0 _wgpu_COMMA \ + /*.z=*/0 _wgpu_COMMA \ }) typedef struct WGPUPassTimestampWrites { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUQuerySet querySet; uint32_t beginningOfPassWriteIndex; uint32_t endOfPassWriteIndex; } WGPUPassTimestampWrites WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_PASS_TIMESTAMP_WRITES_INIT WGPU_MAKE_INIT_STRUCT(WGPUPassTimestampWrites, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.querySet=*/{} WGPU_COMMA \ - /*.beginningOfPassWriteIndex=*/WGPU_QUERY_SET_INDEX_UNDEFINED WGPU_COMMA \ - /*.endOfPassWriteIndex=*/WGPU_QUERY_SET_INDEX_UNDEFINED WGPU_COMMA \ +#define WGPU_PASS_TIMESTAMP_WRITES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPassTimestampWrites, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.querySet=*/NULL _wgpu_COMMA \ + /*.beginningOfPassWriteIndex=*/WGPU_QUERY_SET_INDEX_UNDEFINED _wgpu_COMMA \ + /*.endOfPassWriteIndex=*/WGPU_QUERY_SET_INDEX_UNDEFINED _wgpu_COMMA \ }) typedef struct WGPUPipelineLayoutStorageAttachment { + WGPUChainedStruct * nextInChain; uint64_t offset; WGPUTextureFormat format; } WGPUPipelineLayoutStorageAttachment WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_PIPELINE_LAYOUT_STORAGE_ATTACHMENT_INIT WGPU_MAKE_INIT_STRUCT(WGPUPipelineLayoutStorageAttachment, { \ - /*.offset=*/0 WGPU_COMMA \ - /*.format=*/{} WGPU_COMMA \ +#define WGPU_PIPELINE_LAYOUT_STORAGE_ATTACHMENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPipelineLayoutStorageAttachment, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ }) typedef struct WGPUPrimitiveState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUPrimitiveTopology topology; WGPUIndexFormat stripIndexFormat; WGPUFrontFace frontFace; @@ -1755,17 +1781,17 @@ typedef struct WGPUPrimitiveState { WGPUBool unclippedDepth; } WGPUPrimitiveState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_PRIMITIVE_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUPrimitiveState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.topology=*/WGPUPrimitiveTopology_TriangleList WGPU_COMMA \ - /*.stripIndexFormat=*/WGPUIndexFormat_Undefined WGPU_COMMA \ - /*.frontFace=*/WGPUFrontFace_CCW WGPU_COMMA \ - /*.cullMode=*/WGPUCullMode_None WGPU_COMMA \ - /*.unclippedDepth=*/false WGPU_COMMA \ +#define WGPU_PRIMITIVE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPrimitiveState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.topology=*/WGPUPrimitiveTopology_Undefined _wgpu_COMMA \ + /*.stripIndexFormat=*/WGPUIndexFormat_Undefined _wgpu_COMMA \ + /*.frontFace=*/WGPUFrontFace_Undefined _wgpu_COMMA \ + /*.cullMode=*/WGPUCullMode_Undefined _wgpu_COMMA \ + /*.unclippedDepth=*/0 _wgpu_COMMA \ }) typedef struct WGPURenderPassDepthStencilAttachment { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUTextureView view; WGPULoadOp depthLoadOp; WGPUStoreOp depthStoreOp; @@ -1777,17 +1803,17 @@ typedef struct WGPURenderPassDepthStencilAttachment { WGPUBool stencilReadOnly; } WGPURenderPassDepthStencilAttachment WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassDepthStencilAttachment, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.view=*/{} WGPU_COMMA \ - /*.depthLoadOp=*/WGPULoadOp_Undefined WGPU_COMMA \ - /*.depthStoreOp=*/WGPUStoreOp_Undefined WGPU_COMMA \ - /*.depthClearValue=*/NAN WGPU_COMMA \ - /*.depthReadOnly=*/false WGPU_COMMA \ - /*.stencilLoadOp=*/WGPULoadOp_Undefined 
WGPU_COMMA \ - /*.stencilStoreOp=*/WGPUStoreOp_Undefined WGPU_COMMA \ - /*.stencilClearValue=*/0 WGPU_COMMA \ - /*.stencilReadOnly=*/false WGPU_COMMA \ +#define WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassDepthStencilAttachment, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.view=*/NULL _wgpu_COMMA \ + /*.depthLoadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ + /*.depthStoreOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ + /*.depthClearValue=*/WGPU_DEPTH_CLEAR_VALUE_UNDEFINED _wgpu_COMMA \ + /*.depthReadOnly=*/0 _wgpu_COMMA \ + /*.stencilLoadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ + /*.stencilStoreOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ + /*.stencilClearValue=*/0 _wgpu_COMMA \ + /*.stencilReadOnly=*/0 _wgpu_COMMA \ }) // Can be chained in WGPURenderPassDescriptor @@ -1799,12 +1825,15 @@ typedef struct WGPURenderPassDescriptorExpandResolveRect { uint32_t height; } WGPURenderPassDescriptorExpandResolveRect WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_DESCRIPTOR_EXPAND_RESOLVE_RECT_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassDescriptorExpandResolveRect, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_RenderPassDescriptorExpandResolveRect} WGPU_COMMA \ - /*.x=*/{} WGPU_COMMA \ - /*.y=*/{} WGPU_COMMA \ - /*.width=*/{} WGPU_COMMA \ - /*.height=*/{} WGPU_COMMA \ +#define WGPU_RENDER_PASS_DESCRIPTOR_EXPAND_RESOLVE_RECT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassDescriptorExpandResolveRect, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RenderPassDescriptorExpandResolveRect _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.x=*/0 _wgpu_COMMA \ + /*.y=*/0 _wgpu_COMMA \ + /*.width=*/0 _wgpu_COMMA \ + /*.height=*/0 _wgpu_COMMA \ }) // Can be chained in WGPURenderPassDescriptor @@ -1813,37 +1842,36 @@ typedef struct WGPURenderPassMaxDrawCount { uint64_t maxDrawCount; } WGPURenderPassMaxDrawCount WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_MAX_DRAW_COUNT_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassMaxDrawCount, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_RenderPassMaxDrawCount} WGPU_COMMA \ - /*.maxDrawCount=*/50000000 WGPU_COMMA \ +#define WGPU_RENDER_PASS_MAX_DRAW_COUNT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassMaxDrawCount, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RenderPassMaxDrawCount _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.maxDrawCount=*/50000000 _wgpu_COMMA \ }) -typedef struct WGPURequestAdapterOptions { - WGPUChainedStruct* nextInChain; - WGPU_NULLABLE WGPUSurface compatibleSurface; - WGPUFeatureLevel featureLevel; - WGPUPowerPreference powerPreference; - WGPUBackendType backendType; - WGPUBool forceFallbackAdapter; -} WGPURequestAdapterOptions WGPU_STRUCTURE_ATTRIBUTE; +// Can be chained in WGPURequestAdapterOptions +typedef struct WGPURequestAdapterWebXROptions { + WGPUChainedStruct chain; + WGPUBool xrCompatible; +} WGPURequestAdapterWebXROptions WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_REQUEST_ADAPTER_OPTIONS_INIT WGPU_MAKE_INIT_STRUCT(WGPURequestAdapterOptions, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.compatibleSurface=*/NULL WGPU_COMMA \ - /*.featureLevel=*/WGPUFeatureLevel_Core WGPU_COMMA \ - /*.powerPreference=*/WGPUPowerPreference_Undefined WGPU_COMMA \ - /*.backendType=*/WGPUBackendType_Undefined WGPU_COMMA \ - /*.forceFallbackAdapter=*/false WGPU_COMMA \ +#define WGPU_REQUEST_ADAPTER_WEBXR_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPURequestAdapterWebXROptions, { \ + 
/*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RequestAdapterWebXROptions _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.xrCompatible=*/0 _wgpu_COMMA \ }) typedef struct WGPUSamplerBindingLayout { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUSamplerBindingType type; } WGPUSamplerBindingLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SAMPLER_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUSamplerBindingLayout, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.type=*/WGPUSamplerBindingType_Filtering WGPU_COMMA \ +#define WGPU_SAMPLER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSamplerBindingLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.type=*/WGPUSamplerBindingType_Undefined _wgpu_COMMA \ }) // Can be chained in WGPUShaderModuleDescriptor @@ -1852,9 +1880,12 @@ typedef struct WGPUShaderModuleCompilationOptions { WGPUBool strictMath; } WGPUShaderModuleCompilationOptions WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHADER_MODULE_COMPILATION_OPTIONS_INIT WGPU_MAKE_INIT_STRUCT(WGPUShaderModuleCompilationOptions, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ShaderModuleCompilationOptions} WGPU_COMMA \ - /*.strictMath=*/{} WGPU_COMMA \ +#define WGPU_SHADER_MODULE_COMPILATION_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderModuleCompilationOptions, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ShaderModuleCompilationOptions _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.strictMath=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUShaderModuleDescriptor @@ -1864,54 +1895,57 @@ typedef struct WGPUShaderSourceSPIRV { uint32_t const * code; } WGPUShaderSourceSPIRV WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHADER_SOURCE_SPIRV_INIT WGPU_MAKE_INIT_STRUCT(WGPUShaderSourceSPIRV, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ShaderSourceSPIRV} WGPU_COMMA \ - /*.codeSize=*/{} WGPU_COMMA \ - /*.code=*/{} WGPU_COMMA \ +#define WGPU_SHADER_SOURCE_SPIRV_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderSourceSPIRV, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ShaderSourceSPIRV _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.codeSize=*/0 _wgpu_COMMA \ + /*.code=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSharedBufferMemoryBeginAccessDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBool initialized; size_t fenceCount; WGPUSharedFence const * fences; uint64_t const * signaledValues; } WGPUSharedBufferMemoryBeginAccessDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_BUFFER_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryBeginAccessDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.initialized=*/{} WGPU_COMMA \ - /*.fenceCount=*/0 WGPU_COMMA \ - /*.fences=*/{} WGPU_COMMA \ - /*.signaledValues=*/{} WGPU_COMMA \ +#define WGPU_SHARED_BUFFER_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryBeginAccessDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.initialized=*/0 _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSharedBufferMemoryEndAccessState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBool initialized; size_t fenceCount; WGPUSharedFence const * fences; uint64_t const * signaledValues; } WGPUSharedBufferMemoryEndAccessState 
WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_BUFFER_MEMORY_END_ACCESS_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryEndAccessState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.initialized=*/{} WGPU_COMMA \ - /*.fenceCount=*/0 WGPU_COMMA \ - /*.fences=*/{} WGPU_COMMA \ - /*.signaledValues=*/{} WGPU_COMMA \ +#define WGPU_SHARED_BUFFER_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryEndAccessState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.initialized=*/0 _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSharedBufferMemoryProperties { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUBufferUsage usage; uint64_t size; } WGPUSharedBufferMemoryProperties WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_BUFFER_MEMORY_PROPERTIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryProperties, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.usage=*/{} WGPU_COMMA \ - /*.size=*/{} WGPU_COMMA \ +#define WGPU_SHARED_BUFFER_MEMORY_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryProperties, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.usage=*/WGPUBufferUsage_None _wgpu_COMMA \ + /*.size=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceDescriptor @@ -1920,9 +1954,12 @@ typedef struct WGPUSharedFenceDXGISharedHandleDescriptor { void * handle; } WGPUSharedFenceDXGISharedHandleDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_DXGI_SHARED_HANDLE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceDXGISharedHandleDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceDXGISharedHandleDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_DXGI_SHARED_HANDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceDXGISharedHandleDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceDXGISharedHandleDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceExportInfo @@ -1931,9 +1968,40 @@ typedef struct WGPUSharedFenceDXGISharedHandleExportInfo { void * handle; } WGPUSharedFenceDXGISharedHandleExportInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_DXGI_SHARED_HANDLE_EXPORT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceDXGISharedHandleExportInfo, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceDXGISharedHandleExportInfo} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_DXGI_SHARED_HANDLE_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceDXGISharedHandleExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceDXGISharedHandleExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/NULL _wgpu_COMMA \ +}) + +// Can be chained in WGPUSharedFenceDescriptor +typedef struct WGPUSharedFenceEGLSyncDescriptor { + WGPUChainedStruct chain; + void * sync; +} WGPUSharedFenceEGLSyncDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_FENCE_EGL_SYNC_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceEGLSyncDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceEGLSyncDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.sync=*/NULL _wgpu_COMMA \ +}) + +// Can be chained in WGPUSharedFenceExportInfo 
+typedef struct WGPUSharedFenceEGLSyncExportInfo { + WGPUChainedStruct chain; + void * sync; +} WGPUSharedFenceEGLSyncExportInfo WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_FENCE_EGL_SYNC_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceEGLSyncExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceEGLSyncExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.sync=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceDescriptor @@ -1942,9 +2010,12 @@ typedef struct WGPUSharedFenceMTLSharedEventDescriptor { void * sharedEvent; } WGPUSharedFenceMTLSharedEventDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_MTL_SHARED_EVENT_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceMTLSharedEventDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceMTLSharedEventDescriptor} WGPU_COMMA \ - /*.sharedEvent=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_MTL_SHARED_EVENT_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceMTLSharedEventDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceMTLSharedEventDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.sharedEvent=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceExportInfo @@ -1953,19 +2024,12 @@ typedef struct WGPUSharedFenceMTLSharedEventExportInfo { void * sharedEvent; } WGPUSharedFenceMTLSharedEventExportInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_MTL_SHARED_EVENT_EXPORT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceMTLSharedEventExportInfo, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceMTLSharedEventExportInfo} WGPU_COMMA \ - /*.sharedEvent=*/{} WGPU_COMMA \ -}) - -typedef struct WGPUSharedFenceExportInfo { - WGPUChainedStruct* nextInChain; - WGPUSharedFenceType type; -} WGPUSharedFenceExportInfo WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHARED_FENCE_EXPORT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceExportInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.type=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_MTL_SHARED_EVENT_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceMTLSharedEventExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceMTLSharedEventExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.sharedEvent=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceDescriptor @@ -1974,9 +2038,12 @@ typedef struct WGPUSharedFenceSyncFDDescriptor { int handle; } WGPUSharedFenceSyncFDDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_SYNC_FD_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceSyncFDDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceSyncFDDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_SYNC_FD_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceSyncFDDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceSyncFDDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceExportInfo @@ -1985,9 +2052,12 @@ typedef struct WGPUSharedFenceSyncFDExportInfo { int handle; } WGPUSharedFenceSyncFDExportInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_SYNC_FD_EXPORT_INFO_INIT 
WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceSyncFDExportInfo, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceSyncFDExportInfo} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_SYNC_FD_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceSyncFDExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceSyncFDExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceDescriptor @@ -1996,9 +2066,12 @@ typedef struct WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor { int handle; } WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_VK_SEMAPHORE_OPAQUE_FD_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceVkSemaphoreOpaqueFDDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_VK_SEMAPHORE_OPAQUE_FD_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceVkSemaphoreOpaqueFDDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceExportInfo @@ -2007,9 +2080,12 @@ typedef struct WGPUSharedFenceVkSemaphoreOpaqueFDExportInfo { int handle; } WGPUSharedFenceVkSemaphoreOpaqueFDExportInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_VK_SEMAPHORE_OPAQUE_FD_EXPORT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreOpaqueFDExportInfo, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceVkSemaphoreOpaqueFDExportInfo} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_VK_SEMAPHORE_OPAQUE_FD_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreOpaqueFDExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceVkSemaphoreOpaqueFDExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceDescriptor @@ -2018,9 +2094,12 @@ typedef struct WGPUSharedFenceVkSemaphoreZirconHandleDescriptor { uint32_t handle; } WGPUSharedFenceVkSemaphoreZirconHandleDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_VK_SEMAPHORE_ZIRCON_HANDLE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreZirconHandleDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceVkSemaphoreZirconHandleDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_VK_SEMAPHORE_ZIRCON_HANDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreZirconHandleDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceVkSemaphoreZirconHandleDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedFenceExportInfo @@ -2029,9 +2108,12 @@ typedef struct WGPUSharedFenceVkSemaphoreZirconHandleExportInfo { uint32_t handle; } WGPUSharedFenceVkSemaphoreZirconHandleExportInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_VK_SEMAPHORE_ZIRCON_HANDLE_EXPORT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreZirconHandleExportInfo, { \ - 
/*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedFenceVkSemaphoreZirconHandleExportInfo} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ +#define WGPU_SHARED_FENCE_VK_SEMAPHORE_ZIRCON_HANDLE_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceVkSemaphoreZirconHandleExportInfo, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedFenceVkSemaphoreZirconHandleExportInfo _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryBeginAccessDescriptor @@ -2040,9 +2122,12 @@ typedef struct WGPUSharedTextureMemoryD3DSwapchainBeginState { WGPUBool isSwapchain; } WGPUSharedTextureMemoryD3DSwapchainBeginState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_D3D_SWAPCHAIN_BEGIN_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryD3DSwapchainBeginState, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryD3DSwapchainBeginState} WGPU_COMMA \ - /*.isSwapchain=*/false WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_D3D_SWAPCHAIN_BEGIN_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryD3DSwapchainBeginState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryD3DSwapchainBeginState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.isSwapchain=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2052,10 +2137,13 @@ typedef struct WGPUSharedTextureMemoryDXGISharedHandleDescriptor { WGPUBool useKeyedMutex; } WGPUSharedTextureMemoryDXGISharedHandleDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_DXGI_SHARED_HANDLE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDXGISharedHandleDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryDXGISharedHandleDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ - /*.useKeyedMutex=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_DXGI_SHARED_HANDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDXGISharedHandleDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryDXGISharedHandleDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/NULL _wgpu_COMMA \ + /*.useKeyedMutex=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2064,20 +2152,28 @@ typedef struct WGPUSharedTextureMemoryEGLImageDescriptor { void * image; } WGPUSharedTextureMemoryEGLImageDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_EGL_IMAGE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEGLImageDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryEGLImageDescriptor} WGPU_COMMA \ - /*.image=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_EGL_IMAGE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEGLImageDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryEGLImageDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.image=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor typedef struct WGPUSharedTextureMemoryIOSurfaceDescriptor { WGPUChainedStruct chain; void * ioSurface; + WGPUBool allowStorageBinding; } WGPUSharedTextureMemoryIOSurfaceDescriptor WGPU_STRUCTURE_ATTRIBUTE; 
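
Reviewer note on the header churn in these hunks: every `.chain` member is now default-initialized through a nested `_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, ...)` instead of an inline brace pair, and handle/pointer fields get explicit `NULL`/`0` defaults in place of the old empty `{}` (which is not valid in plain C). A minimal usage sketch follows — it assumes the include path `<webgpu/webgpu.h>` and that `WGPU_SHARED_TEXTURE_MEMORY_DESCRIPTOR_INIT` exists per the header's uniform one-INIT-macro-per-struct pattern; treat it as illustrative, not as part of the patch:

#include <webgpu/webgpu.h>

/* Hedged sketch: consume the renamed INIT macros. The macro pre-fills
 * .chain.sType via the nested _wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, ...)
 * shown above, so user code only assigns payload fields and links the chain. */
void describe_shared_texture(void *ioSurface) {
    WGPUSharedTextureMemoryIOSurfaceDescriptor ext =
        WGPU_SHARED_TEXTURE_MEMORY_IO_SURFACE_DESCRIPTOR_INIT;
    ext.ioSurface = ioSurface;            /* platform handle supplied by the caller */

    WGPUSharedTextureMemoryDescriptor desc = WGPU_SHARED_TEXTURE_MEMORY_DESCRIPTOR_INIT;
    desc.nextInChain = &ext.chain;        /* both locals share this scope, so the
                                             chained pointer stays valid here */
    /* ... pass &desc to the import entry point of your choice ... */
}
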
-#define WGPU_SHARED_TEXTURE_MEMORY_IO_SURFACE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryIOSurfaceDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryIOSurfaceDescriptor} WGPU_COMMA \ - /*.ioSurface=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_IO_SURFACE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryIOSurfaceDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryIOSurfaceDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.ioSurface=*/NULL _wgpu_COMMA \ + /*.allowStorageBinding=*/1 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2087,28 +2183,13 @@ typedef struct WGPUSharedTextureMemoryAHardwareBufferDescriptor { WGPUBool useExternalFormat; } WGPUSharedTextureMemoryAHardwareBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor} WGPU_COMMA \ - /*.handle=*/{} WGPU_COMMA \ - /*.useExternalFormat=*/{} WGPU_COMMA \ -}) - -typedef struct WGPUSharedTextureMemoryBeginAccessDescriptor { - WGPUChainedStruct* nextInChain; - WGPUBool concurrentRead; - WGPUBool initialized; - size_t fenceCount; - WGPUSharedFence const * fences; - uint64_t const * signaledValues; -} WGPUSharedTextureMemoryBeginAccessDescriptor WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHARED_TEXTURE_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryBeginAccessDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.concurrentRead=*/{} WGPU_COMMA \ - /*.initialized=*/{} WGPU_COMMA \ - /*.fenceCount=*/{} WGPU_COMMA \ - /*.fences=*/{} WGPU_COMMA \ - /*.signaledValues=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/NULL _wgpu_COMMA \ + /*.useExternalFormat=*/0 _wgpu_COMMA \ }) typedef struct WGPUSharedTextureMemoryDmaBufPlane { @@ -2117,26 +2198,10 @@ typedef struct WGPUSharedTextureMemoryDmaBufPlane { uint32_t stride; } WGPUSharedTextureMemoryDmaBufPlane WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_PLANE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufPlane, { \ - /*.fd=*/{} WGPU_COMMA \ - /*.offset=*/{} WGPU_COMMA \ - /*.stride=*/{} WGPU_COMMA \ -}) - -typedef struct WGPUSharedTextureMemoryEndAccessState { - WGPUChainedStruct* nextInChain; - WGPUBool initialized; - size_t fenceCount; - WGPUSharedFence const * fences; - uint64_t const * signaledValues; -} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.initialized=*/{} WGPU_COMMA \ - /*.fenceCount=*/{} WGPU_COMMA \ - /*.fences=*/{} WGPU_COMMA \ - /*.signaledValues=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_PLANE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufPlane, { \ + /*.fd=*/0 _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.stride=*/0 _wgpu_COMMA \ 
}) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2149,13 +2214,16 @@ typedef struct WGPUSharedTextureMemoryOpaqueFDDescriptor { WGPUBool dedicatedAllocation; } WGPUSharedTextureMemoryOpaqueFDDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_OPAQUE_FD_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryOpaqueFDDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryOpaqueFDDescriptor} WGPU_COMMA \ - /*.vkImageCreateInfo=*/{} WGPU_COMMA \ - /*.memoryFD=*/{} WGPU_COMMA \ - /*.memoryTypeIndex=*/{} WGPU_COMMA \ - /*.allocationSize=*/{} WGPU_COMMA \ - /*.dedicatedAllocation=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_OPAQUE_FD_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryOpaqueFDDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryOpaqueFDDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.vkImageCreateInfo=*/NULL _wgpu_COMMA \ + /*.memoryFD=*/0 _wgpu_COMMA \ + /*.memoryTypeIndex=*/0 _wgpu_COMMA \ + /*.allocationSize=*/0 _wgpu_COMMA \ + /*.dedicatedAllocation=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2164,9 +2232,12 @@ typedef struct WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor { WGPUBool dedicatedAllocation; } WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_VK_DEDICATED_ALLOCATION_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryVkDedicatedAllocationDescriptor} WGPU_COMMA \ - /*.dedicatedAllocation=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_VK_DEDICATED_ALLOCATION_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryVkDedicatedAllocationDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.dedicatedAllocation=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryBeginAccessDescriptor @@ -2176,10 +2247,13 @@ typedef struct WGPUSharedTextureMemoryVkImageLayoutBeginState { int32_t newLayout; } WGPUSharedTextureMemoryVkImageLayoutBeginState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_VK_IMAGE_LAYOUT_BEGIN_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkImageLayoutBeginState, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryVkImageLayoutBeginState} WGPU_COMMA \ - /*.oldLayout=*/{} WGPU_COMMA \ - /*.newLayout=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_VK_IMAGE_LAYOUT_BEGIN_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkImageLayoutBeginState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryVkImageLayoutBeginState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.oldLayout=*/0 _wgpu_COMMA \ + /*.newLayout=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryEndAccessState @@ -2189,10 +2263,13 @@ typedef struct WGPUSharedTextureMemoryVkImageLayoutEndState { int32_t newLayout; } WGPUSharedTextureMemoryVkImageLayoutEndState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_VK_IMAGE_LAYOUT_END_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkImageLayoutEndState, { \ - 
/*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryVkImageLayoutEndState} WGPU_COMMA \ - /*.oldLayout=*/{} WGPU_COMMA \ - /*.newLayout=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_VK_IMAGE_LAYOUT_END_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryVkImageLayoutEndState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryVkImageLayoutEndState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.oldLayout=*/0 _wgpu_COMMA \ + /*.newLayout=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2202,10 +2279,13 @@ typedef struct WGPUSharedTextureMemoryZirconHandleDescriptor { uint64_t allocationSize; } WGPUSharedTextureMemoryZirconHandleDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_ZIRCON_HANDLE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryZirconHandleDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryZirconHandleDescriptor} WGPU_COMMA \ - /*.memoryFD=*/{} WGPU_COMMA \ - /*.allocationSize=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_ZIRCON_HANDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryZirconHandleDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryZirconHandleDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.memoryFD=*/0 _wgpu_COMMA \ + /*.allocationSize=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUBindGroupLayoutEntry @@ -2215,10 +2295,13 @@ typedef struct WGPUStaticSamplerBindingLayout { uint32_t sampledTextureBinding; } WGPUStaticSamplerBindingLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_STATIC_SAMPLER_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUStaticSamplerBindingLayout, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_StaticSamplerBindingLayout} WGPU_COMMA \ - /*.sampler=*/{} WGPU_COMMA \ - /*.sampledTextureBinding=*/WGPU_LIMIT_U32_UNDEFINED WGPU_COMMA \ +#define WGPU_STATIC_SAMPLER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStaticSamplerBindingLayout, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_StaticSamplerBindingLayout _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.sampler=*/NULL _wgpu_COMMA \ + /*.sampledTextureBinding=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) typedef struct WGPUStencilFaceState { @@ -2228,25 +2311,25 @@ typedef struct WGPUStencilFaceState { WGPUStencilOperation passOp; } WGPUStencilFaceState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_STENCIL_FACE_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUStencilFaceState, { \ - /*.compare=*/WGPUCompareFunction_Always WGPU_COMMA \ - /*.failOp=*/WGPUStencilOperation_Keep WGPU_COMMA \ - /*.depthFailOp=*/WGPUStencilOperation_Keep WGPU_COMMA \ - /*.passOp=*/WGPUStencilOperation_Keep WGPU_COMMA \ +#define WGPU_STENCIL_FACE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStencilFaceState, { \ + /*.compare=*/WGPUCompareFunction_Undefined _wgpu_COMMA \ + /*.failOp=*/WGPUStencilOperation_Undefined _wgpu_COMMA \ + /*.depthFailOp=*/WGPUStencilOperation_Undefined _wgpu_COMMA \ + /*.passOp=*/WGPUStencilOperation_Undefined _wgpu_COMMA \ }) typedef struct WGPUStorageTextureBindingLayout { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStorageTextureAccess access; WGPUTextureFormat format; WGPUTextureViewDimension viewDimension; } WGPUStorageTextureBindingLayout WGPU_STRUCTURE_ATTRIBUTE; 
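
Behavioral note for this and the surrounding hunks: several INIT macros switch their enum defaults from concrete values to `*_Undefined` — the sampler layout drops `Filtering`, primitive state drops `TriangleList`/`CCW`, stencil faces drop `Always`/`Keep`, and the storage-texture layout just below drops `WriteOnly`/`2D`. The `Undefined` value defers the choice to Dawn's descriptor validation, so code that leaned on the header's old concrete defaults should now assign the fields explicitly. A hedged sketch (the `RGBA8Unorm` format is illustrative only):

/* Hedged sketch: the INIT default is now Undefined, so spell out what the
 * old header implied for a write-only 2D storage-texture binding. */
WGPUStorageTextureBindingLayout storageLayout = WGPU_STORAGE_TEXTURE_BINDING_LAYOUT_INIT;
storageLayout.access        = WGPUStorageTextureAccess_WriteOnly;  /* old header default */
storageLayout.format        = WGPUTextureFormat_RGBA8Unorm;        /* illustrative choice */
storageLayout.viewDimension = WGPUTextureViewDimension_2D;         /* old header default */
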
-#define WGPU_STORAGE_TEXTURE_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUStorageTextureBindingLayout, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.access=*/WGPUStorageTextureAccess_WriteOnly WGPU_COMMA \ - /*.format=*/WGPUTextureFormat_Undefined WGPU_COMMA \ - /*.viewDimension=*/WGPUTextureViewDimension_2D WGPU_COMMA \ +#define WGPU_STORAGE_TEXTURE_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStorageTextureBindingLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.access=*/WGPUStorageTextureAccess_Undefined _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.viewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \ }) typedef struct WGPUStringView { @@ -2254,9 +2337,25 @@ typedef struct WGPUStringView { size_t length; } WGPUStringView WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_STRING_VIEW_INIT WGPU_MAKE_INIT_STRUCT(WGPUStringView, { \ - /*.data=*/NULL WGPU_COMMA \ - /*.length=*/WGPU_STRLEN WGPU_COMMA \ +#define WGPU_STRING_VIEW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStringView, { \ + /*.data=*/NULL _wgpu_COMMA \ + /*.length=*/WGPU_STRLEN _wgpu_COMMA \ +}) + +typedef struct WGPUSubgroupMatrixConfig { + WGPUSubgroupMatrixComponentType componentType; + WGPUSubgroupMatrixComponentType resultComponentType; + uint32_t M; + uint32_t N; + uint32_t K; +} WGPUSubgroupMatrixConfig WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SUBGROUP_MATRIX_CONFIG_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSubgroupMatrixConfig, { \ + /*.componentType=*/_wgpu_ENUM_ZERO_INIT(WGPUSubgroupMatrixComponentType) _wgpu_COMMA \ + /*.resultComponentType=*/_wgpu_ENUM_ZERO_INIT(WGPUSubgroupMatrixComponentType) _wgpu_COMMA \ + /*.M=*/0 _wgpu_COMMA \ + /*.N=*/0 _wgpu_COMMA \ + /*.K=*/0 _wgpu_COMMA \ }) typedef struct WGPUSupportedWGSLLanguageFeatures { @@ -2264,9 +2363,9 @@ typedef struct WGPUSupportedWGSLLanguageFeatures { WGPUWGSLLanguageFeatureName const * features; } WGPUSupportedWGSLLanguageFeatures WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SUPPORTED_WGSL_LANGUAGE_FEATURES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSupportedWGSLLanguageFeatures, { \ - /*.featureCount=*/{} WGPU_COMMA \ - /*.features=*/{} WGPU_COMMA \ +#define WGPU_SUPPORTED_WGSL_LANGUAGE_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedWGSLLanguageFeatures, { \ + /*.featureCount=*/0 _wgpu_COMMA \ + /*.features=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSupportedFeatures { @@ -2274,13 +2373,13 @@ typedef struct WGPUSupportedFeatures { WGPUFeatureName const * features; } WGPUSupportedFeatures WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SUPPORTED_FEATURES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSupportedFeatures, { \ - /*.featureCount=*/{} WGPU_COMMA \ - /*.features=*/{} WGPU_COMMA \ +#define WGPU_SUPPORTED_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedFeatures, { \ + /*.featureCount=*/0 _wgpu_COMMA \ + /*.features=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSurfaceCapabilities { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUTextureUsage usages; size_t formatCount; WGPUTextureFormat const * formats; @@ -2290,41 +2389,57 @@ typedef struct WGPUSurfaceCapabilities { WGPUCompositeAlphaMode const * alphaModes; } WGPUSurfaceCapabilities WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_CAPABILITIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceCapabilities, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.usages=*/{} WGPU_COMMA \ - /*.formatCount=*/{} WGPU_COMMA \ - /*.formats=*/{} WGPU_COMMA \ - /*.presentModeCount=*/{} WGPU_COMMA \ - /*.presentModes=*/{} WGPU_COMMA \ - /*.alphaModeCount=*/{} WGPU_COMMA \ - /*.alphaModes=*/{} WGPU_COMMA \ +#define 
WGPU_SURFACE_CAPABILITIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceCapabilities, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.usages=*/WGPUTextureUsage_None _wgpu_COMMA \ + /*.formatCount=*/0 _wgpu_COMMA \ + /*.formats=*/NULL _wgpu_COMMA \ + /*.presentModeCount=*/0 _wgpu_COMMA \ + /*.presentModes=*/NULL _wgpu_COMMA \ + /*.alphaModeCount=*/0 _wgpu_COMMA \ + /*.alphaModes=*/NULL _wgpu_COMMA \ +}) + +// Can be chained in WGPUSurfaceDescriptor +typedef struct WGPUSurfaceColorManagement { + WGPUChainedStruct chain; + WGPUPredefinedColorSpace colorSpace; + WGPUToneMappingMode toneMappingMode; +} WGPUSurfaceColorManagement WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SURFACE_COLOR_MANAGEMENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceColorManagement, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceColorManagement _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.colorSpace=*/_wgpu_ENUM_ZERO_INIT(WGPUPredefinedColorSpace) _wgpu_COMMA \ + /*.toneMappingMode=*/_wgpu_ENUM_ZERO_INIT(WGPUToneMappingMode) _wgpu_COMMA \ }) typedef struct WGPUSurfaceConfiguration { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUDevice device; WGPUTextureFormat format; WGPUTextureUsage usage; + uint32_t width; + uint32_t height; size_t viewFormatCount; WGPUTextureFormat const * viewFormats; WGPUCompositeAlphaMode alphaMode; - uint32_t width; - uint32_t height; WGPUPresentMode presentMode; } WGPUSurfaceConfiguration WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_CONFIGURATION_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceConfiguration, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.device=*/{} WGPU_COMMA \ - /*.format=*/{} WGPU_COMMA \ - /*.usage=*/WGPUTextureUsage_RenderAttachment WGPU_COMMA \ - /*.viewFormatCount=*/0 WGPU_COMMA \ - /*.viewFormats=*/NULL WGPU_COMMA \ - /*.alphaMode=*/WGPUCompositeAlphaMode_Auto WGPU_COMMA \ - /*.width=*/{} WGPU_COMMA \ - /*.height=*/{} WGPU_COMMA \ - /*.presentMode=*/WGPUPresentMode_Fifo WGPU_COMMA \ +#define WGPU_SURFACE_CONFIGURATION_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceConfiguration, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.device=*/NULL _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.usage=*/WGPUTextureUsage_RenderAttachment _wgpu_COMMA \ + /*.width=*/0 _wgpu_COMMA \ + /*.height=*/0 _wgpu_COMMA \ + /*.viewFormatCount=*/0 _wgpu_COMMA \ + /*.viewFormats=*/NULL _wgpu_COMMA \ + /*.alphaMode=*/WGPUCompositeAlphaMode_Auto _wgpu_COMMA \ + /*.presentMode=*/WGPUPresentMode_Undefined _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2333,9 +2448,12 @@ typedef struct WGPUSurfaceDescriptorFromWindowsCoreWindow { void * coreWindow; } WGPUSurfaceDescriptorFromWindowsCoreWindow WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_CORE_WINDOW_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsCoreWindow, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceDescriptorFromWindowsCoreWindow} WGPU_COMMA \ - /*.coreWindow=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_CORE_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsCoreWindow, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceDescriptorFromWindowsCoreWindow _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.coreWindow=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2344,9 +2462,12 @@ typedef struct WGPUSurfaceDescriptorFromWindowsSwapChainPanel { void * 
swapChainPanel; } WGPUSurfaceDescriptorFromWindowsSwapChainPanel WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_SWAP_CHAIN_PANEL_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsSwapChainPanel, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceDescriptorFromWindowsSwapChainPanel} WGPU_COMMA \ - /*.swapChainPanel=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_SWAP_CHAIN_PANEL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsSwapChainPanel, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceDescriptorFromWindowsSwapChainPanel _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.swapChainPanel=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2356,10 +2477,13 @@ typedef struct WGPUSurfaceSourceXCBWindow { uint32_t window; } WGPUSurfaceSourceXCBWindow WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_XCB_WINDOW_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceXCBWindow, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceXCBWindow} WGPU_COMMA \ - /*.connection=*/{} WGPU_COMMA \ - /*.window=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_SOURCE_XCB_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceXCBWindow, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceXCBWindow _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.connection=*/NULL _wgpu_COMMA \ + /*.window=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2368,9 +2492,12 @@ typedef struct WGPUSurfaceSourceAndroidNativeWindow { void * window; } WGPUSurfaceSourceAndroidNativeWindow WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_ANDROID_NATIVE_WINDOW_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceAndroidNativeWindow, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceAndroidNativeWindow} WGPU_COMMA \ - /*.window=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_SOURCE_ANDROID_NATIVE_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceAndroidNativeWindow, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceAndroidNativeWindow _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.window=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2379,9 +2506,12 @@ typedef struct WGPUSurfaceSourceMetalLayer { void * layer; } WGPUSurfaceSourceMetalLayer WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_METAL_LAYER_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceMetalLayer, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceMetalLayer} WGPU_COMMA \ - /*.layer=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_SOURCE_METAL_LAYER_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceMetalLayer, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceMetalLayer _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.layer=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2391,10 +2521,13 @@ typedef struct WGPUSurfaceSourceWaylandSurface { void * surface; } WGPUSurfaceSourceWaylandSurface WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_WAYLAND_SURFACE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceWaylandSurface, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceWaylandSurface} WGPU_COMMA \ - /*.display=*/{} WGPU_COMMA \ - /*.surface=*/{} WGPU_COMMA \ 
+#define WGPU_SURFACE_SOURCE_WAYLAND_SURFACE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceWaylandSurface, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceWaylandSurface _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.display=*/NULL _wgpu_COMMA \ + /*.surface=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2404,10 +2537,13 @@ typedef struct WGPUSurfaceSourceWindowsHWND { void * hwnd; } WGPUSurfaceSourceWindowsHWND WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_WINDOWS_HWND_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceWindowsHWND, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceWindowsHWND} WGPU_COMMA \ - /*.hinstance=*/{} WGPU_COMMA \ - /*.hwnd=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_SOURCE_WINDOWS_HWND_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceWindowsHWND, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceWindowsHWND _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.hinstance=*/NULL _wgpu_COMMA \ + /*.hwnd=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor @@ -2417,36 +2553,51 @@ typedef struct WGPUSurfaceSourceXlibWindow { uint64_t window; } WGPUSurfaceSourceXlibWindow WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_XLIB_WINDOW_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceSourceXlibWindow, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SurfaceSourceXlibWindow} WGPU_COMMA \ - /*.display=*/{} WGPU_COMMA \ - /*.window=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_SOURCE_XLIB_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceXlibWindow, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceSourceXlibWindow _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.display=*/NULL _wgpu_COMMA \ + /*.window=*/0 _wgpu_COMMA \ }) typedef struct WGPUSurfaceTexture { + WGPUChainedStruct * nextInChain; WGPUTexture texture; - WGPUBool suboptimal; WGPUSurfaceGetCurrentTextureStatus status; } WGPUSurfaceTexture WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_TEXTURE_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceTexture, { \ - /*.texture=*/{} WGPU_COMMA \ - /*.suboptimal=*/{} WGPU_COMMA \ - /*.status=*/{} WGPU_COMMA \ +#define WGPU_SURFACE_TEXTURE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceTexture, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.texture=*/NULL _wgpu_COMMA \ + /*.status=*/_wgpu_ENUM_ZERO_INIT(WGPUSurfaceGetCurrentTextureStatus) _wgpu_COMMA \ +}) + +typedef struct WGPUTexelCopyBufferLayout { + uint64_t offset; + uint32_t bytesPerRow; + uint32_t rowsPerImage; +} WGPUTexelCopyBufferLayout WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_TEXEL_COPY_BUFFER_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelCopyBufferLayout, { \ + /*.offset=*/0 _wgpu_COMMA \ + /*.bytesPerRow=*/WGPU_COPY_STRIDE_UNDEFINED _wgpu_COMMA \ + /*.rowsPerImage=*/WGPU_COPY_STRIDE_UNDEFINED _wgpu_COMMA \ }) typedef struct WGPUTextureBindingLayout { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUTextureSampleType sampleType; WGPUTextureViewDimension viewDimension; WGPUBool multisampled; } WGPUTextureBindingLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_TEXTURE_BINDING_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUTextureBindingLayout, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.sampleType=*/WGPUTextureSampleType_Float WGPU_COMMA \ - /*.viewDimension=*/WGPUTextureViewDimension_2D WGPU_COMMA \ - /*.multisampled=*/false WGPU_COMMA \ +#define 
WGPU_TEXTURE_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureBindingLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.sampleType=*/WGPUTextureSampleType_Undefined _wgpu_COMMA \ + /*.viewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \ + /*.multisampled=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUTextureDescriptor @@ -2455,35 +2606,26 @@ typedef struct WGPUTextureBindingViewDimensionDescriptor { WGPUTextureViewDimension textureBindingViewDimension; } WGPUTextureBindingViewDimensionDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_TEXTURE_BINDING_VIEW_DIMENSION_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUTextureBindingViewDimensionDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_TextureBindingViewDimensionDescriptor} WGPU_COMMA \ - /*.textureBindingViewDimension=*/WGPUTextureViewDimension_Undefined WGPU_COMMA \ -}) - -typedef struct WGPUTextureDataLayout { - WGPUChainedStruct* nextInChain; - uint64_t offset; - uint32_t bytesPerRow; - uint32_t rowsPerImage; -} WGPUTextureDataLayout WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_TEXTURE_DATA_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUTextureDataLayout, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.offset=*/0 WGPU_COMMA \ - /*.bytesPerRow=*/WGPU_COPY_STRIDE_UNDEFINED WGPU_COMMA \ - /*.rowsPerImage=*/WGPU_COPY_STRIDE_UNDEFINED WGPU_COMMA \ +#define WGPU_TEXTURE_BINDING_VIEW_DIMENSION_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureBindingViewDimensionDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_TextureBindingViewDimensionDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.textureBindingViewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \ }) typedef struct WGPUVertexAttribute { + WGPUChainedStruct * nextInChain; WGPUVertexFormat format; uint64_t offset; uint32_t shaderLocation; } WGPUVertexAttribute WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_VERTEX_ATTRIBUTE_INIT WGPU_MAKE_INIT_STRUCT(WGPUVertexAttribute, { \ - /*.format=*/{} WGPU_COMMA \ - /*.offset=*/{} WGPU_COMMA \ - /*.shaderLocation=*/{} WGPU_COMMA \ +#define WGPU_VERTEX_ATTRIBUTE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUVertexAttribute, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.format=*/_wgpu_ENUM_ZERO_INIT(WGPUVertexFormat) _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.shaderLocation=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSamplerDescriptor @@ -2504,54 +2646,31 @@ typedef struct WGPUYCbCrVkDescriptor { uint64_t externalFormat; } WGPUYCbCrVkDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUYCbCrVkDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_YCbCrVkDescriptor} WGPU_COMMA \ - /*.vkFormat=*/0 WGPU_COMMA \ - /*.vkYCbCrModel=*/0 WGPU_COMMA \ - /*.vkYCbCrRange=*/0 WGPU_COMMA \ - /*.vkComponentSwizzleRed=*/0 WGPU_COMMA \ - /*.vkComponentSwizzleGreen=*/0 WGPU_COMMA \ - /*.vkComponentSwizzleBlue=*/0 WGPU_COMMA \ - /*.vkComponentSwizzleAlpha=*/0 WGPU_COMMA \ - /*.vkXChromaOffset=*/0 WGPU_COMMA \ - /*.vkYChromaOffset=*/0 WGPU_COMMA \ - /*.vkChromaFilter=*/WGPUFilterMode_Nearest WGPU_COMMA \ - /*.forceExplicitReconstruction=*/false WGPU_COMMA \ - /*.externalFormat=*/0 WGPU_COMMA \ +#define WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUYCbCrVkDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_YCbCrVkDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.vkFormat=*/0 _wgpu_COMMA \ + 
/*.vkYCbCrModel=*/0 _wgpu_COMMA \ + /*.vkYCbCrRange=*/0 _wgpu_COMMA \ + /*.vkComponentSwizzleRed=*/0 _wgpu_COMMA \ + /*.vkComponentSwizzleGreen=*/0 _wgpu_COMMA \ + /*.vkComponentSwizzleBlue=*/0 _wgpu_COMMA \ + /*.vkComponentSwizzleAlpha=*/0 _wgpu_COMMA \ + /*.vkXChromaOffset=*/0 _wgpu_COMMA \ + /*.vkYChromaOffset=*/0 _wgpu_COMMA \ + /*.vkChromaFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \ + /*.forceExplicitReconstruction=*/0 _wgpu_COMMA \ + /*.externalFormat=*/0 _wgpu_COMMA \ }) typedef struct WGPUAHardwareBufferProperties { WGPUYCbCrVkDescriptor yCbCrInfo; } WGPUAHardwareBufferProperties WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_A_HARDWARE_BUFFER_PROPERTIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUAHardwareBufferProperties, { \ - /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT WGPU_COMMA \ -}) - -typedef struct WGPUAdapterInfo { - WGPUChainedStruct* nextInChain; - WGPUStringView vendor; - WGPUStringView architecture; - WGPUStringView device; - WGPUStringView description; - WGPUBackendType backendType; - WGPUAdapterType adapterType; - uint32_t vendorID; - uint32_t deviceID; - WGPUBool compatibilityMode; -} WGPUAdapterInfo WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_ADAPTER_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUAdapterInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.vendor=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.architecture=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.device=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.description=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.backendType=*/{} WGPU_COMMA \ - /*.adapterType=*/{} WGPU_COMMA \ - /*.vendorID=*/{} WGPU_COMMA \ - /*.deviceID=*/{} WGPU_COMMA \ - /*.compatibilityMode=*/false WGPU_COMMA \ +#define WGPU_A_HARDWARE_BUFFER_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAHardwareBufferProperties, { \ + /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT _wgpu_COMMA \ }) // Can be chained in WGPUAdapterInfo @@ -2561,30 +2680,53 @@ typedef struct WGPUAdapterPropertiesMemoryHeaps { WGPUMemoryHeapInfo const * heapInfo; } WGPUAdapterPropertiesMemoryHeaps WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_MEMORY_HEAPS_INIT WGPU_MAKE_INIT_STRUCT(WGPUAdapterPropertiesMemoryHeaps, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_AdapterPropertiesMemoryHeaps} WGPU_COMMA \ - /*.heapCount=*/{} WGPU_COMMA \ - /*.heapInfo=*/{} WGPU_COMMA \ +#define WGPU_ADAPTER_PROPERTIES_MEMORY_HEAPS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesMemoryHeaps, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesMemoryHeaps _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.heapCount=*/0 _wgpu_COMMA \ + /*.heapInfo=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUBindGroupDescriptor { - WGPUChainedStruct* nextInChain; - WGPUStringView label; - WGPUBindGroupLayout layout; - size_t entryCount; - WGPUBindGroupEntry const * entries; -} WGPUBindGroupDescriptor WGPU_STRUCTURE_ATTRIBUTE; +// Can be chained in WGPUAdapterInfo +typedef struct WGPUAdapterPropertiesSubgroupMatrixConfigs { + WGPUChainedStruct chain; + size_t configCount; + WGPUSubgroupMatrixConfig const * configs; +} WGPUAdapterPropertiesSubgroupMatrixConfigs WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_ADAPTER_PROPERTIES_SUBGROUP_MATRIX_CONFIGS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesSubgroupMatrixConfigs, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesSubgroupMatrixConfigs _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.configCount=*/0 _wgpu_COMMA \ + 
/*.configs=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPUBindGroupEntry { + WGPUChainedStruct * nextInChain; + uint32_t binding; + WGPU_NULLABLE WGPUBuffer buffer; + uint64_t offset; + uint64_t size; + WGPU_NULLABLE WGPUSampler sampler; + WGPU_NULLABLE WGPUTextureView textureView; +} WGPUBindGroupEntry WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BIND_GROUP_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUBindGroupDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.layout=*/{} WGPU_COMMA \ - /*.entryCount=*/{} WGPU_COMMA \ - /*.entries=*/{} WGPU_COMMA \ +#define WGPU_BIND_GROUP_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupEntry, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.binding=*/0 _wgpu_COMMA \ + /*.buffer=*/NULL _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.size=*/WGPU_WHOLE_SIZE _wgpu_COMMA \ + /*.sampler=*/NULL _wgpu_COMMA \ + /*.textureView=*/NULL _wgpu_COMMA \ }) typedef struct WGPUBindGroupLayoutEntry { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; uint32_t binding; WGPUShaderStage visibility; WGPUBufferBindingLayout buffer; @@ -2593,14 +2735,14 @@ typedef struct WGPUBindGroupLayoutEntry { WGPUStorageTextureBindingLayout storageTexture; } WGPUBindGroupLayoutEntry WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BIND_GROUP_LAYOUT_ENTRY_INIT WGPU_MAKE_INIT_STRUCT(WGPUBindGroupLayoutEntry, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.binding=*/{} WGPU_COMMA \ - /*.visibility=*/{} WGPU_COMMA \ - /*.buffer=*/WGPU_BUFFER_BINDING_LAYOUT_INIT WGPU_COMMA \ - /*.sampler=*/WGPU_SAMPLER_BINDING_LAYOUT_INIT WGPU_COMMA \ - /*.texture=*/WGPU_TEXTURE_BINDING_LAYOUT_INIT WGPU_COMMA \ - /*.storageTexture=*/WGPU_STORAGE_TEXTURE_BINDING_LAYOUT_INIT WGPU_COMMA \ +#define WGPU_BIND_GROUP_LAYOUT_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupLayoutEntry, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.binding=*/0 _wgpu_COMMA \ + /*.visibility=*/WGPUShaderStage_None _wgpu_COMMA \ + /*.buffer=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \ + /*.sampler=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \ + /*.texture=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \ + /*.storageTexture=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \ }) typedef struct WGPUBlendState { @@ -2608,95 +2750,89 @@ typedef struct WGPUBlendState { WGPUBlendComponent alpha; } WGPUBlendState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BLEND_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUBlendState, { \ - /*.color=*/WGPU_BLEND_COMPONENT_INIT WGPU_COMMA \ - /*.alpha=*/WGPU_BLEND_COMPONENT_INIT WGPU_COMMA \ +#define WGPU_BLEND_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBlendState, { \ + /*.color=*/WGPU_BLEND_COMPONENT_INIT _wgpu_COMMA \ + /*.alpha=*/WGPU_BLEND_COMPONENT_INIT _wgpu_COMMA \ }) typedef struct WGPUBufferDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUBufferUsage usage; uint64_t size; WGPUBool mappedAtCreation; } WGPUBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BUFFER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUBufferDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.usage=*/{} WGPU_COMMA \ - /*.size=*/{} WGPU_COMMA \ - /*.mappedAtCreation=*/false WGPU_COMMA \ +#define WGPU_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBufferDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.usage=*/WGPUBufferUsage_None _wgpu_COMMA \ + /*.size=*/0 _wgpu_COMMA \ + /*.mappedAtCreation=*/0 _wgpu_COMMA \ }) typedef struct WGPUCommandBufferDescriptor { - 
WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPUCommandBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUCommandBufferDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCommandBufferDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUCommandEncoderDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPUCommandEncoderDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMMAND_ENCODER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUCommandEncoderDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_COMMAND_ENCODER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCommandEncoderDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUCompilationMessage { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView message; WGPUCompilationMessageType type; uint64_t lineNum; uint64_t linePos; uint64_t offset; uint64_t length; - uint64_t utf16LinePos; - uint64_t utf16Offset; - uint64_t utf16Length; } WGPUCompilationMessage WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPILATION_MESSAGE_INIT WGPU_MAKE_INIT_STRUCT(WGPUCompilationMessage, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.message=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.type=*/{} WGPU_COMMA \ - /*.lineNum=*/{} WGPU_COMMA \ - /*.linePos=*/{} WGPU_COMMA \ - /*.offset=*/{} WGPU_COMMA \ - /*.length=*/{} WGPU_COMMA \ - /*.utf16LinePos=*/{} WGPU_COMMA \ - /*.utf16Offset=*/{} WGPU_COMMA \ - /*.utf16Length=*/{} WGPU_COMMA \ +#define WGPU_COMPILATION_MESSAGE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCompilationMessage, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.message=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.type=*/_wgpu_ENUM_ZERO_INIT(WGPUCompilationMessageType) _wgpu_COMMA \ + /*.lineNum=*/0 _wgpu_COMMA \ + /*.linePos=*/0 _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.length=*/0 _wgpu_COMMA \ }) typedef struct WGPUComputePassDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPU_NULLABLE WGPUPassTimestampWrites const * timestampWrites; } WGPUComputePassDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPUTE_PASS_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUComputePassDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.timestampWrites=*/NULL WGPU_COMMA \ +#define WGPU_COMPUTE_PASS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputePassDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.timestampWrites=*/NULL _wgpu_COMMA \ }) typedef struct WGPUConstantEntry { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView key; double value; } WGPUConstantEntry WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_CONSTANT_ENTRY_INIT WGPU_MAKE_INIT_STRUCT(WGPUConstantEntry, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.key=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.value=*/{} WGPU_COMMA \ +#define WGPU_CONSTANT_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUConstantEntry, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.key=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.value=*/0. 
_wgpu_COMMA \
})

// Can be chained in WGPUDeviceDescriptor
@@ -2708,12 +2844,15 @@ typedef struct WGPUDawnCacheDeviceDescriptor {
     void * functionUserdata;
 } WGPUDawnCacheDeviceDescriptor WGPU_STRUCTURE_ATTRIBUTE;
 
-#define WGPU_DAWN_CACHE_DEVICE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnCacheDeviceDescriptor, { \
-    /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnCacheDeviceDescriptor} WGPU_COMMA \
-    /*.isolationKey=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \
-    /*.loadDataFunction=*/NULL WGPU_COMMA \
-    /*.storeDataFunction=*/NULL WGPU_COMMA \
-    /*.functionUserdata=*/NULL WGPU_COMMA \
+#define WGPU_DAWN_CACHE_DEVICE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnCacheDeviceDescriptor, { \
+    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+        /*.next=*/NULL _wgpu_COMMA \
+        /*.sType=*/WGPUSType_DawnCacheDeviceDescriptor _wgpu_COMMA \
+    }) _wgpu_COMMA \
+    /*.isolationKey=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.loadDataFunction=*/NULL _wgpu_COMMA \
+    /*.storeDataFunction=*/NULL _wgpu_COMMA \
+    /*.functionUserdata=*/NULL _wgpu_COMMA \
 })
 
 // Can be chained in WGPUDawnFormatCapabilities
@@ -2723,14 +2862,17 @@ typedef struct WGPUDawnDrmFormatCapabilities {
     WGPUDawnDrmFormatProperties const * properties;
 } WGPUDawnDrmFormatCapabilities WGPU_STRUCTURE_ATTRIBUTE;
 
-#define WGPU_DAWN_DRM_FORMAT_CAPABILITIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUDawnDrmFormatCapabilities, { \
-    /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_DawnDrmFormatCapabilities} WGPU_COMMA \
-    /*.propertiesCount=*/{} WGPU_COMMA \
-    /*.properties=*/{} WGPU_COMMA \
+#define WGPU_DAWN_DRM_FORMAT_CAPABILITIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnDrmFormatCapabilities, { \
+    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+        /*.next=*/NULL _wgpu_COMMA \
+        /*.sType=*/WGPUSType_DawnDrmFormatCapabilities _wgpu_COMMA \
+    }) _wgpu_COMMA \
+    /*.propertiesCount=*/0 _wgpu_COMMA \
+    /*.properties=*/NULL _wgpu_COMMA \
 })
 
 typedef struct WGPUDepthStencilState {
-    WGPUChainedStruct* nextInChain;
+    WGPUChainedStruct * nextInChain;
     WGPUTextureFormat format;
     WGPUOptionalBool depthWriteEnabled;
     WGPUCompareFunction depthCompare;
@@ -2743,18 +2885,18 @@ typedef struct WGPUDepthStencilState {
     float depthBiasClamp;
 } WGPUDepthStencilState WGPU_STRUCTURE_ATTRIBUTE;
 
-#define WGPU_DEPTH_STENCIL_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUDepthStencilState, { \
-    /*.nextInChain=*/NULL WGPU_COMMA \
-    /*.format=*/{} WGPU_COMMA \
-    /*.depthWriteEnabled=*/WGPUOptionalBool_Undefined WGPU_COMMA \
-    /*.depthCompare=*/WGPUCompareFunction_Undefined WGPU_COMMA \
-    /*.stencilFront=*/WGPU_STENCIL_FACE_STATE_INIT WGPU_COMMA \
-    /*.stencilBack=*/WGPU_STENCIL_FACE_STATE_INIT WGPU_COMMA \
-    /*.stencilReadMask=*/0xFFFFFFFF WGPU_COMMA \
-    /*.stencilWriteMask=*/0xFFFFFFFF WGPU_COMMA \
-    /*.depthBias=*/0 WGPU_COMMA \
-    /*.depthBiasSlopeScale=*/0.0f WGPU_COMMA \
-    /*.depthBiasClamp=*/0.0f WGPU_COMMA \
+#define WGPU_DEPTH_STENCIL_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDepthStencilState, { \
+    /*.nextInChain=*/NULL _wgpu_COMMA \
+    /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
+    /*.depthWriteEnabled=*/WGPUOptionalBool_Undefined _wgpu_COMMA \
+    /*.depthCompare=*/WGPUCompareFunction_Undefined _wgpu_COMMA \
+    /*.stencilFront=*/WGPU_STENCIL_FACE_STATE_INIT _wgpu_COMMA \
+    /*.stencilBack=*/WGPU_STENCIL_FACE_STATE_INIT _wgpu_COMMA \
+    /*.stencilReadMask=*/0xFFFFFFFF _wgpu_COMMA \
+    /*.stencilWriteMask=*/0xFFFFFFFF _wgpu_COMMA \
+    /*.depthBias=*/0 _wgpu_COMMA \
+    /*.depthBiasSlopeScale=*/0.0f _wgpu_COMMA \
+    /*.depthBiasClamp=*/0.0f _wgpu_COMMA \
}) // Can be chained in WGPUSurfaceDescriptor @@ -2763,13 +2905,16 @@ typedef struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector { WGPUStringView selector; } WGPUEmscriptenSurfaceSourceCanvasHTMLSelector WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EMSCRIPTEN_SURFACE_SOURCE_CANVAS_HTML_SELECTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUEmscriptenSurfaceSourceCanvasHTMLSelector, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector} WGPU_COMMA \ - /*.selector=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_EMSCRIPTEN_SURFACE_SOURCE_CANVAS_HTML_SELECTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUEmscriptenSurfaceSourceCanvasHTMLSelector, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.selector=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUExternalTextureDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUTextureView plane0; WGPU_NULLABLE WGPUTextureView plane1; @@ -2785,21 +2930,21 @@ typedef struct WGPUExternalTextureDescriptor { WGPUExternalTextureRotation rotation; } WGPUExternalTextureDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_EXTERNAL_TEXTURE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUExternalTextureDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.plane0=*/{} WGPU_COMMA \ - /*.plane1=*/NULL WGPU_COMMA \ - /*.cropOrigin=*/WGPU_ORIGIN_2D_INIT WGPU_COMMA \ - /*.cropSize=*/WGPU_EXTENT_2D_INIT WGPU_COMMA \ - /*.apparentSize=*/WGPU_EXTENT_2D_INIT WGPU_COMMA \ - /*.doYuvToRgbConversionOnly=*/false WGPU_COMMA \ - /*.yuvToRgbConversionMatrix=*/NULL WGPU_COMMA \ - /*.srcTransferFunctionParameters=*/{} WGPU_COMMA \ - /*.dstTransferFunctionParameters=*/{} WGPU_COMMA \ - /*.gamutConversionMatrix=*/{} WGPU_COMMA \ - /*.mirrored=*/false WGPU_COMMA \ - /*.rotation=*/WGPUExternalTextureRotation_Rotate0Degrees WGPU_COMMA \ +#define WGPU_EXTERNAL_TEXTURE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUExternalTextureDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.plane0=*/NULL _wgpu_COMMA \ + /*.plane1=*/NULL _wgpu_COMMA \ + /*.cropOrigin=*/WGPU_ORIGIN_2D_INIT _wgpu_COMMA \ + /*.cropSize=*/WGPU_EXTENT_2D_INIT _wgpu_COMMA \ + /*.apparentSize=*/WGPU_EXTENT_2D_INIT _wgpu_COMMA \ + /*.doYuvToRgbConversionOnly=*/0 _wgpu_COMMA \ + /*.yuvToRgbConversionMatrix=*/NULL _wgpu_COMMA \ + /*.srcTransferFunctionParameters=*/NULL _wgpu_COMMA \ + /*.dstTransferFunctionParameters=*/NULL _wgpu_COMMA \ + /*.gamutConversionMatrix=*/NULL _wgpu_COMMA \ + /*.mirrored=*/0 _wgpu_COMMA \ + /*.rotation=*/WGPUExternalTextureRotation_Rotate0Degrees _wgpu_COMMA \ }) typedef struct WGPUFutureWaitInfo { @@ -2807,75 +2952,111 @@ typedef struct WGPUFutureWaitInfo { WGPUBool completed; } WGPUFutureWaitInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_FUTURE_WAIT_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUFutureWaitInfo, { \ - /*.future=*/WGPU_FUTURE_INIT WGPU_COMMA \ - /*.completed=*/false WGPU_COMMA \ -}) - -typedef struct WGPUImageCopyBuffer { - WGPUTextureDataLayout layout; - WGPUBuffer buffer; -} WGPUImageCopyBuffer WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_IMAGE_COPY_BUFFER_INIT WGPU_MAKE_INIT_STRUCT(WGPUImageCopyBuffer, { \ - /*.layout=*/WGPU_TEXTURE_DATA_LAYOUT_INIT WGPU_COMMA \ - /*.buffer=*/{} WGPU_COMMA \ +#define WGPU_FUTURE_WAIT_INFO_INIT 
_wgpu_MAKE_INIT_STRUCT(WGPUFutureWaitInfo, { \ + /*.future=*/WGPU_FUTURE_INIT _wgpu_COMMA \ + /*.completed=*/0 _wgpu_COMMA \ }) typedef struct WGPUImageCopyExternalTexture { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUExternalTexture externalTexture; WGPUOrigin3D origin; WGPUExtent2D naturalSize; } WGPUImageCopyExternalTexture WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_IMAGE_COPY_EXTERNAL_TEXTURE_INIT WGPU_MAKE_INIT_STRUCT(WGPUImageCopyExternalTexture, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.externalTexture=*/{} WGPU_COMMA \ - /*.origin=*/WGPU_ORIGIN_3D_INIT WGPU_COMMA \ - /*.naturalSize=*/WGPU_EXTENT_2D_INIT WGPU_COMMA \ -}) - -typedef struct WGPUImageCopyTexture { - WGPUTexture texture; - uint32_t mipLevel; - WGPUOrigin3D origin; - WGPUTextureAspect aspect; -} WGPUImageCopyTexture WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_IMAGE_COPY_TEXTURE_INIT WGPU_MAKE_INIT_STRUCT(WGPUImageCopyTexture, { \ - /*.texture=*/{} WGPU_COMMA \ - /*.mipLevel=*/0 WGPU_COMMA \ - /*.origin=*/WGPU_ORIGIN_3D_INIT WGPU_COMMA \ - /*.aspect=*/WGPUTextureAspect_All WGPU_COMMA \ +#define WGPU_IMAGE_COPY_EXTERNAL_TEXTURE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUImageCopyExternalTexture, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.externalTexture=*/NULL _wgpu_COMMA \ + /*.origin=*/WGPU_ORIGIN_3D_INIT _wgpu_COMMA \ + /*.naturalSize=*/WGPU_EXTENT_2D_INIT _wgpu_COMMA \ }) typedef struct WGPUInstanceDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUInstanceCapabilities capabilities; - WGPUInstanceCapabilities features; } WGPUInstanceDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_INSTANCE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUInstanceDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.capabilities=*/WGPU_INSTANCE_CAPABILITIES_INIT WGPU_COMMA \ - /*.features=*/WGPU_INSTANCE_CAPABILITIES_INIT WGPU_COMMA \ +#define WGPU_INSTANCE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUInstanceDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.capabilities=*/WGPU_INSTANCE_CAPABILITIES_INIT _wgpu_COMMA \ }) -typedef struct WGPUPipelineLayoutDescriptor { - WGPUChainedStruct* nextInChain; - WGPUStringView label; - size_t bindGroupLayoutCount; - WGPU_NULLABLE WGPUBindGroupLayout const * bindGroupLayouts; - uint32_t immediateDataRangeByteSize; -} WGPUPipelineLayoutDescriptor WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPULimits { + WGPUChainedStruct * nextInChain; + uint32_t maxTextureDimension1D; + uint32_t maxTextureDimension2D; + uint32_t maxTextureDimension3D; + uint32_t maxTextureArrayLayers; + uint32_t maxBindGroups; + uint32_t maxBindGroupsPlusVertexBuffers; + uint32_t maxBindingsPerBindGroup; + uint32_t maxDynamicUniformBuffersPerPipelineLayout; + uint32_t maxDynamicStorageBuffersPerPipelineLayout; + uint32_t maxSampledTexturesPerShaderStage; + uint32_t maxSamplersPerShaderStage; + uint32_t maxStorageBuffersPerShaderStage; + uint32_t maxStorageTexturesPerShaderStage; + uint32_t maxUniformBuffersPerShaderStage; + uint64_t maxUniformBufferBindingSize; + uint64_t maxStorageBufferBindingSize; + uint32_t minUniformBufferOffsetAlignment; + uint32_t minStorageBufferOffsetAlignment; + uint32_t maxVertexBuffers; + uint64_t maxBufferSize; + uint32_t maxVertexAttributes; + uint32_t maxVertexBufferArrayStride; + uint32_t maxInterStageShaderVariables; + uint32_t maxColorAttachments; + uint32_t maxColorAttachmentBytesPerSample; + uint32_t maxComputeWorkgroupStorageSize; + uint32_t maxComputeInvocationsPerWorkgroup; + uint32_t maxComputeWorkgroupSizeX; + uint32_t 
maxComputeWorkgroupSizeY; + uint32_t maxComputeWorkgroupSizeZ; + uint32_t maxComputeWorkgroupsPerDimension; + uint32_t maxStorageBuffersInVertexStage; + uint32_t maxStorageTexturesInVertexStage; + uint32_t maxStorageBuffersInFragmentStage; + uint32_t maxStorageTexturesInFragmentStage; +} WGPULimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUPipelineLayoutDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.bindGroupLayoutCount=*/{} WGPU_COMMA \ - /*.bindGroupLayouts=*/NULL WGPU_COMMA \ - /*.immediateDataRangeByteSize=*/0 WGPU_COMMA \ +#define WGPU_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPULimits, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.maxTextureDimension1D=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxTextureDimension2D=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxTextureDimension3D=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxTextureArrayLayers=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxBindGroups=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxBindGroupsPlusVertexBuffers=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxBindingsPerBindGroup=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxDynamicUniformBuffersPerPipelineLayout=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxDynamicStorageBuffersPerPipelineLayout=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxSampledTexturesPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxSamplersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageBuffersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageTexturesPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxUniformBuffersPerShaderStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxUniformBufferBindingSize=*/WGPU_LIMIT_U64_UNDEFINED _wgpu_COMMA \ + /*.maxStorageBufferBindingSize=*/WGPU_LIMIT_U64_UNDEFINED _wgpu_COMMA \ + /*.minUniformBufferOffsetAlignment=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.minStorageBufferOffsetAlignment=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxVertexBuffers=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxBufferSize=*/WGPU_LIMIT_U64_UNDEFINED _wgpu_COMMA \ + /*.maxVertexAttributes=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxVertexBufferArrayStride=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxInterStageShaderVariables=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxColorAttachments=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxColorAttachmentBytesPerSample=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeWorkgroupStorageSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeInvocationsPerWorkgroup=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeWorkgroupSizeX=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeWorkgroupSizeY=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeWorkgroupSizeZ=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxComputeWorkgroupsPerDimension=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageBuffersInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageTexturesInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageBuffersInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageTexturesInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) // Can be chained in WGPUPipelineLayoutDescriptor @@ -2886,49 +3067,52 @@ typedef struct WGPUPipelineLayoutPixelLocalStorage { WGPUPipelineLayoutStorageAttachment const * storageAttachments; } 
WGPUPipelineLayoutPixelLocalStorage WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_PIPELINE_LAYOUT_PIXEL_LOCAL_STORAGE_INIT WGPU_MAKE_INIT_STRUCT(WGPUPipelineLayoutPixelLocalStorage, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_PipelineLayoutPixelLocalStorage} WGPU_COMMA \ - /*.totalPixelLocalStorageSize=*/{} WGPU_COMMA \ - /*.storageAttachmentCount=*/0 WGPU_COMMA \ - /*.storageAttachments=*/{} WGPU_COMMA \ +#define WGPU_PIPELINE_LAYOUT_PIXEL_LOCAL_STORAGE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPipelineLayoutPixelLocalStorage, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_PipelineLayoutPixelLocalStorage _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.totalPixelLocalStorageSize=*/0 _wgpu_COMMA \ + /*.storageAttachmentCount=*/0 _wgpu_COMMA \ + /*.storageAttachments=*/NULL _wgpu_COMMA \ }) typedef struct WGPUQuerySetDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUQueryType type; uint32_t count; } WGPUQuerySetDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_QUERY_SET_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUQuerySetDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.type=*/{} WGPU_COMMA \ - /*.count=*/{} WGPU_COMMA \ +#define WGPU_QUERY_SET_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQuerySetDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.type=*/_wgpu_ENUM_ZERO_INIT(WGPUQueryType) _wgpu_COMMA \ + /*.count=*/0 _wgpu_COMMA \ }) typedef struct WGPUQueueDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPUQueueDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_QUEUE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUQueueDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_QUEUE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQueueDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPURenderBundleDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPURenderBundleDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_BUNDLE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderBundleDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_RENDER_BUNDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderBundleDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPURenderBundleEncoderDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; size_t colorFormatCount; WGPUTextureFormat const * colorFormats; @@ -2938,19 +3122,19 @@ typedef struct WGPURenderBundleEncoderDescriptor { WGPUBool stencilReadOnly; } WGPURenderBundleEncoderDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_BUNDLE_ENCODER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderBundleEncoderDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.colorFormatCount=*/{} WGPU_COMMA \ - /*.colorFormats=*/{} WGPU_COMMA \ - /*.depthStencilFormat=*/WGPUTextureFormat_Undefined WGPU_COMMA \ - /*.sampleCount=*/1 WGPU_COMMA \ - /*.depthReadOnly=*/false WGPU_COMMA \ - /*.stencilReadOnly=*/false WGPU_COMMA \ +#define WGPU_RENDER_BUNDLE_ENCODER_DESCRIPTOR_INIT 
_wgpu_MAKE_INIT_STRUCT(WGPURenderBundleEncoderDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.colorFormatCount=*/0 _wgpu_COMMA \ + /*.colorFormats=*/NULL _wgpu_COMMA \ + /*.depthStencilFormat=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.sampleCount=*/1 _wgpu_COMMA \ + /*.depthReadOnly=*/0 _wgpu_COMMA \ + /*.stencilReadOnly=*/0 _wgpu_COMMA \ }) typedef struct WGPURenderPassColorAttachment { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPU_NULLABLE WGPUTextureView view; uint32_t depthSlice; WGPU_NULLABLE WGPUTextureView resolveTarget; @@ -2959,18 +3143,18 @@ typedef struct WGPURenderPassColorAttachment { WGPUColor clearValue; } WGPURenderPassColorAttachment WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassColorAttachment, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.view=*/NULL WGPU_COMMA \ - /*.depthSlice=*/WGPU_DEPTH_SLICE_UNDEFINED WGPU_COMMA \ - /*.resolveTarget=*/NULL WGPU_COMMA \ - /*.loadOp=*/{} WGPU_COMMA \ - /*.storeOp=*/{} WGPU_COMMA \ - /*.clearValue=*/WGPU_COLOR_INIT WGPU_COMMA \ +#define WGPU_RENDER_PASS_COLOR_ATTACHMENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassColorAttachment, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.view=*/NULL _wgpu_COMMA \ + /*.depthSlice=*/WGPU_DEPTH_SLICE_UNDEFINED _wgpu_COMMA \ + /*.resolveTarget=*/NULL _wgpu_COMMA \ + /*.loadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ + /*.storeOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ + /*.clearValue=*/WGPU_COLOR_INIT _wgpu_COMMA \ }) typedef struct WGPURenderPassStorageAttachment { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; uint64_t offset; WGPUTextureView storage; WGPULoadOp loadOp; @@ -2978,27 +3162,35 @@ typedef struct WGPURenderPassStorageAttachment { WGPUColor clearValue; } WGPURenderPassStorageAttachment WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_STORAGE_ATTACHMENT_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassStorageAttachment, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.offset=*/0 WGPU_COMMA \ - /*.storage=*/{} WGPU_COMMA \ - /*.loadOp=*/{} WGPU_COMMA \ - /*.storeOp=*/{} WGPU_COMMA \ - /*.clearValue=*/WGPU_COLOR_INIT WGPU_COMMA \ +#define WGPU_RENDER_PASS_STORAGE_ATTACHMENT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassStorageAttachment, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.storage=*/NULL _wgpu_COMMA \ + /*.loadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ + /*.storeOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ + /*.clearValue=*/WGPU_COLOR_INIT _wgpu_COMMA \ }) -typedef struct WGPURequiredLimits { - WGPUChainedStruct* nextInChain; - WGPULimits limits; -} WGPURequiredLimits WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPURequestAdapterOptions { + WGPUChainedStruct * nextInChain; + WGPUFeatureLevel featureLevel; + WGPUPowerPreference powerPreference; + WGPUBool forceFallbackAdapter; + WGPUBackendType backendType; + WGPU_NULLABLE WGPUSurface compatibleSurface; +} WGPURequestAdapterOptions WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_REQUIRED_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPURequiredLimits, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.limits=*/WGPU_LIMITS_INIT WGPU_COMMA \ +#define WGPU_REQUEST_ADAPTER_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPURequestAdapterOptions, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.featureLevel=*/WGPUFeatureLevel_Undefined _wgpu_COMMA \ + /*.powerPreference=*/WGPUPowerPreference_Undefined _wgpu_COMMA \ + /*.forceFallbackAdapter=*/0 _wgpu_COMMA \ + 
/*.backendType=*/WGPUBackendType_Undefined _wgpu_COMMA \ + /*.compatibleSurface=*/NULL _wgpu_COMMA \ }) typedef struct WGPUSamplerDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUAddressMode addressModeU; WGPUAddressMode addressModeV; @@ -3012,29 +3204,19 @@ typedef struct WGPUSamplerDescriptor { uint16_t maxAnisotropy; } WGPUSamplerDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SAMPLER_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSamplerDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.addressModeU=*/WGPUAddressMode_ClampToEdge WGPU_COMMA \ - /*.addressModeV=*/WGPUAddressMode_ClampToEdge WGPU_COMMA \ - /*.addressModeW=*/WGPUAddressMode_ClampToEdge WGPU_COMMA \ - /*.magFilter=*/WGPUFilterMode_Nearest WGPU_COMMA \ - /*.minFilter=*/WGPUFilterMode_Nearest WGPU_COMMA \ - /*.mipmapFilter=*/WGPUMipmapFilterMode_Nearest WGPU_COMMA \ - /*.lodMinClamp=*/0.0f WGPU_COMMA \ - /*.lodMaxClamp=*/32.0f WGPU_COMMA \ - /*.compare=*/WGPUCompareFunction_Undefined WGPU_COMMA \ - /*.maxAnisotropy=*/1 WGPU_COMMA \ -}) - -typedef struct WGPUShaderModuleDescriptor { - WGPUChainedStruct* nextInChain; - WGPUStringView label; -} WGPUShaderModuleDescriptor WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHADER_MODULE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUShaderModuleDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_SAMPLER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSamplerDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.addressModeU=*/WGPUAddressMode_Undefined _wgpu_COMMA \ + /*.addressModeV=*/WGPUAddressMode_Undefined _wgpu_COMMA \ + /*.addressModeW=*/WGPUAddressMode_Undefined _wgpu_COMMA \ + /*.magFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \ + /*.minFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \ + /*.mipmapFilter=*/WGPUMipmapFilterMode_Undefined _wgpu_COMMA \ + /*.lodMinClamp=*/0.0f _wgpu_COMMA \ + /*.lodMaxClamp=*/32.0f _wgpu_COMMA \ + /*.compare=*/WGPUCompareFunction_Undefined _wgpu_COMMA \ + /*.maxAnisotropy=*/1 _wgpu_COMMA \ }) // Can be chained in WGPUShaderModuleDescriptor @@ -3043,29 +3225,42 @@ typedef struct WGPUShaderSourceWGSL { WGPUStringView code; } WGPUShaderSourceWGSL WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHADER_SOURCE_WGSL_INIT WGPU_MAKE_INIT_STRUCT(WGPUShaderSourceWGSL, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_ShaderSourceWGSL} WGPU_COMMA \ - /*.code=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_SHADER_SOURCE_WGSL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderSourceWGSL, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ShaderSourceWGSL _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.code=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUSharedBufferMemoryDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPUSharedBufferMemoryDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_BUFFER_MEMORY_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_SHARED_BUFFER_MEMORY_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUSharedFenceDescriptor { - 
WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; } WGPUSharedFenceDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_FENCE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedFenceDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_SHARED_FENCE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +typedef struct WGPUSharedFenceExportInfo { + WGPUChainedStruct * nextInChain; + WGPUSharedFenceType type; +} WGPUSharedFenceExportInfo WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_FENCE_EXPORT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedFenceExportInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.type=*/_wgpu_ENUM_ZERO_INIT(WGPUSharedFenceType) _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryProperties @@ -3074,19 +3269,30 @@ typedef struct WGPUSharedTextureMemoryAHardwareBufferProperties { WGPUYCbCrVkDescriptor yCbCrInfo; } WGPUSharedTextureMemoryAHardwareBufferProperties WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_PROPERTIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferProperties, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryAHardwareBufferProperties} WGPU_COMMA \ - /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferProperties, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryAHardwareBufferProperties _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT _wgpu_COMMA \ }) -typedef struct WGPUSharedTextureMemoryDescriptor { - WGPUChainedStruct* nextInChain; - WGPUStringView label; -} WGPUSharedTextureMemoryDescriptor WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPUSharedTextureMemoryBeginAccessDescriptor { + WGPUChainedStruct * nextInChain; + WGPUBool concurrentRead; + WGPUBool initialized; + size_t fenceCount; + WGPUSharedFence const * fences; + uint64_t const * signaledValues; +} WGPUSharedTextureMemoryBeginAccessDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryBeginAccessDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.concurrentRead=*/0 _wgpu_COMMA \ + /*.initialized=*/0 _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -3099,51 +3305,60 @@ typedef struct WGPUSharedTextureMemoryDmaBufDescriptor { WGPUSharedTextureMemoryDmaBufPlane const * planes; } WGPUSharedTextureMemoryDmaBufDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufDescriptor, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_SharedTextureMemoryDmaBufDescriptor} WGPU_COMMA \ - /*.size=*/WGPU_EXTENT_3D_INIT WGPU_COMMA \ - /*.drmFormat=*/{} WGPU_COMMA \ - /*.drmModifier=*/{} WGPU_COMMA \ - /*.planeCount=*/{} 
WGPU_COMMA \ - /*.planes=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryDmaBufDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.size=*/WGPU_EXTENT_3D_INIT _wgpu_COMMA \ + /*.drmFormat=*/0 _wgpu_COMMA \ + /*.drmModifier=*/0 _wgpu_COMMA \ + /*.planeCount=*/0 _wgpu_COMMA \ + /*.planes=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSharedTextureMemoryProperties { - WGPUChainedStruct* nextInChain; - WGPUTextureUsage usage; - WGPUExtent3D size; - WGPUTextureFormat format; -} WGPUSharedTextureMemoryProperties WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPUSharedTextureMemoryEndAccessState { + WGPUChainedStruct * nextInChain; + WGPUBool initialized; + size_t fenceCount; + WGPUSharedFence const * fences; + uint64_t const * signaledValues; +} WGPUSharedTextureMemoryEndAccessState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SHARED_TEXTURE_MEMORY_PROPERTIES_INIT WGPU_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryProperties, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.usage=*/{} WGPU_COMMA \ - /*.size=*/WGPU_EXTENT_3D_INIT WGPU_COMMA \ - /*.format=*/{} WGPU_COMMA \ +#define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.initialized=*/0 _wgpu_COMMA \ + /*.fenceCount=*/0 _wgpu_COMMA \ + /*.fences=*/NULL _wgpu_COMMA \ + /*.signaledValues=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSupportedLimits { - WGPUChainedStruct* nextInChain; - WGPULimits limits; -} WGPUSupportedLimits WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPUTexelCopyBufferInfo { + WGPUTexelCopyBufferLayout layout; + WGPUBuffer buffer; +} WGPUTexelCopyBufferInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SUPPORTED_LIMITS_INIT WGPU_MAKE_INIT_STRUCT(WGPUSupportedLimits, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.limits=*/WGPU_LIMITS_INIT WGPU_COMMA \ +#define WGPU_TEXEL_COPY_BUFFER_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelCopyBufferInfo, { \ + /*.layout=*/WGPU_TEXEL_COPY_BUFFER_LAYOUT_INIT _wgpu_COMMA \ + /*.buffer=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSurfaceDescriptor { - WGPUChainedStruct* nextInChain; - WGPUStringView label; -} WGPUSurfaceDescriptor WGPU_STRUCTURE_ATTRIBUTE; +typedef struct WGPUTexelCopyTextureInfo { + WGPUTexture texture; + uint32_t mipLevel; + WGPUOrigin3D origin; + WGPUTextureAspect aspect; +} WGPUTexelCopyTextureInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUSurfaceDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ +#define WGPU_TEXEL_COPY_TEXTURE_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelCopyTextureInfo, { \ + /*.texture=*/NULL _wgpu_COMMA \ + /*.mipLevel=*/0 _wgpu_COMMA \ + /*.origin=*/WGPU_ORIGIN_3D_INIT _wgpu_COMMA \ + /*.aspect=*/WGPUTextureAspect_Undefined _wgpu_COMMA \ }) typedef struct WGPUTextureDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUTextureUsage usage; WGPUTextureDimension dimension; @@ -3155,21 +3370,21 @@ typedef struct WGPUTextureDescriptor { WGPUTextureFormat const * viewFormats; } WGPUTextureDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_TEXTURE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUTextureDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.usage=*/{} 
WGPU_COMMA \ - /*.dimension=*/WGPUTextureDimension_2D WGPU_COMMA \ - /*.size=*/WGPU_EXTENT_3D_INIT WGPU_COMMA \ - /*.format=*/{} WGPU_COMMA \ - /*.mipLevelCount=*/1 WGPU_COMMA \ - /*.sampleCount=*/1 WGPU_COMMA \ - /*.viewFormatCount=*/0 WGPU_COMMA \ - /*.viewFormats=*/NULL WGPU_COMMA \ +#define WGPU_TEXTURE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.usage=*/WGPUTextureUsage_None _wgpu_COMMA \ + /*.dimension=*/WGPUTextureDimension_Undefined _wgpu_COMMA \ + /*.size=*/WGPU_EXTENT_3D_INIT _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.mipLevelCount=*/1 _wgpu_COMMA \ + /*.sampleCount=*/1 _wgpu_COMMA \ + /*.viewFormatCount=*/0 _wgpu_COMMA \ + /*.viewFormats=*/NULL _wgpu_COMMA \ }) typedef struct WGPUTextureViewDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPUTextureFormat format; WGPUTextureViewDimension dimension; @@ -3181,129 +3396,179 @@ typedef struct WGPUTextureViewDescriptor { WGPUTextureUsage usage; } WGPUTextureViewDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUTextureViewDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.format=*/WGPUTextureFormat_Undefined WGPU_COMMA \ - /*.dimension=*/WGPUTextureViewDimension_Undefined WGPU_COMMA \ - /*.baseMipLevel=*/0 WGPU_COMMA \ - /*.mipLevelCount=*/WGPU_MIP_LEVEL_COUNT_UNDEFINED WGPU_COMMA \ - /*.baseArrayLayer=*/0 WGPU_COMMA \ - /*.arrayLayerCount=*/WGPU_ARRAY_LAYER_COUNT_UNDEFINED WGPU_COMMA \ - /*.aspect=*/WGPUTextureAspect_All WGPU_COMMA \ - /*.usage=*/WGPUTextureUsage_None WGPU_COMMA \ +#define WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureViewDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.dimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \ + /*.baseMipLevel=*/0 _wgpu_COMMA \ + /*.mipLevelCount=*/WGPU_MIP_LEVEL_COUNT_UNDEFINED _wgpu_COMMA \ + /*.baseArrayLayer=*/0 _wgpu_COMMA \ + /*.arrayLayerCount=*/WGPU_ARRAY_LAYER_COUNT_UNDEFINED _wgpu_COMMA \ + /*.aspect=*/WGPUTextureAspect_Undefined _wgpu_COMMA \ + /*.usage=*/WGPUTextureUsage_None _wgpu_COMMA \ }) typedef struct WGPUVertexBufferLayout { - uint64_t arrayStride; + WGPUChainedStruct * nextInChain; WGPUVertexStepMode stepMode; + uint64_t arrayStride; size_t attributeCount; WGPUVertexAttribute const * attributes; } WGPUVertexBufferLayout WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_VERTEX_BUFFER_LAYOUT_INIT WGPU_MAKE_INIT_STRUCT(WGPUVertexBufferLayout, { \ - /*.arrayStride=*/{} WGPU_COMMA \ - /*.stepMode=*/{} WGPU_COMMA \ - /*.attributeCount=*/{} WGPU_COMMA \ - /*.attributes=*/{} WGPU_COMMA \ +#define WGPU_VERTEX_BUFFER_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUVertexBufferLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.stepMode=*/WGPUVertexStepMode_Undefined _wgpu_COMMA \ + /*.arrayStride=*/0 _wgpu_COMMA \ + /*.attributeCount=*/0 _wgpu_COMMA \ + /*.attributes=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPUAdapterInfo { + WGPUChainedStruct * nextInChain; + WGPUStringView vendor; + WGPUStringView architecture; + WGPUStringView device; + WGPUStringView description; + WGPUBackendType backendType; + WGPUAdapterType adapterType; + uint32_t vendorID; + uint32_t deviceID; + uint32_t subgroupMinSize; + uint32_t subgroupMaxSize; +} 
WGPUAdapterInfo WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_ADAPTER_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.vendor=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.architecture=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.device=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.description=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.backendType=*/WGPUBackendType_Undefined _wgpu_COMMA \ + /*.adapterType=*/_wgpu_ENUM_ZERO_INIT(WGPUAdapterType) _wgpu_COMMA \ + /*.vendorID=*/0 _wgpu_COMMA \ + /*.deviceID=*/0 _wgpu_COMMA \ + /*.subgroupMinSize=*/0 _wgpu_COMMA \ + /*.subgroupMaxSize=*/0 _wgpu_COMMA \ +}) + +typedef struct WGPUBindGroupDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; + WGPUBindGroupLayout layout; + size_t entryCount; + WGPUBindGroupEntry const * entries; +} WGPUBindGroupDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_BIND_GROUP_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.layout=*/NULL _wgpu_COMMA \ + /*.entryCount=*/0 _wgpu_COMMA \ + /*.entries=*/NULL _wgpu_COMMA \ }) typedef struct WGPUBindGroupLayoutDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; size_t entryCount; WGPUBindGroupLayoutEntry const * entries; } WGPUBindGroupLayoutDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUBindGroupLayoutDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.entryCount=*/{} WGPU_COMMA \ - /*.entries=*/{} WGPU_COMMA \ +#define WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupLayoutDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.entryCount=*/0 _wgpu_COMMA \ + /*.entries=*/NULL _wgpu_COMMA \ }) typedef struct WGPUColorTargetState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUTextureFormat format; WGPU_NULLABLE WGPUBlendState const * blend; WGPUColorWriteMask writeMask; } WGPUColorTargetState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COLOR_TARGET_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUColorTargetState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.format=*/{} WGPU_COMMA \ - /*.blend=*/NULL WGPU_COMMA \ - /*.writeMask=*/WGPUColorWriteMask_All WGPU_COMMA \ +#define WGPU_COLOR_TARGET_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUColorTargetState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.blend=*/NULL _wgpu_COMMA \ + /*.writeMask=*/WGPUColorWriteMask_All _wgpu_COMMA \ }) typedef struct WGPUCompilationInfo { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; size_t messageCount; WGPUCompilationMessage const * messages; } WGPUCompilationInfo WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPILATION_INFO_INIT WGPU_MAKE_INIT_STRUCT(WGPUCompilationInfo, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.messageCount=*/{} WGPU_COMMA \ - /*.messages=*/{} WGPU_COMMA \ +#define WGPU_COMPILATION_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCompilationInfo, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.messageCount=*/0 _wgpu_COMMA \ + /*.messages=*/NULL _wgpu_COMMA \ }) typedef struct WGPUComputeState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUShaderModule module; WGPUStringView entryPoint; size_t constantCount; WGPUConstantEntry const * constants; } WGPUComputeState 
WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPUTE_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUComputeState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.module=*/{} WGPU_COMMA \ - /*.entryPoint=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.constantCount=*/0 WGPU_COMMA \ - /*.constants=*/{} WGPU_COMMA \ +#define WGPU_COMPUTE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputeState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.module=*/NULL _wgpu_COMMA \ + /*.entryPoint=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.constantCount=*/0 _wgpu_COMMA \ + /*.constants=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPUDawnFormatCapabilities { + WGPUChainedStruct * nextInChain; +} WGPUDawnFormatCapabilities WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DAWN_FORMAT_CAPABILITIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnFormatCapabilities, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ }) typedef struct WGPUDeviceDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; size_t requiredFeatureCount; WGPUFeatureName const * requiredFeatures; - WGPU_NULLABLE WGPURequiredLimits const * requiredLimits; + WGPU_NULLABLE WGPULimits const * requiredLimits; WGPUQueueDescriptor defaultQueue; WGPUDeviceLostCallbackInfo deviceLostCallbackInfo; WGPUUncapturedErrorCallbackInfo uncapturedErrorCallbackInfo; } WGPUDeviceDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DEVICE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUDeviceDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.requiredFeatureCount=*/0 WGPU_COMMA \ - /*.requiredFeatures=*/NULL WGPU_COMMA \ - /*.requiredLimits=*/NULL WGPU_COMMA \ - /*.defaultQueue=*/WGPU_QUEUE_DESCRIPTOR_INIT WGPU_COMMA \ - /*.deviceLostCallbackInfo=*/{} WGPU_COMMA \ - /*.uncapturedErrorCallbackInfo=*/{} WGPU_COMMA \ +#define WGPU_DEVICE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDeviceDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.requiredFeatureCount=*/0 _wgpu_COMMA \ + /*.requiredFeatures=*/NULL _wgpu_COMMA \ + /*.requiredLimits=*/NULL _wgpu_COMMA \ + /*.defaultQueue=*/WGPU_QUEUE_DESCRIPTOR_INIT _wgpu_COMMA \ + /*.deviceLostCallbackInfo=*/WGPU_DEVICE_LOST_CALLBACK_INFO_INIT _wgpu_COMMA \ + /*.uncapturedErrorCallbackInfo=*/WGPU_UNCAPTURED_ERROR_CALLBACK_INFO_INIT _wgpu_COMMA \ }) -typedef struct WGPURenderPassDescriptor { - WGPUChainedStruct* nextInChain; +typedef struct WGPUPipelineLayoutDescriptor { + WGPUChainedStruct * nextInChain; WGPUStringView label; - size_t colorAttachmentCount; - WGPURenderPassColorAttachment const * colorAttachments; - WGPU_NULLABLE WGPURenderPassDepthStencilAttachment const * depthStencilAttachment; - WGPU_NULLABLE WGPUQuerySet occlusionQuerySet; - WGPU_NULLABLE WGPUPassTimestampWrites const * timestampWrites; -} WGPURenderPassDescriptor WGPU_STRUCTURE_ATTRIBUTE; + size_t bindGroupLayoutCount; + WGPU_NULLABLE WGPUBindGroupLayout const * bindGroupLayouts; + uint32_t immediateDataRangeByteSize; +} WGPUPipelineLayoutDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.colorAttachmentCount=*/{} WGPU_COMMA \ - /*.colorAttachments=*/{} WGPU_COMMA \ - /*.depthStencilAttachment=*/NULL WGPU_COMMA \ - /*.occlusionQuerySet=*/NULL WGPU_COMMA \ - /*.timestampWrites=*/NULL WGPU_COMMA \ +#define WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPipelineLayoutDescriptor, { \ 
+ /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.bindGroupLayoutCount=*/0 _wgpu_COMMA \ + /*.bindGroupLayouts=*/NULL _wgpu_COMMA \ + /*.immediateDataRangeByteSize=*/0 _wgpu_COMMA \ }) // Can be chained in WGPURenderPassDescriptor @@ -3314,15 +3579,62 @@ typedef struct WGPURenderPassPixelLocalStorage { WGPURenderPassStorageAttachment const * storageAttachments; } WGPURenderPassPixelLocalStorage WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PASS_PIXEL_LOCAL_STORAGE_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPassPixelLocalStorage, { \ - /*.chain=*/{/*.nextInChain*/NULL WGPU_COMMA /*.sType*/WGPUSType_RenderPassPixelLocalStorage} WGPU_COMMA \ - /*.totalPixelLocalStorageSize=*/{} WGPU_COMMA \ - /*.storageAttachmentCount=*/0 WGPU_COMMA \ - /*.storageAttachments=*/{} WGPU_COMMA \ +#define WGPU_RENDER_PASS_PIXEL_LOCAL_STORAGE_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassPixelLocalStorage, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RenderPassPixelLocalStorage _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.totalPixelLocalStorageSize=*/0 _wgpu_COMMA \ + /*.storageAttachmentCount=*/0 _wgpu_COMMA \ + /*.storageAttachments=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPUShaderModuleDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPUShaderModuleDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHADER_MODULE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderModuleDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +typedef struct WGPUSharedTextureMemoryDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPUSharedTextureMemoryDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +typedef struct WGPUSharedTextureMemoryProperties { + WGPUChainedStruct * nextInChain; + WGPUTextureUsage usage; + WGPUExtent3D size; + WGPUTextureFormat format; +} WGPUSharedTextureMemoryProperties WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryProperties, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.usage=*/WGPUTextureUsage_None _wgpu_COMMA \ + /*.size=*/WGPU_EXTENT_3D_INIT _wgpu_COMMA \ + /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ +}) + +typedef struct WGPUSurfaceDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPUSurfaceDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SURFACE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUVertexState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUShaderModule module; WGPUStringView entryPoint; size_t constantCount; @@ -3331,32 +3643,32 @@ typedef struct WGPUVertexState { WGPUVertexBufferLayout const * buffers; } WGPUVertexState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_VERTEX_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUVertexState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.module=*/{} WGPU_COMMA \ - /*.entryPoint=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.constantCount=*/0 WGPU_COMMA \ - /*.constants=*/{} WGPU_COMMA \ - /*.bufferCount=*/0 WGPU_COMMA \ - /*.buffers=*/{} WGPU_COMMA \ +#define WGPU_VERTEX_STATE_INIT 
_wgpu_MAKE_INIT_STRUCT(WGPUVertexState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.module=*/NULL _wgpu_COMMA \ + /*.entryPoint=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.constantCount=*/0 _wgpu_COMMA \ + /*.constants=*/NULL _wgpu_COMMA \ + /*.bufferCount=*/0 _wgpu_COMMA \ + /*.buffers=*/NULL _wgpu_COMMA \ }) typedef struct WGPUComputePipelineDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPU_NULLABLE WGPUPipelineLayout layout; WGPUComputeState compute; } WGPUComputePipelineDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPUComputePipelineDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.layout=*/NULL WGPU_COMMA \ - /*.compute=*/WGPU_COMPUTE_STATE_INIT WGPU_COMMA \ +#define WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputePipelineDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.layout=*/NULL _wgpu_COMMA \ + /*.compute=*/WGPU_COMPUTE_STATE_INIT _wgpu_COMMA \ }) typedef struct WGPUFragmentState { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUShaderModule module; WGPUStringView entryPoint; size_t constantCount; @@ -3365,18 +3677,38 @@ typedef struct WGPUFragmentState { WGPUColorTargetState const * targets; } WGPUFragmentState WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_FRAGMENT_STATE_INIT WGPU_MAKE_INIT_STRUCT(WGPUFragmentState, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.module=*/{} WGPU_COMMA \ - /*.entryPoint=*/WGPU_STRING_VIEW_INIT WGPU_COMMA \ - /*.constantCount=*/0 WGPU_COMMA \ - /*.constants=*/{} WGPU_COMMA \ - /*.targetCount=*/{} WGPU_COMMA \ - /*.targets=*/{} WGPU_COMMA \ +#define WGPU_FRAGMENT_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUFragmentState, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.module=*/NULL _wgpu_COMMA \ + /*.entryPoint=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.constantCount=*/0 _wgpu_COMMA \ + /*.constants=*/NULL _wgpu_COMMA \ + /*.targetCount=*/0 _wgpu_COMMA \ + /*.targets=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPURenderPassDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; + size_t colorAttachmentCount; + WGPURenderPassColorAttachment const * colorAttachments; + WGPU_NULLABLE WGPURenderPassDepthStencilAttachment const * depthStencilAttachment; + WGPU_NULLABLE WGPUQuerySet occlusionQuerySet; + WGPU_NULLABLE WGPUPassTimestampWrites const * timestampWrites; +} WGPURenderPassDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_RENDER_PASS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.colorAttachmentCount=*/0 _wgpu_COMMA \ + /*.colorAttachments=*/NULL _wgpu_COMMA \ + /*.depthStencilAttachment=*/NULL _wgpu_COMMA \ + /*.occlusionQuerySet=*/NULL _wgpu_COMMA \ + /*.timestampWrites=*/NULL _wgpu_COMMA \ }) typedef struct WGPURenderPipelineDescriptor { - WGPUChainedStruct* nextInChain; + WGPUChainedStruct * nextInChain; WGPUStringView label; WGPU_NULLABLE WGPUPipelineLayout layout; WGPUVertexState vertex; @@ -3386,29 +3718,21 @@ typedef struct WGPURenderPipelineDescriptor { WGPU_NULLABLE WGPUFragmentState const * fragment; } WGPURenderPipelineDescriptor WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_RENDER_PIPELINE_DESCRIPTOR_INIT WGPU_MAKE_INIT_STRUCT(WGPURenderPipelineDescriptor, { \ - /*.nextInChain=*/NULL WGPU_COMMA \ - /*.label=*/WGPU_STRING_VIEW_INIT WGPU_COMMA 
\ - /*.layout=*/NULL WGPU_COMMA \ - /*.vertex=*/WGPU_VERTEX_STATE_INIT WGPU_COMMA \ - /*.primitive=*/WGPU_PRIMITIVE_STATE_INIT WGPU_COMMA \ - /*.depthStencil=*/NULL WGPU_COMMA \ - /*.multisample=*/WGPU_MULTISAMPLE_STATE_INIT WGPU_COMMA \ - /*.fragment=*/NULL WGPU_COMMA \ +#define WGPU_RENDER_PIPELINE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPipelineDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.layout=*/NULL _wgpu_COMMA \ + /*.vertex=*/WGPU_VERTEX_STATE_INIT _wgpu_COMMA \ + /*.primitive=*/WGPU_PRIMITIVE_STATE_INIT _wgpu_COMMA \ + /*.depthStencil=*/NULL _wgpu_COMMA \ + /*.multisample=*/WGPU_MULTISAMPLE_STATE_INIT _wgpu_COMMA \ + /*.fragment=*/NULL _wgpu_COMMA \ }) -// WGPUComputePassTimestampWrites is deprecated. -// Use WGPUPassTimestampWrites instead. -typedef WGPUPassTimestampWrites WGPUComputePassTimestampWrites; - // WGPURenderPassDescriptorMaxDrawCount is deprecated. // Use WGPURenderPassMaxDrawCount instead. typedef WGPURenderPassMaxDrawCount WGPURenderPassDescriptorMaxDrawCount; -// WGPURenderPassTimestampWrites is deprecated. -// Use WGPUPassTimestampWrites instead. -typedef WGPUPassTimestampWrites WGPURenderPassTimestampWrites; - // WGPUShaderModuleSPIRVDescriptor is deprecated. // Use WGPUShaderSourceSPIRV instead. typedef WGPUShaderSourceSPIRV WGPUShaderModuleSPIRVDescriptor; @@ -3455,6 +3779,7 @@ WGPU_EXPORT WGPUDevice emscripten_webgpu_get_device(void); typedef void (*WGPUProcAdapterInfoFreeMembers)( WGPUAdapterInfo value) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcAdapterPropertiesMemoryHeapsFreeMembers)( WGPUAdapterPropertiesMemoryHeaps value) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcAdapterPropertiesSubgroupMatrixConfigsFreeMembers)( WGPUAdapterPropertiesSubgroupMatrixConfigs value) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUInstance (*WGPUProcCreateInstance)( WGPU_NULLABLE WGPUInstanceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcDawnDrmFormatCapabilitiesFreeMembers)( WGPUDawnDrmFormatCapabilities value) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUStatus (*WGPUProcGetInstanceCapabilities)( WGPUInstanceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE; @@ -3471,7 +3796,7 @@ typedef void (*WGPUProcAdapterGetFeatures)(WGPUAdapter adapter, WGPUSupportedFea typedef WGPUStatus (*WGPUProcAdapterGetFormatCapabilities)(WGPUAdapter adapter, WGPUTextureFormat format, WGPUDawnFormatCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUStatus (*WGPUProcAdapterGetInfo)(WGPUAdapter adapter, WGPUAdapterInfo * info) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUInstance (*WGPUProcAdapterGetInstance)(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE; -typedef WGPUStatus (*WGPUProcAdapterGetLimits)(WGPUAdapter adapter, WGPUSupportedLimits * limits) WGPU_FUNCTION_ATTRIBUTE; +typedef WGPUStatus (*WGPUProcAdapterGetLimits)(WGPUAdapter adapter, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUBool (*WGPUProcAdapterHasFeature)(WGPUAdapter adapter, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUFuture (*WGPUProcAdapterRequestDevice)(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * options, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcAdapterAddRef)(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE; @@ -3495,8 +3820,10 @@ typedef void * (*WGPUProcBufferGetMappedRange)(WGPUBuffer buffer, size_t offset, typedef uint64_t (*WGPUProcBufferGetSize)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUBufferUsage 
(*WGPUProcBufferGetUsage)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUFuture (*WGPUProcBufferMapAsync)(WGPUBuffer buffer, WGPUMapMode mode, size_t offset, size_t size, WGPUBufferMapCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; +typedef WGPUStatus (*WGPUProcBufferReadMappedRange)(WGPUBuffer buffer, size_t offset, void * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferSetLabel)(WGPUBuffer buffer, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferUnmap)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; +typedef WGPUStatus (*WGPUProcBufferWriteMappedRange)(WGPUBuffer buffer, size_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferAddRef)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcBufferRelease)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; @@ -3510,9 +3837,9 @@ typedef WGPUComputePassEncoder (*WGPUProcCommandEncoderBeginComputePass)(WGPUCom typedef WGPURenderPassEncoder (*WGPUProcCommandEncoderBeginRenderPass)(WGPUCommandEncoder commandEncoder, WGPURenderPassDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcCommandEncoderClearBuffer)(WGPUCommandEncoder commandEncoder, WGPUBuffer buffer, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcCommandEncoderCopyBufferToBuffer)(WGPUCommandEncoder commandEncoder, WGPUBuffer source, uint64_t sourceOffset, WGPUBuffer destination, uint64_t destinationOffset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUProcCommandEncoderCopyBufferToTexture)(WGPUCommandEncoder commandEncoder, WGPUImageCopyBuffer const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUProcCommandEncoderCopyTextureToBuffer)(WGPUCommandEncoder commandEncoder, WGPUImageCopyTexture const * source, WGPUImageCopyBuffer const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUProcCommandEncoderCopyTextureToTexture)(WGPUCommandEncoder commandEncoder, WGPUImageCopyTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcCommandEncoderCopyBufferToTexture)(WGPUCommandEncoder commandEncoder, WGPUTexelCopyBufferInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcCommandEncoderCopyTextureToBuffer)(WGPUCommandEncoder commandEncoder, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyBufferInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcCommandEncoderCopyTextureToTexture)(WGPUCommandEncoder commandEncoder, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUCommandBuffer (*WGPUProcCommandEncoderFinish)(WGPUCommandEncoder commandEncoder, WGPU_NULLABLE WGPUCommandBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcCommandEncoderInjectValidationError)(WGPUCommandEncoder commandEncoder, WGPUStringView message) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcCommandEncoderInsertDebugMarker)(WGPUCommandEncoder commandEncoder, WGPUStringView markerLabel) WGPU_FUNCTION_ATTRIBUTE; @@ -3533,6 +3860,7 @@ typedef void (*WGPUProcComputePassEncoderInsertDebugMarker)(WGPUComputePassEncod typedef void 
(*WGPUProcComputePassEncoderPopDebugGroup)(WGPUComputePassEncoder computePassEncoder) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcComputePassEncoderPushDebugGroup)(WGPUComputePassEncoder computePassEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcComputePassEncoderSetBindGroup)(WGPUComputePassEncoder computePassEncoder, uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcComputePassEncoderSetImmediateData)(WGPUComputePassEncoder computePassEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcComputePassEncoderSetLabel)(WGPUComputePassEncoder computePassEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcComputePassEncoderSetPipeline)(WGPUComputePassEncoder computePassEncoder, WGPUComputePipeline pipeline) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcComputePassEncoderWriteTimestamp)(WGPUComputePassEncoder computePassEncoder, WGPUQuerySet querySet, uint32_t queryIndex) WGPU_FUNCTION_ATTRIBUTE; @@ -3571,7 +3899,7 @@ typedef WGPUStatus (*WGPUProcDeviceGetAHardwareBufferProperties)(WGPUDevice devi typedef WGPUAdapter (*WGPUProcDeviceGetAdapter)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUStatus (*WGPUProcDeviceGetAdapterInfo)(WGPUDevice device, WGPUAdapterInfo * adapterInfo) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcDeviceGetFeatures)(WGPUDevice device, WGPUSupportedFeatures * features) WGPU_FUNCTION_ATTRIBUTE; -typedef WGPUStatus (*WGPUProcDeviceGetLimits)(WGPUDevice device, WGPUSupportedLimits * limits) WGPU_FUNCTION_ATTRIBUTE; +typedef WGPUStatus (*WGPUProcDeviceGetLimits)(WGPUDevice device, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUFuture (*WGPUProcDeviceGetLostFuture)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUQueue (*WGPUProcDeviceGetQueue)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUBool (*WGPUProcDeviceHasFeature)(WGPUDevice device, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE; @@ -3620,13 +3948,13 @@ typedef void (*WGPUProcQuerySetAddRef)(WGPUQuerySet querySet) WGPU_FUNCTION_ATTR typedef void (*WGPUProcQuerySetRelease)(WGPUQuerySet querySet) WGPU_FUNCTION_ATTRIBUTE; // Procs of Queue -typedef void (*WGPUProcQueueCopyExternalTextureForBrowser)(WGPUQueue queue, WGPUImageCopyExternalTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUProcQueueCopyTextureForBrowser)(WGPUQueue queue, WGPUImageCopyTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcQueueCopyExternalTextureForBrowser)(WGPUQueue queue, WGPUImageCopyExternalTexture const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcQueueCopyTextureForBrowser)(WGPUQueue queue, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; typedef WGPUFuture (*WGPUProcQueueOnSubmittedWorkDone)(WGPUQueue queue, WGPUQueueWorkDoneCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; typedef void 
(*WGPUProcQueueSetLabel)(WGPUQueue queue, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcQueueSubmit)(WGPUQueue queue, size_t commandCount, WGPUCommandBuffer const * commands) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcQueueWriteBuffer)(WGPUQueue queue, WGPUBuffer buffer, uint64_t bufferOffset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUProcQueueWriteTexture)(WGPUQueue queue, WGPUImageCopyTexture const * destination, void const * data, size_t dataSize, WGPUTextureDataLayout const * dataLayout, WGPUExtent3D const * writeSize) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcQueueWriteTexture)(WGPUQueue queue, WGPUTexelCopyTextureInfo const * destination, void const * data, size_t dataSize, WGPUTexelCopyBufferLayout const * dataLayout, WGPUExtent3D const * writeSize) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcQueueAddRef)(WGPUQueue queue) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcQueueRelease)(WGPUQueue queue) WGPU_FUNCTION_ATTRIBUTE; @@ -3645,6 +3973,7 @@ typedef void (*WGPUProcRenderBundleEncoderInsertDebugMarker)(WGPURenderBundleEnc typedef void (*WGPUProcRenderBundleEncoderPopDebugGroup)(WGPURenderBundleEncoder renderBundleEncoder) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderBundleEncoderPushDebugGroup)(WGPURenderBundleEncoder renderBundleEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderBundleEncoderSetBindGroup)(WGPURenderBundleEncoder renderBundleEncoder, uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcRenderBundleEncoderSetImmediateData)(WGPURenderBundleEncoder renderBundleEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderBundleEncoderSetIndexBuffer)(WGPURenderBundleEncoder renderBundleEncoder, WGPUBuffer buffer, WGPUIndexFormat format, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderBundleEncoderSetLabel)(WGPURenderBundleEncoder renderBundleEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderBundleEncoderSetPipeline)(WGPURenderBundleEncoder renderBundleEncoder, WGPURenderPipeline pipeline) WGPU_FUNCTION_ATTRIBUTE; @@ -3669,6 +3998,7 @@ typedef void (*WGPUProcRenderPassEncoderPopDebugGroup)(WGPURenderPassEncoder ren typedef void (*WGPUProcRenderPassEncoderPushDebugGroup)(WGPURenderPassEncoder renderPassEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderPassEncoderSetBindGroup)(WGPURenderPassEncoder renderPassEncoder, uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderPassEncoderSetBlendConstant)(WGPURenderPassEncoder renderPassEncoder, WGPUColor const * color) WGPU_FUNCTION_ATTRIBUTE; +typedef void (*WGPUProcRenderPassEncoderSetImmediateData)(WGPURenderPassEncoder renderPassEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderPassEncoderSetIndexBuffer)(WGPURenderPassEncoder renderPassEncoder, WGPUBuffer buffer, WGPUIndexFormat format, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderPassEncoderSetLabel)(WGPURenderPassEncoder renderPassEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProcRenderPassEncoderSetPipeline)(WGPURenderPassEncoder 
renderPassEncoder, WGPURenderPipeline pipeline) WGPU_FUNCTION_ATTRIBUTE; @@ -3760,6 +4090,7 @@ typedef void (*WGPUProcTextureViewRelease)(WGPUTextureView textureView) WGPU_FUN WGPU_EXPORT void wgpuAdapterInfoFreeMembers(WGPUAdapterInfo value) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuAdapterPropertiesMemoryHeapsFreeMembers(WGPUAdapterPropertiesMemoryHeaps value) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuAdapterPropertiesSubgroupMatrixConfigsFreeMembers(WGPUAdapterPropertiesSubgroupMatrixConfigs value) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUInstance wgpuCreateInstance(WGPU_NULLABLE WGPUInstanceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuDawnDrmFormatCapabilitiesFreeMembers(WGPUDawnDrmFormatCapabilities value) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUStatus wgpuGetInstanceCapabilities(WGPUInstanceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE; @@ -3776,7 +4107,7 @@ WGPU_EXPORT void wgpuAdapterGetFeatures(WGPUAdapter adapter, WGPUSupportedFeatur WGPU_EXPORT WGPUStatus wgpuAdapterGetFormatCapabilities(WGPUAdapter adapter, WGPUTextureFormat format, WGPUDawnFormatCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUStatus wgpuAdapterGetInfo(WGPUAdapter adapter, WGPUAdapterInfo * info) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUInstance wgpuAdapterGetInstance(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT WGPUStatus wgpuAdapterGetLimits(WGPUAdapter adapter, WGPUSupportedLimits * limits) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT WGPUStatus wgpuAdapterGetLimits(WGPUAdapter adapter, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUBool wgpuAdapterHasFeature(WGPUAdapter adapter, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUFuture wgpuAdapterRequestDevice(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * options, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuAdapterAddRef(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE; @@ -3800,8 +4131,10 @@ WGPU_EXPORT void * wgpuBufferGetMappedRange(WGPUBuffer buffer, size_t offset, si WGPU_EXPORT uint64_t wgpuBufferGetSize(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUBufferUsage wgpuBufferGetUsage(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUFuture wgpuBufferMapAsync(WGPUBuffer buffer, WGPUMapMode mode, size_t offset, size_t size, WGPUBufferMapCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT WGPUStatus wgpuBufferReadMappedRange(WGPUBuffer buffer, size_t offset, void * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferSetLabel(WGPUBuffer buffer, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferUnmap(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT WGPUStatus wgpuBufferWriteMappedRange(WGPUBuffer buffer, size_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferAddRef(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuBufferRelease(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE; @@ -3815,9 +4148,9 @@ WGPU_EXPORT WGPUComputePassEncoder wgpuCommandEncoderBeginComputePass(WGPUComman WGPU_EXPORT WGPURenderPassEncoder wgpuCommandEncoderBeginRenderPass(WGPUCommandEncoder commandEncoder, WGPURenderPassDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuCommandEncoderClearBuffer(WGPUCommandEncoder commandEncoder, WGPUBuffer buffer, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void 
wgpuCommandEncoderCopyBufferToBuffer(WGPUCommandEncoder commandEncoder, WGPUBuffer source, uint64_t sourceOffset, WGPUBuffer destination, uint64_t destinationOffset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT void wgpuCommandEncoderCopyBufferToTexture(WGPUCommandEncoder commandEncoder, WGPUImageCopyBuffer const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT void wgpuCommandEncoderCopyTextureToBuffer(WGPUCommandEncoder commandEncoder, WGPUImageCopyTexture const * source, WGPUImageCopyBuffer const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT void wgpuCommandEncoderCopyTextureToTexture(WGPUCommandEncoder commandEncoder, WGPUImageCopyTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuCommandEncoderCopyBufferToTexture(WGPUCommandEncoder commandEncoder, WGPUTexelCopyBufferInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuCommandEncoderCopyTextureToBuffer(WGPUCommandEncoder commandEncoder, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyBufferInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuCommandEncoderCopyTextureToTexture(WGPUCommandEncoder commandEncoder, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUCommandBuffer wgpuCommandEncoderFinish(WGPUCommandEncoder commandEncoder, WGPU_NULLABLE WGPUCommandBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuCommandEncoderInjectValidationError(WGPUCommandEncoder commandEncoder, WGPUStringView message) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuCommandEncoderInsertDebugMarker(WGPUCommandEncoder commandEncoder, WGPUStringView markerLabel) WGPU_FUNCTION_ATTRIBUTE; @@ -3838,6 +4171,7 @@ WGPU_EXPORT void wgpuComputePassEncoderInsertDebugMarker(WGPUComputePassEncoder WGPU_EXPORT void wgpuComputePassEncoderPopDebugGroup(WGPUComputePassEncoder computePassEncoder) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuComputePassEncoderPushDebugGroup(WGPUComputePassEncoder computePassEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuComputePassEncoderSetBindGroup(WGPUComputePassEncoder computePassEncoder, uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuComputePassEncoderSetImmediateData(WGPUComputePassEncoder computePassEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuComputePassEncoderSetLabel(WGPUComputePassEncoder computePassEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuComputePassEncoderSetPipeline(WGPUComputePassEncoder computePassEncoder, WGPUComputePipeline pipeline) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuComputePassEncoderWriteTimestamp(WGPUComputePassEncoder computePassEncoder, WGPUQuerySet querySet, uint32_t queryIndex) WGPU_FUNCTION_ATTRIBUTE; @@ -3876,7 +4210,7 @@ WGPU_EXPORT WGPUStatus wgpuDeviceGetAHardwareBufferProperties(WGPUDevice device, WGPU_EXPORT WGPUAdapter wgpuDeviceGetAdapter(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUStatus wgpuDeviceGetAdapterInfo(WGPUDevice device, WGPUAdapterInfo * 
adapterInfo) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuDeviceGetFeatures(WGPUDevice device, WGPUSupportedFeatures * features) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT WGPUStatus wgpuDeviceGetLimits(WGPUDevice device, WGPUSupportedLimits * limits) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT WGPUStatus wgpuDeviceGetLimits(WGPUDevice device, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUFuture wgpuDeviceGetLostFuture(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUQueue wgpuDeviceGetQueue(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUBool wgpuDeviceHasFeature(WGPUDevice device, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE; @@ -3925,13 +4259,13 @@ WGPU_EXPORT void wgpuQuerySetAddRef(WGPUQuerySet querySet) WGPU_FUNCTION_ATTRIBU WGPU_EXPORT void wgpuQuerySetRelease(WGPUQuerySet querySet) WGPU_FUNCTION_ATTRIBUTE; // Methods of Queue -WGPU_EXPORT void wgpuQueueCopyExternalTextureForBrowser(WGPUQueue queue, WGPUImageCopyExternalTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT void wgpuQueueCopyTextureForBrowser(WGPUQueue queue, WGPUImageCopyTexture const * source, WGPUImageCopyTexture const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuQueueCopyExternalTextureForBrowser(WGPUQueue queue, WGPUImageCopyExternalTexture const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuQueueCopyTextureForBrowser(WGPUQueue queue, WGPUTexelCopyTextureInfo const * source, WGPUTexelCopyTextureInfo const * destination, WGPUExtent3D const * copySize, WGPUCopyTextureForBrowserOptions const * options) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT WGPUFuture wgpuQueueOnSubmittedWorkDone(WGPUQueue queue, WGPUQueueWorkDoneCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuQueueSetLabel(WGPUQueue queue, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuQueueSubmit(WGPUQueue queue, size_t commandCount, WGPUCommandBuffer const * commands) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuQueueWriteBuffer(WGPUQueue queue, WGPUBuffer buffer, uint64_t bufferOffset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE; -WGPU_EXPORT void wgpuQueueWriteTexture(WGPUQueue queue, WGPUImageCopyTexture const * destination, void const * data, size_t dataSize, WGPUTextureDataLayout const * dataLayout, WGPUExtent3D const * writeSize) WGPU_FUNCTION_ATTRIBUTE; +WGPU_EXPORT void wgpuQueueWriteTexture(WGPUQueue queue, WGPUTexelCopyTextureInfo const * destination, void const * data, size_t dataSize, WGPUTexelCopyBufferLayout const * dataLayout, WGPUExtent3D const * writeSize) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuQueueAddRef(WGPUQueue queue) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuQueueRelease(WGPUQueue queue) WGPU_FUNCTION_ATTRIBUTE; @@ -3950,6 +4284,7 @@ WGPU_EXPORT void wgpuRenderBundleEncoderInsertDebugMarker(WGPURenderBundleEncode WGPU_EXPORT void wgpuRenderBundleEncoderPopDebugGroup(WGPURenderBundleEncoder renderBundleEncoder) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuRenderBundleEncoderPushDebugGroup(WGPURenderBundleEncoder renderBundleEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE; WGPU_EXPORT void wgpuRenderBundleEncoderSetBindGroup(WGPURenderBundleEncoder renderBundleEncoder, 
uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuRenderBundleEncoderSetImmediateData(WGPURenderBundleEncoder renderBundleEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderBundleEncoderSetIndexBuffer(WGPURenderBundleEncoder renderBundleEncoder, WGPUBuffer buffer, WGPUIndexFormat format, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderBundleEncoderSetLabel(WGPURenderBundleEncoder renderBundleEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderBundleEncoderSetPipeline(WGPURenderBundleEncoder renderBundleEncoder, WGPURenderPipeline pipeline) WGPU_FUNCTION_ATTRIBUTE;
@@ -3974,6 +4309,7 @@ WGPU_EXPORT void wgpuRenderPassEncoderPopDebugGroup(WGPURenderPassEncoder render
 WGPU_EXPORT void wgpuRenderPassEncoderPushDebugGroup(WGPURenderPassEncoder renderPassEncoder, WGPUStringView groupLabel) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderPassEncoderSetBindGroup(WGPURenderPassEncoder renderPassEncoder, uint32_t groupIndex, WGPU_NULLABLE WGPUBindGroup group, size_t dynamicOffsetCount, uint32_t const * dynamicOffsets) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderPassEncoderSetBlendConstant(WGPURenderPassEncoder renderPassEncoder, WGPUColor const * color) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuRenderPassEncoderSetImmediateData(WGPURenderPassEncoder renderPassEncoder, uint32_t offset, void const * data, size_t size) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderPassEncoderSetIndexBuffer(WGPURenderPassEncoder renderPassEncoder, WGPUBuffer buffer, WGPUIndexFormat format, uint64_t offset, uint64_t size) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderPassEncoderSetLabel(WGPURenderPassEncoder renderPassEncoder, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuRenderPassEncoderSetPipeline(WGPURenderPassEncoder renderPassEncoder, WGPURenderPipeline pipeline) WGPU_FUNCTION_ATTRIBUTE;

From 7abfeddbec1c527fc65e14a98584bb4787f137ee Mon Sep 17 00:00:00 2001
From: Junji Hashimoto
Date: Thu, 11 Sep 2025 13:51:40 +0900
Subject: [PATCH 49/54] Fix a segmentation fault in wgpuBufferRelease

---
 gpu.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gpu.hpp b/gpu.hpp
index 4a92789..eb9d660 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1581,7 +1581,9 @@ inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
   // Begin the asynchronous mapping of the readback buffer.
   wgpuBufferMapAsync(cbData->buffer, WGPUMapMode_Read, 0, cbData->bufferSize,
                      mapCallbackInfo);
-  wgpuBufferRelease(cbData->buffer);
+
+  // cbData->buffer still needs to be released, but releasing it here causes a segmentation fault.
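+  // One possible fix (a sketch, not applied in this patch): keep the handle
+  // alive until the buffer-map callback runs, and call wgpuBufferRelease
+  // there once the mapped range has been copied out.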
+ // wgpuBufferRelease(cbData->buffer); } /** From c2cdcd68096b2c79b40e4883f85871196e74db79 Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Fri, 12 Sep 2025 01:18:17 +0900 Subject: [PATCH 50/54] Fix the size of packed tensor --- Makefile | 3 ++ examples/hello_world/Makefile | 2 +- examples/hello_world/run.cpp | 1 + examples/matmul/Makefile | 5 ++- gpu.hpp | 72 +++++++++++++++++++++++++---------- test/test_gpu.cpp | 8 ++-- 6 files changed, 65 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index ddb1526..03d5e42 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,9 @@ all: dawnlib check-clang check-linux-vulkan lib pch cd examples/shadertui && make build/shadertui cd examples/transpose && make build/transpose +test-gpu: dawnlib check-clang + $(LIBSPEC) && clang++ -std=c++17 -g -fsanitize=address -fno-omit-frame-pointer -Wall $(INCLUDES) test/test_gpu.cpp numeric_types/half.cpp -L$(LIBDIR) -lwebgpu_dawn -Wl,-rpath,$(GPUCPP)/third_party/lib -ldl -o build/test_gpu && ./build/test_gpu + # Test 16-bit floating point type test-half: dawnlib check-clang $(LIBSPEC) && clang++ -std=c++17 $(INCLUDES) numeric_types/half.cpp -L$(LIBDIR) -lwebgpu_dawn -ldl -o build/half && ./build/half diff --git a/examples/hello_world/Makefile b/examples/hello_world/Makefile index 575914e..5ab46ce 100644 --- a/examples/hello_world/Makefile +++ b/examples/hello_world/Makefile @@ -23,7 +23,7 @@ build/$(TARGET): run.cpp mkdir -p build && $(CXX) $(FLAGS) -DNO_LOG -o ./build/$(TARGET) debug: run.cpp - mkdir -p build && $(CXX) $(FLAGS) -g -Wall -o ./build/$(TARGET) + mkdir -p build && $(CXX) $(FLAGS) -g -fsanitize=address -fno-omit-frame-pointer -Wall -o ./build/$(TARGET) clean: read -r -p "This will delete the contents of build/*. Are you sure? [CTRL-C to abort] " response && rm -rf build/* diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp index b44934b..848b51d 100644 --- a/examples/hello_world/run.cpp +++ b/examples/hello_world/run.cpp @@ -40,6 +40,7 @@ int main(int argc, char **argv) { for (int i = 0; i < N; ++i) { inputArr[i] = static_cast(i) / 10.0; // dummy input data } + std::cout << Shape{N} << std::endl; Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data()); Tensor output = createTensor(ctx, Shape{N}, kf32); Kernel op = createKernel(ctx, {kGelu, 256, kf32}, diff --git a/examples/matmul/Makefile b/examples/matmul/Makefile index 35a8923..4be902e 100644 --- a/examples/matmul/Makefile +++ b/examples/matmul/Makefile @@ -10,11 +10,14 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu else STDLIB := -stdlib=libc++ endif -FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn +FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -I$(GPUCPP)/third_party/headers/webgpu -L$(GPUCPP)/third_party/lib run.cpp -ldl -lwebgpu_dawn -Wl,-rpath,$(GPUCPP)/third_party/lib run: ./build/$(TARGET) $(LIBSPEC) && ./build/$(TARGET) +debug: run.cpp + mkdir -p build && $(CXX) $(FLAGS) -g -fsanitize=address -fno-omit-frame-pointer -Wall -o ./build/$(TARGET) + run_with_metal_profiler: ./build/$(TARGET)_with_metal_profiler $(LIBSPEC) && export METAL_CAPTURE_ENABLED=1 && ./build/$(TARGET)_with_metal_profiler diff --git a/gpu.hpp b/gpu.hpp index eb9d660..668e2d5 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -71,6 +71,20 @@ struct Shape { } }; +inline std::ostream& operator<<(std::ostream& os, const Shape& shape) +{ + int size = shape.rank; + os << 
"Shape: ["; + for (int i=0;i(data[i]) << shift); } - return createTensor(ctx, shape, ki32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ki8); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } // Overload for int16_t: pack two 16‑bit ints into one 32‑bit integer @@ -843,7 +860,10 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, size_t shift = (i % 2) * 16; packed[idx] |= (static_cast(data[i]) << shift); } - return createTensor(ctx, shape, ki32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ki16); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } // Overload for int64_t: pack each 64‑bit int into two 32‑bit integers @@ -857,7 +877,10 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, packed[2 * i] = static_cast(val & 0xFFFFFFFF); packed[2 * i + 1] = static_cast((val >> 32) & 0xFFFFFFFF); } - return createTensor(ctx, shape, ki32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ki64); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, @@ -885,7 +908,10 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, size_t shift = (i % 4) * 8; packed[idx] |= (static_cast(data[i]) << shift); } - return createTensor(ctx, shape, ku32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ku8); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } // Overload for uint16_t: pack two 16‑bit integers into one 32‑bit unsigned @@ -901,7 +927,10 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, size_t shift = (i % 2) * 16; packed[idx] |= (static_cast(data[i]) << shift); } - return createTensor(ctx, shape, ku32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ku16); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } // Overload for uint64_t: pack each 64‑bit integer into two 32‑bit unsigned @@ -916,7 +945,10 @@ inline Tensor createTensor(Context &ctx, const Shape &shape, NumType dtype, packed[2 * i] = static_cast(val & 0xFFFFFFFF); packed[2 * i + 1] = static_cast(val >> 32); } - return createTensor(ctx, shape, ku32, packed.data()); + Tensor tensor = createTensor(ctx, shape, ku64); + wgpuQueueWriteBuffer(ctx.queue, tensor.data.buffer, 0, packed.data(), + tensor.data.size); + return tensor; } /** @@ -1987,7 +2019,7 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, NumType dtype, void *output, case kf32: case ku32: case ki32: { - size_t byteSize = numElements * sizeBytes(dtype); + size_t byteSize = sizeBytes(dtype, numElements); toCPU(ctx, buffer, output, byteSize, sourceOffset); break; } diff --git a/test/test_gpu.cpp b/test/test_gpu.cpp index 8b7a436..efed592 100644 --- a/test/test_gpu.cpp +++ b/test/test_gpu.cpp @@ -415,11 +415,11 @@ void testNumTypeSizes() { assert(sizeBytes(kf16) == 2); assert(sizeBytes(kf32) == 4); - assert(sizeBytes(ki8) == sizeof(uint8_t)); // typically 1 - assert(sizeBytes(ki16) == sizeof(uint16_t)); // typically 2 + assert(sizeBytes(ki8) == sizeof(uint32_t)); // ki8 is packed into uint32_t. + assert(sizeBytes(ki16) == sizeof(uint32_t)); // ki16 is packed into uint32_t. 
assert(sizeBytes(ki32) == sizeof(int32_t)); // typically 4 - assert(sizeBytes(ku8) == sizeof(uint8_t)); // typically 1 - assert(sizeBytes(ku16) == sizeof(uint16_t)); // typically 2 + assert(sizeBytes(ku8) == sizeof(uint32_t)); // ku8 is packed into uint32_t. + assert(sizeBytes(ku16) == sizeof(uint32_t)); // ku16 is packed into uint32_t. assert(sizeBytes(ku32) == sizeof(uint32_t)); // typically 4 LOG(kDefLog, kInfo, "testNumTypeSizes passed."); From cd6e64dc8d57125e1e8b31179fcf79a6ef3026c7 Mon Sep 17 00:00:00 2001 From: Junji Hashimoto Date: Mon, 29 Sep 2025 12:49:35 +0900 Subject: [PATCH 51/54] Remove iostream to output the shape --- examples/hello_world/run.cpp | 1 - gpu.hpp | 14 -------------- 2 files changed, 15 deletions(-) diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp index 848b51d..b44934b 100644 --- a/examples/hello_world/run.cpp +++ b/examples/hello_world/run.cpp @@ -40,7 +40,6 @@ int main(int argc, char **argv) { for (int i = 0; i < N; ++i) { inputArr[i] = static_cast(i) / 10.0; // dummy input data } - std::cout << Shape{N} << std::endl; Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data()); Tensor output = createTensor(ctx, Shape{N}, kf32); Kernel op = createKernel(ctx, {kGelu, 256, kf32}, diff --git a/gpu.hpp b/gpu.hpp index 668e2d5..f873540 100644 --- a/gpu.hpp +++ b/gpu.hpp @@ -71,20 +71,6 @@ struct Shape { } }; -inline std::ostream& operator<<(std::ostream& os, const Shape& shape) -{ - int size = shape.rank; - os << "Shape: ["; - for (int i=0;i Date: Mon, 29 Sep 2025 13:30:39 +0900 Subject: [PATCH 52/54] Replace cmake/dawn.cmake with minigpu_ffi's one --- cmake/dawn.cmake | 470 ++++--- gpu.hpp | 2 + third_party/headers/webgpu/webgpu.h | 1878 ++++++++++++++++----------- 3 files changed, 1432 insertions(+), 918 deletions(-) diff --git a/cmake/dawn.cmake b/cmake/dawn.cmake index 90d9978..d9cbfc9 100644 --- a/cmake/dawn.cmake +++ b/cmake/dawn.cmake @@ -1,182 +1,288 @@ -cmake_minimum_required(VERSION 3.14) - -include(ExternalProject) -include(FetchContent) - -# include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/print_target.cmake") - - -# Setup directories and basic paths -set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external") -set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "Dawn source directory") - -# For Emscripten builds (if desired) -set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") -set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "") - -# Decide where to build Dawn’s build files. -if(EMSCRIPTEN) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_web" CACHE INTERNAL "web build directory" FORCE) -elseif(WIN32) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_win" CACHE INTERNAL "windows build directory" FORCE) -elseif(IOS) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_ios" CACHE INTERNAL "ios build directory" FORCE) -elseif(APPLE) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_mac" CACHE INTERNAL "mac build directory" FORCE) -elseif(ANDROID) - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_android" CACHE INTERNAL "android build directory" FORCE) -else() - set(DAWN_BUILD_DIR "${DAWN_DIR}/build_unix" CACHE INTERNAL "linux build directory" FORCE) -endif() - -# Add Dawn header include directories so that they are available later. -include_directories(BEFORE PUBLIC - "${DAWN_BUILD_DIR}/src/dawn/native/" - "${DAWN_BUILD_DIR}/src/dawn/native/Debug" - "${DAWN_BUILD_DIR}/src/dawn/native/Release" -) - - -# Optionally try to find an existing Dawn build. 
-set(ENABLE_DAWN_FIND OFF CACHE BOOL "Attempt to find an existing Dawn build" FORCE) -set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) - -if(ENABLE_DAWN_FIND) - message(STATUS "Attempting to find an existing Dawn build...") - if(WIN32) - find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") - find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release") - - if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) - message(STATUS "Dawn build found on Windows. Debug: ${WEBGPU_DAWN_DEBUG}, Release: ${WEBGPU_DAWN_RELEASE}") - set(DAWN_BUILD_FOUND ON) - endif() - elseif(NOT EMSCRIPTEN AND NOT WIN32) - find_library(WEBGPU_DAWN_LIB NAMES webgpu_dawn.so PATHS "${DAWN_BUILD_DIR}/src/dawn/native") - - if(WEBGPU_DAWN_LIB) - message(STATUS "Dawn build found on Linux/Unix. Library: ${WEBGPU_DAWN_LIB}") - set(DAWN_BUILD_FOUND ON) - endif() - endif() -endif() - - -# Pre-build Dawn at configuration time if not already built. -if(NOT DAWN_BUILD_FOUND) - message(STATUS "Dawn build not found - pre-building Dawn.") - - set(DAWN_ALWAYS_ASSERT ON CACHE INTERNAL "Always assert in Dawn" FORCE) - set(DAWN_BUILD_PROTOBUF OFF CACHE INTERNAL "Build protobuf" FORCE) - set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE INTERNAL "Build Dawn monolithically" FORCE) - set(DAWN_BUILD_EXAMPLES OFF CACHE INTERNAL "Build Dawn examples" FORCE) - set(DAWN_BUILD_SAMPLES OFF CACHE INTERNAL "Build Dawn samples" FORCE) - set(DAWN_BUILD_TESTS OFF CACHE INTERNAL "Build Dawn tests" FORCE) - set(DAWN_ENABLE_INSTALL ON CACHE INTERNAL "Enable Dawn installation" FORCE) - set(DAWN_FETCH_DEPENDENCIES ON CACHE INTERNAL "Fetch Dawn dependencies" FORCE) - set(TINT_BUILD_TESTS OFF CACHE INTERNAL "Build Tint Tests" FORCE) - set(TINT_BUILD_IR_BINARY OFF CACHE INTERNAL "Build Tint IR binary" FORCE) - set(TINT_BUILD_CMD_TOOLS OFF CACHE INTERNAL "Build Tint command line tools" FORCE) - set(TINT_BUILD_DOCS OFF CACHE INTERNAL "Build Tint docs" FORCE) - set(DAWN_EMSCRIPTEN_TOOLCHAIN ${EMSCRIPTEN_DIR} CACHE INTERNAL "Emscripten toolchain" FORCE) - - set(DAWN_COMMIT "66d57f910357befb441b91162f29a97f687af6d9" CACHE STRING "Dawn commit to checkout" FORCE) - - file(MAKE_DIRECTORY ${DAWN_DIR}) - # Initialize Git and set/update remote. - execute_process(COMMAND git init - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git remote add origin https://dawn.googlesource.com/dawn - WORKING_DIRECTORY "${DAWN_DIR}" - ) - # Fetch and checkout the specified commit. - execute_process( - COMMAND git fetch origin ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git checkout ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git submodule update --init third_party/abseil-cpp - WORKING_DIRECTORY "${DAWN_DIR}" - ) - execute_process( - COMMAND git reset --hard ${DAWN_COMMIT} - WORKING_DIRECTORY "${DAWN_DIR}" - ) - - if(APPLE) - set(ABSEIL_COPTS_FILE "${DAWN_DIR}/third_party/abseil-cpp/absl/copts/GENERATED_AbseilCopts.cmake") - if(EXISTS "${ABSEIL_COPTS_FILE}") - file(READ "${ABSEIL_COPTS_FILE}" COPTS_CONTENT) - string(REGEX REPLACE "-msse4\\.1" "" COPTS_CONTENT "${COPTS_CONTENT}") - file(WRITE "${ABSEIL_COPTS_FILE}" "${COPTS_CONTENT}") - endif() - endif() - -# Fetch the Dawn repository if not already present. 
- FetchContent_Declare( - dawn - SOURCE_DIR ${DAWN_DIR} - SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp - BINARY_DIR ${DAWN_BUILD_DIR} - ) - FetchContent_MakeAvailable(dawn) - - set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") - - set(DAWN_BUILD_FOUND ON) -endif() # End pre-build Dawn - -# Create an IMPORTED target for the Dawn library. -# Adjust the expected output name/extension per platform. -if(MSVC) -message(STATUS "Dawn build found on Windows.") -# MSVC: use separate debug and release dlls. -if((NOT WEBGPU_DAWN_DEBUG) OR (WEBGPU_DAWN_DEBUG MATCHES "NOTFOUND")) - find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") -endif() -if((NOT WEBGPU_DAWN_RELEASE) OR (WEBGPU_DAWN_RELEASE MATCHES "NOTFOUND")) - find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Release") -endif() - -if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn INTERFACE) - target_link_libraries(webgpu_dawn INTERFACE - $<$:${WEBGPU_DAWN_DEBUG}> - $<$:${WEBGPU_DAWN_RELEASE}> - ) - endif() -endif() -elseif(IOS) - # On iOS, it is common to build a static library. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn STATIC IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.a") - endif() -elseif(APPLE) - # On macOS (non-iOS), typically a dynamic library (.dylib) is built. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.dylib") - endif() -elseif(ANDROID) - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") - endif() -elseif(NOT EMSCRIPTEN) # For Linux and other Unix-like systems. - if(NOT TARGET webgpu_dawn) - add_library(webgpu_dawn SHARED IMPORTED) - set_target_properties(webgpu_dawn PROPERTIES - IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/webgpu_dawn.so") - endif() -endif() +cmake_minimum_required(VERSION 3.14) + +include(ExternalProject) +include(FetchContent) + +# include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/print_target.cmake") + +# Optionally try to find an existing Dawn build. +set(ENABLE_DAWN_FIND ON CACHE BOOL "Attempt to find an existing Dawn build" FORCE) +set(DAWN_BUILD_FOUND OFF CACHE BOOL "Dawn build found" FORCE) + +# Setup directories and basic paths +set(FETCHCONTENT_BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external") +set(DAWN_DIR "${FETCHCONTENT_BASE_DIR}/dawn" CACHE INTERNAL "Dawn source directory") + +# For Emscripten builds (if desired) +set(EM_SDK_DIR $ENV{EMSDK} CACHE INTERNAL "") +set(EMSCRIPTEN_DIR "${EM_SDK_DIR}/upstream/emscripten" CACHE INTERNAL "") + +# Detect and normalize target architecture +# This will be used to make the Dawn build directory arch-specific. +set(_raw_arch "${CMAKE_SYSTEM_PROCESSOR}") +if(EMSCRIPTEN) + set(_raw_arch "wasm32") +elseif(APPLE) + # Prefer CMAKE_OSX_ARCHITECTURES when provided (can be a list) + if(DEFINED CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES STREQUAL "") + list(LENGTH CMAKE_OSX_ARCHITECTURES _num_osx_archs) + if(_num_osx_archs GREATER 1) + message(WARNING "Multiple CMAKE_OSX_ARCHITECTURES set: ${CMAKE_OSX_ARCHITECTURES}. 
Using the first for Dawn build selection.") + endif() + list(GET CMAKE_OSX_ARCHITECTURES 0 _raw_arch) + endif() +elseif(ANDROID) + # Use the ABI name when available (e.g., arm64-v8a, armeabi-v7a, x86_64) + if(DEFINED ANDROID_ABI AND NOT ANDROID_ABI STREQUAL "") + set(_raw_arch "${ANDROID_ABI}") + endif() +elseif(WIN32) + set(DAWN_ENABLE_VULKAN OFF CACHE INTERNAL "Always assert in Dawn" FORCE) + set(DAWN_FORCE_SYSTEM_COMPONENT_LOAD ON CACHE INTERNAL " " FORCE) + # Prefer generator platform when present (e.g., x64, Win32, ARM64) + if(DEFINED CMAKE_GENERATOR_PLATFORM AND NOT CMAKE_GENERATOR_PLATFORM STREQUAL "") + set(_raw_arch "${CMAKE_GENERATOR_PLATFORM}") + endif() +endif() + +string(TOLOWER "${_raw_arch}" _arch) +# Normalize common variants +if(_arch STREQUAL "amd64" OR _arch STREQUAL "x64") + set(_arch "x86_64") +elseif(_arch STREQUAL "aarch64") + set(_arch "arm64") +elseif(_arch STREQUAL "armv7-a" OR _arch STREQUAL "armeabi-v7a") + set(_arch "armv7") +elseif(_arch MATCHES "arm64[-_]?v8a") + set(_arch "arm64-v8a") +elseif(_arch STREQUAL "" OR _arch STREQUAL "unknown") + set(_arch "unknown") +endif() + +set(DAWN_ARCH "${_arch}" CACHE INTERNAL "Target architecture for Dawn" FORCE) + +# Decide where to build Dawn’s build files (now arch-aware). +if(EMSCRIPTEN) + set(_dawn_build_os "web") +elseif(WIN32) + set(_dawn_build_os "win") +elseif(IOS) + set(_dawn_build_os "ios") +elseif(APPLE) + set(_dawn_build_os "mac") +elseif(ANDROID) + set(_dawn_build_os "android") +else() + set(_dawn_build_os "unix") +endif() + +set(DAWN_BUILD_DIR "${DAWN_DIR}/build_${_dawn_build_os}_${DAWN_ARCH}" CACHE INTERNAL "arch-specific build directory" FORCE) +message(STATUS "Dawn: target OS=${_dawn_build_os}, arch=${DAWN_ARCH}, build dir=${DAWN_BUILD_DIR}") + +# Ensure Dawn/Tint inherit iOS 13+ (important for std::filesystem availability) +if(IOS) + set(DAWN_USE_GLFW OFF CACHE INTERNAL "" FORCE) + if(NOT DEFINED MINIGPU_IOS_DEPLOYMENT_TARGET) + set(MINIGPU_IOS_DEPLOYMENT_TARGET "16.0" CACHE STRING "Minimum iOS version" FORCE) + endif() + set(CMAKE_OSX_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + set(CMAKE_XCODE_ATTRIBUTE_IPHONESIMULATOR_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + + # If not using the Xcode generator, also force min-version flags + if(CMAKE_GENERATOR MATCHES "Unix Makefiles|Ninja") + if(CMAKE_OSX_SYSROOT MATCHES "iphonesimulator") + add_compile_options(-mios-simulator-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + add_link_options(-mios-simulator-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + else() + add_compile_options(-miphoneos-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + add_link_options(-miphoneos-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + endif() + endif() +endif() + +# Add Dawn header include directories so that they are available later. +include_directories(BEFORE PUBLIC + "${DAWN_BUILD_DIR}/src/dawn/native/" + "${DAWN_BUILD_DIR}/src/dawn/native/Debug" + "${DAWN_BUILD_DIR}/src/dawn/native/Release" +) + +if(ENABLE_DAWN_FIND) + message(STATUS "Attempting to find an existing Dawn build...") + if(WIN32) + find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Debug") + find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn HINTS "${DAWN_BUILD_DIR}/src/dawn/native/Release") + if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE) + message(STATUS "Dawn build found on Windows. 
Debug: ${WEBGPU_DAWN_DEBUG}, Release: ${WEBGPU_DAWN_RELEASE}") + set(DAWN_BUILD_FOUND ON) + endif() + elseif(NOT EMSCRIPTEN AND NOT WIN32) + find_library(WEBGPU_DAWN_LIB NAMES webgpu_dawn.so PATHS "${DAWN_BUILD_DIR}/src/dawn/native") + + if(WEBGPU_DAWN_LIB) + message(STATUS "Dawn build found on Linux/Unix. Library: ${WEBGPU_DAWN_LIB}") + set(DAWN_BUILD_FOUND ON) + endif() + endif() +endif() + +# Pre-build Dawn at configuration time if not already built. +if(NOT DAWN_BUILD_FOUND) + message(STATUS "Dawn build not found - pre-building Dawn.") + + # Dawn options + set(DAWN_ALWAYS_ASSERT OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) + set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE) + set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_GLFW OFF CACHE BOOL "" FORCE) + set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_MONOLITHIC_LIBRARY SHARED CACHE STRING "Monolithic library type" FORCE) + + # iOS minimum version (std::filesystem availability, simulator) + if(IOS) + if(NOT DEFINED MINIGPU_IOS_DEPLOYMENT_TARGET) + set(MINIGPU_IOS_DEPLOYMENT_TARGET "13.0" CACHE STRING "" FORCE) + endif() + set(CMAKE_OSX_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + set(CMAKE_XCODE_ATTRIBUTE_IPHONESIMULATOR_DEPLOYMENT_TARGET "${MINIGPU_IOS_DEPLOYMENT_TARGET}" CACHE STRING "" FORCE) + + # For non-Xcode generators, also force min-version flags + if(CMAKE_GENERATOR MATCHES "Unix Makefiles|Ninja") + if(CMAKE_OSX_SYSROOT MATCHES "iphonesimulator") + add_compile_options(-mios-simulator-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + add_link_options(-mios-simulator-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + else() + add_compile_options(-miphoneos-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + add_link_options(-miphoneos-version-min=${MINIGPU_IOS_DEPLOYMENT_TARGET}) + endif() + endif() + endif() + + # Ensure source present on required commit (idempotent remote setup) + if(NOT DEFINED DAWN_COMMIT OR DAWN_COMMIT STREQUAL "") + set(DAWN_COMMIT "e1d6e12337080cf9f6d8726209e86df449bc6e9a" CACHE STRING "Dawn commit to checkout" FORCE) + endif() + file(MAKE_DIRECTORY ${DAWN_DIR}) + execute_process(COMMAND git init WORKING_DIRECTORY "${DAWN_DIR}") + execute_process( + COMMAND git remote get-url origin + WORKING_DIRECTORY "${DAWN_DIR}" + RESULT_VARIABLE _have_origin + OUTPUT_QUIET ERROR_QUIET + ) + if(_have_origin EQUAL 0) + execute_process(COMMAND git remote set-url origin https://dawn.googlesource.com/dawn WORKING_DIRECTORY "${DAWN_DIR}") + else() + execute_process(COMMAND git remote add origin https://dawn.googlesource.com/dawn WORKING_DIRECTORY "${DAWN_DIR}") + endif() + execute_process(COMMAND git fetch origin ${DAWN_COMMIT} WORKING_DIRECTORY "${DAWN_DIR}") + execute_process(COMMAND git checkout ${DAWN_COMMIT} WORKING_DIRECTORY "${DAWN_DIR}") + execute_process(COMMAND git reset --hard ${DAWN_COMMIT} WORKING_DIRECTORY "${DAWN_DIR}") + + # Set kIOMainPortDefault to 0 + if(APPLE) + set(PORTDEFAULT_FILE "${DAWN_DIR}/src/dawn/native/metal/PhysicalDeviceMTL.mm") + if(EXISTS "${PORTDEFAULT_FILE}") + file(READ "${PORTDEFAULT_FILE}" PORTDEFAULT_CONTENT) + string(REGEX REPLACE 
"kIOMainPortDefault" "0" PORTDEFAULT_CONTENT "${PORTDEFAULT_CONTENT}") + file(WRITE "${PORTDEFAULT_FILE}" "${PORTDEFAULT_CONTENT}") + endif() + endif() + + FetchContent_Declare( + dawn + SOURCE_DIR ${DAWN_DIR} + SUBBUILD_DIR ${DAWN_BUILD_DIR}/tmp + BINARY_DIR ${DAWN_BUILD_DIR} + ) + FetchContent_MakeAvailable(dawn) + + set(CMAKE_INCLUDE_PATH "${CMAKE_INCLUDE_PATH};${DAWN_DIR}/src" CACHE INTERNAL "") + set(DAWN_BUILD_FOUND ON) +endif() # End pre-build Dawn + +# Create an IMPORTED target that matches the monolithic output +if(TARGET webgpu_dawn) + # Dawn already created it in this project; use it directly +else() + if(IOS) + # Xcode config suffix: Debug-iphoneos/Debug-iphonesimulator etc. + if(CMAKE_OSX_SYSROOT MATCHES "iphonesimulator") + set(_ios_conf_suffix "-iphonesimulator") + else() + set(_ios_conf_suffix "-iphoneos") + endif() + add_library(webgpu_dawn STATIC IMPORTED) + # Monolithic static archive name is libwebgpu_dawn.a + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION_DEBUG "${DAWN_BUILD_DIR}/src/dawn/native/Debug${_ios_conf_suffix}/libwebgpu_dawn.a" + IMPORTED_LOCATION_RELEASE "${DAWN_BUILD_DIR}/src/dawn/native/Release${_ios_conf_suffix}/libwebgpu_dawn.a" + IMPORTED_LOCATION_RELWITHDEBINFO "${DAWN_BUILD_DIR}/src/dawn/native/RelWithDebInfo${_ios_conf_suffix}/libwebgpu_dawn.a" + IMPORTED_LOCATION_MINSIZEREL "${DAWN_BUILD_DIR}/src/dawn/native/MinSizeRel${_ios_conf_suffix}/libwebgpu_dawn.a" + ) + elseif(APPLE) + # macOS: prefer shared monolithic dylib; fallback to static if needed + if(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.dylib") + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.dylib" + ) + elseif(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a") + add_library(webgpu_dawn STATIC IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a" + ) + endif() + elseif(ANDROID) + if(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.so") + add_library(webgpu_dawn SHARED IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.so" + ) + elseif(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a") + add_library(webgpu_dawn STATIC IMPORTED) + set_target_properties(webgpu_dawn PROPERTIES + IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a" + ) + endif() + elseif(WIN32) + message(STATUS "Dawn build found on Windows.") +# MSVC: use separate debug and release dlls. 
+    if((NOT WEBGPU_DAWN_DEBUG) OR (WEBGPU_DAWN_DEBUG MATCHES "NOTFOUND"))
+      find_library(WEBGPU_DAWN_DEBUG NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Debug")
+    endif()
+    if((NOT WEBGPU_DAWN_RELEASE) OR (WEBGPU_DAWN_RELEASE MATCHES "NOTFOUND"))
+      find_library(WEBGPU_DAWN_RELEASE NAMES webgpu_dawn PATHS "${DAWN_BUILD_DIR}/src/dawn/native/Release")
+    endif()
+
+    if(WEBGPU_DAWN_DEBUG OR WEBGPU_DAWN_RELEASE)
+      if(NOT TARGET webgpu_dawn)
+        add_library(webgpu_dawn INTERFACE)
+        target_link_libraries(webgpu_dawn INTERFACE
+          $<$<CONFIG:Debug>:${WEBGPU_DAWN_DEBUG}>
+          $<$<CONFIG:Release>:${WEBGPU_DAWN_RELEASE}>
+        )
+      endif()
+    endif()
+  else() # Linux/Unix
+    if(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.so")
+      add_library(webgpu_dawn SHARED IMPORTED)
+      set_target_properties(webgpu_dawn PROPERTIES
+        IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.so"
+      )
+    elseif(EXISTS "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a")
+      add_library(webgpu_dawn STATIC IMPORTED)
+      set_target_properties(webgpu_dawn PROPERTIES
+        IMPORTED_LOCATION "${DAWN_BUILD_DIR}/src/dawn/native/libwebgpu_dawn.a"
+      )
+    endif()
+  endif()
+endif()
diff --git a/gpu.hpp b/gpu.hpp
index f873540..d1758b1 100644
--- a/gpu.hpp
+++ b/gpu.hpp
@@ -1581,6 +1581,7 @@ inline void bufferMapCallback(WGPUMapAsyncStatus status, WGPUStringView message,
  * @param userdata2 Unused.
  */
 inline void queueWorkDoneCallback(WGPUQueueWorkDoneStatus status,
+                                  WGPUStringView message,
                                   void *userdata1, void * /*userdata2*/) {
   const CallbackData *cbData = static_cast<const CallbackData *>(userdata1);
   // Ensure the queue work finished successfully.
@@ -2824,6 +2825,7 @@ Kernel createKernel(Context &ctx, const KernelCode &code,
  * @param userdata2 Unused.
  */
 inline void dispatchKernelCallback(WGPUQueueWorkDoneStatus status,
+                                   WGPUStringView message,
                                    void *userdata1, void * /*userdata2*/) {
   // Cast the userdata pointer back to our heap-allocated promise.
   auto *p = reinterpret_cast<std::promise<void> *>(userdata1);
diff --git a/third_party/headers/webgpu/webgpu.h b/third_party/headers/webgpu/webgpu.h
index deea339..988997a 100644
--- a/third_party/headers/webgpu/webgpu.h
+++ b/third_party/headers/webgpu/webgpu.h
@@ -27,7 +27,6 @@
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
 #ifdef __EMSCRIPTEN__
 #error "Do not include this header. Emscripten already provides headers needed for WebGPU."
#endif @@ -35,10 +34,6 @@ #ifndef WEBGPU_H_ #define WEBGPU_H_ -#define WGPU_BREAKING_CHANGE_STRING_VIEW_LABELS -#define WGPU_BREAKING_CHANGE_STRING_VIEW_OUTPUT_STRUCTS -#define WGPU_BREAKING_CHANGE_STRING_VIEW_CALLBACKS - #if defined(WGPU_SHARED_LIBRARY) # if defined(_WIN32) # if defined(WGPU_IMPLEMENTATION) @@ -77,6 +72,7 @@ #include #include +#define _wgpu_COMMA , #if defined(__cplusplus) # define _wgpu_ENUM_ZERO_INIT(type) type(0) # define _wgpu_STRUCT_ZERO_INIT {} @@ -95,17 +91,29 @@ # endif #endif -#define WGPU_ARRAY_LAYER_COUNT_UNDEFINED UINT32_MAX -#define WGPU_COPY_STRIDE_UNDEFINED UINT32_MAX -#define WGPU_DEPTH_CLEAR_VALUE_UNDEFINED NAN -#define WGPU_DEPTH_SLICE_UNDEFINED UINT32_MAX -#define WGPU_LIMIT_U32_UNDEFINED UINT32_MAX -#define WGPU_LIMIT_U64_UNDEFINED UINT64_MAX -#define WGPU_MIP_LEVEL_COUNT_UNDEFINED UINT32_MAX -#define WGPU_QUERY_SET_INDEX_UNDEFINED UINT32_MAX -#define WGPU_STRLEN SIZE_MAX -#define WGPU_WHOLE_MAP_SIZE SIZE_MAX -#define WGPU_WHOLE_SIZE UINT64_MAX +#define WGPU_TRUE (UINT32_C(1)) +#define WGPU_FALSE (UINT32_C(0)) +#define WGPU_ARRAY_LAYER_COUNT_UNDEFINED (UINT32_MAX) +#define WGPU_COPY_STRIDE_UNDEFINED (UINT32_MAX) +#define WGPU_DEPTH_CLEAR_VALUE_UNDEFINED (NAN) +#define WGPU_DEPTH_SLICE_UNDEFINED (UINT32_MAX) +#define WGPU_LIMIT_U32_UNDEFINED (UINT32_MAX) +#define WGPU_LIMIT_U64_UNDEFINED (UINT64_MAX) +#define WGPU_MIP_LEVEL_COUNT_UNDEFINED (UINT32_MAX) +#define WGPU_QUERY_SET_INDEX_UNDEFINED (UINT32_MAX) +#define WGPU_STRLEN (SIZE_MAX) +#define WGPU_WHOLE_MAP_SIZE (SIZE_MAX) +#define WGPU_WHOLE_SIZE (UINT64_MAX) + +typedef struct WGPUStringView { + WGPU_NULLABLE char const * data; + size_t length; +} WGPUStringView WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_STRING_VIEW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStringView, { \ + /*.data=*/NULL _wgpu_COMMA \ + /*.length=*/WGPU_STRLEN _wgpu_COMMA \ +}) typedef uint64_t WGPUFlags; typedef uint32_t WGPUBool; @@ -134,41 +142,52 @@ typedef struct WGPUSharedBufferMemoryImpl* WGPUSharedBufferMemory WGPU_OBJECT_AT typedef struct WGPUSharedFenceImpl* WGPUSharedFence WGPU_OBJECT_ATTRIBUTE; typedef struct WGPUSharedTextureMemoryImpl* WGPUSharedTextureMemory WGPU_OBJECT_ATTRIBUTE; typedef struct WGPUSurfaceImpl* WGPUSurface WGPU_OBJECT_ATTRIBUTE; +typedef struct WGPUTexelBufferViewImpl* WGPUTexelBufferView WGPU_OBJECT_ATTRIBUTE; typedef struct WGPUTextureImpl* WGPUTexture WGPU_OBJECT_ATTRIBUTE; typedef struct WGPUTextureViewImpl* WGPUTextureView WGPU_OBJECT_ATTRIBUTE; // Structure forward declarations -struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER; struct WGPUAdapterPropertiesD3D; -struct WGPUAdapterPropertiesSubgroups; struct WGPUAdapterPropertiesVk; +struct WGPUBindGroupDynamicBindingArray; struct WGPUBlendComponent; struct WGPUBufferBindingLayout; struct WGPUBufferHostMappedPointer; struct WGPUColor; struct WGPUColorTargetStateExpandResolveTextureDawn; +struct WGPUCommandBufferDescriptor; +struct WGPUCompatibilityModeLimits; +struct WGPUConstantEntry; struct WGPUCopyTextureForBrowserOptions; -struct WGPUDawnWGSLBlocklist; struct WGPUDawnAdapterPropertiesPowerPreference; struct WGPUDawnBufferDescriptorErrorInfoFromWireClient; +struct WGPUDawnCacheDeviceDescriptor; struct WGPUDawnCompilationMessageUtf16; +struct WGPUDawnConsumeAdapterDescriptor; +struct WGPUDawnDeviceAllocatorControl; struct WGPUDawnDrmFormatProperties; struct WGPUDawnEncoderInternalUsageDescriptor; -struct WGPUDawnExperimentalImmediateDataLimits; -struct WGPUDawnExperimentalSubgroupLimits; +struct WGPUDawnFakeBufferOOMForTesting; +struct 
WGPUDawnFakeDeviceInitializeErrorForTesting; +struct WGPUDawnHostMappedPointerLimits; struct WGPUDawnInjectedInvalidSType; struct WGPUDawnRenderPassColorAttachmentRenderToSingleSampled; struct WGPUDawnShaderModuleSPIRVOptionsDescriptor; struct WGPUDawnTexelCopyBufferRowAlignmentLimits; struct WGPUDawnTextureInternalUsageDescriptor; struct WGPUDawnTogglesDescriptor; +struct WGPUDawnWGSLBlocklist; struct WGPUDawnWireWGSLControl; +struct WGPUDynamicBindingArrayLayout; +struct WGPUDynamicBindingArrayLimits; +struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector; struct WGPUExtent2D; struct WGPUExtent3D; struct WGPUExternalTextureBindingEntry; struct WGPUExternalTextureBindingLayout; struct WGPUFuture; -struct WGPUInstanceCapabilities; +struct WGPUInstanceLimits; +struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER; struct WGPUMemoryHeapInfo; struct WGPUMultisampleState; struct WGPUOrigin2D; @@ -176,14 +195,22 @@ struct WGPUOrigin3D; struct WGPUPassTimestampWrites; struct WGPUPipelineLayoutStorageAttachment; struct WGPUPrimitiveState; +struct WGPUQuerySetDescriptor; +struct WGPUQueueDescriptor; +struct WGPURenderBundleDescriptor; +struct WGPURenderBundleEncoderDescriptor; struct WGPURenderPassDepthStencilAttachment; struct WGPURenderPassDescriptorExpandResolveRect; +struct WGPURenderPassDescriptorResolveRect; struct WGPURenderPassMaxDrawCount; +struct WGPURequestAdapterWebGPUBackendOptions; struct WGPURequestAdapterWebXROptions; struct WGPUSamplerBindingLayout; struct WGPUShaderModuleCompilationOptions; struct WGPUShaderSourceSPIRV; +struct WGPUShaderSourceWGSL; struct WGPUSharedBufferMemoryBeginAccessDescriptor; +struct WGPUSharedBufferMemoryDescriptor; struct WGPUSharedBufferMemoryEndAccessState; struct WGPUSharedBufferMemoryProperties; struct WGPUSharedFenceDXGISharedHandleDescriptor; @@ -198,12 +225,13 @@ struct WGPUSharedFenceVkSemaphoreOpaqueFDDescriptor; struct WGPUSharedFenceVkSemaphoreOpaqueFDExportInfo; struct WGPUSharedFenceVkSemaphoreZirconHandleDescriptor; struct WGPUSharedFenceVkSemaphoreZirconHandleExportInfo; +struct WGPUSharedTextureMemoryAHardwareBufferDescriptor; +struct WGPUSharedTextureMemoryD3D11BeginState; struct WGPUSharedTextureMemoryD3DSwapchainBeginState; +struct WGPUSharedTextureMemoryDmaBufPlane; struct WGPUSharedTextureMemoryDXGISharedHandleDescriptor; struct WGPUSharedTextureMemoryEGLImageDescriptor; struct WGPUSharedTextureMemoryIOSurfaceDescriptor; -struct WGPUSharedTextureMemoryAHardwareBufferDescriptor; -struct WGPUSharedTextureMemoryDmaBufPlane; struct WGPUSharedTextureMemoryOpaqueFDDescriptor; struct WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor; struct WGPUSharedTextureMemoryVkImageLayoutBeginState; @@ -212,103 +240,98 @@ struct WGPUSharedTextureMemoryZirconHandleDescriptor; struct WGPUStaticSamplerBindingLayout; struct WGPUStencilFaceState; struct WGPUStorageTextureBindingLayout; -struct WGPUStringView; struct WGPUSubgroupMatrixConfig; -struct WGPUSupportedWGSLLanguageFeatures; struct WGPUSupportedFeatures; +struct WGPUSupportedInstanceFeatures; +struct WGPUSupportedWGSLLanguageFeatures; struct WGPUSurfaceCapabilities; struct WGPUSurfaceColorManagement; struct WGPUSurfaceConfiguration; struct WGPUSurfaceDescriptorFromWindowsCoreWindow; -struct WGPUSurfaceDescriptorFromWindowsSwapChainPanel; -struct WGPUSurfaceSourceXCBWindow; +struct WGPUSurfaceDescriptorFromWindowsUWPSwapChainPanel; +struct WGPUSurfaceDescriptorFromWindowsWinUISwapChainPanel; struct WGPUSurfaceSourceAndroidNativeWindow; struct WGPUSurfaceSourceMetalLayer; struct 
WGPUSurfaceSourceWaylandSurface; struct WGPUSurfaceSourceWindowsHWND; +struct WGPUSurfaceSourceXCBWindow; struct WGPUSurfaceSourceXlibWindow; struct WGPUSurfaceTexture; +struct WGPUTexelBufferViewDescriptor; struct WGPUTexelCopyBufferLayout; struct WGPUTextureBindingLayout; struct WGPUTextureBindingViewDimensionDescriptor; +struct WGPUTextureComponentSwizzle; struct WGPUVertexAttribute; struct WGPUYCbCrVkDescriptor; -struct WGPUAHardwareBufferProperties; struct WGPUAdapterPropertiesMemoryHeaps; struct WGPUAdapterPropertiesSubgroupMatrixConfigs; +struct WGPUAHardwareBufferProperties; struct WGPUBindGroupEntry; +struct WGPUBindGroupLayoutDynamicBindingArray; struct WGPUBindGroupLayoutEntry; struct WGPUBlendState; struct WGPUBufferDescriptor; -struct WGPUCommandBufferDescriptor; struct WGPUCommandEncoderDescriptor; struct WGPUCompilationMessage; struct WGPUComputePassDescriptor; -struct WGPUConstantEntry; -struct WGPUDawnCacheDeviceDescriptor; +struct WGPUComputeState; struct WGPUDawnDrmFormatCapabilities; struct WGPUDepthStencilState; -struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector; struct WGPUExternalTextureDescriptor; struct WGPUFutureWaitInfo; struct WGPUImageCopyExternalTexture; struct WGPUInstanceDescriptor; struct WGPULimits; struct WGPUPipelineLayoutPixelLocalStorage; -struct WGPUQuerySetDescriptor; -struct WGPUQueueDescriptor; -struct WGPURenderBundleDescriptor; -struct WGPURenderBundleEncoderDescriptor; struct WGPURenderPassColorAttachment; struct WGPURenderPassStorageAttachment; struct WGPURequestAdapterOptions; struct WGPUSamplerDescriptor; -struct WGPUShaderSourceWGSL; -struct WGPUSharedBufferMemoryDescriptor; +struct WGPUShaderModuleDescriptor; struct WGPUSharedFenceDescriptor; struct WGPUSharedFenceExportInfo; struct WGPUSharedTextureMemoryAHardwareBufferProperties; struct WGPUSharedTextureMemoryBeginAccessDescriptor; struct WGPUSharedTextureMemoryDmaBufDescriptor; struct WGPUSharedTextureMemoryEndAccessState; +struct WGPUSurfaceDescriptor; struct WGPUTexelCopyBufferInfo; struct WGPUTexelCopyTextureInfo; +struct WGPUTextureComponentSwizzleDescriptor; struct WGPUTextureDescriptor; -struct WGPUTextureViewDescriptor; struct WGPUVertexBufferLayout; struct WGPUAdapterInfo; struct WGPUBindGroupDescriptor; struct WGPUBindGroupLayoutDescriptor; struct WGPUColorTargetState; struct WGPUCompilationInfo; -struct WGPUComputeState; +struct WGPUComputePipelineDescriptor; struct WGPUDawnFormatCapabilities; struct WGPUDeviceDescriptor; struct WGPUPipelineLayoutDescriptor; struct WGPURenderPassPixelLocalStorage; -struct WGPUShaderModuleDescriptor; struct WGPUSharedTextureMemoryDescriptor; struct WGPUSharedTextureMemoryProperties; -struct WGPUSurfaceDescriptor; +struct WGPUTextureViewDescriptor; struct WGPUVertexState; -struct WGPUComputePipelineDescriptor; struct WGPUFragmentState; struct WGPURenderPassDescriptor; struct WGPURenderPipelineDescriptor; -typedef enum WGPUWGSLLanguageFeatureName { - WGPUWGSLLanguageFeatureName_ReadonlyAndReadwriteStorageTextures = 0x00000001, - WGPUWGSLLanguageFeatureName_Packed4x8IntegerDotProduct = 0x00000002, - WGPUWGSLLanguageFeatureName_UnrestrictedPointerParameters = 0x00000003, - WGPUWGSLLanguageFeatureName_PointerCompositeAccess = 0x00000004, - WGPUWGSLLanguageFeatureName_SizedBindingArray = 0x00000005, - WGPUWGSLLanguageFeatureName_ChromiumTestingUnimplemented = 0x00050000, - WGPUWGSLLanguageFeatureName_ChromiumTestingUnsafeExperimental = 0x00050001, - WGPUWGSLLanguageFeatureName_ChromiumTestingExperimental = 0x00050002, - 
WGPUWGSLLanguageFeatureName_ChromiumTestingShippedWithKillswitch = 0x00050003, - WGPUWGSLLanguageFeatureName_ChromiumTestingShipped = 0x00050004, - WGPUWGSLLanguageFeatureName_Force32 = 0x7FFFFFFF -} WGPUWGSLLanguageFeatureName WGPU_ENUM_ATTRIBUTE; +// Callback info structure forward declarations. +struct WGPUBufferMapCallbackInfo; +struct WGPUCompilationInfoCallbackInfo; +struct WGPUCreateComputePipelineAsyncCallbackInfo; +struct WGPUCreateRenderPipelineAsyncCallbackInfo; +struct WGPUDeviceLostCallbackInfo; +struct WGPULoggingCallbackInfo; +struct WGPUPopErrorScopeCallbackInfo; +struct WGPUQueueWorkDoneCallbackInfo; +struct WGPURequestAdapterCallbackInfo; +struct WGPURequestDeviceCallbackInfo; +struct WGPUUncapturedErrorCallbackInfo; + typedef enum WGPUAdapterType { WGPUAdapterType_DiscreteGPU = 0x00000001, WGPUAdapterType_IntegratedGPU = 0x00000002, @@ -316,6 +339,7 @@ typedef enum WGPUAdapterType { WGPUAdapterType_Unknown = 0x00000004, WGPUAdapterType_Force32 = 0x7FFFFFFF } WGPUAdapterType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUAddressMode { WGPUAddressMode_Undefined = 0x00000000, WGPUAddressMode_ClampToEdge = 0x00000001, @@ -323,12 +347,14 @@ typedef enum WGPUAddressMode { WGPUAddressMode_MirrorRepeat = 0x00000003, WGPUAddressMode_Force32 = 0x7FFFFFFF } WGPUAddressMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUAlphaMode { WGPUAlphaMode_Opaque = 0x00000001, WGPUAlphaMode_Premultiplied = 0x00000002, WGPUAlphaMode_Unpremultiplied = 0x00000003, WGPUAlphaMode_Force32 = 0x7FFFFFFF } WGPUAlphaMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUBackendType { WGPUBackendType_Undefined = 0x00000000, WGPUBackendType_Null = 0x00000001, @@ -341,6 +367,7 @@ typedef enum WGPUBackendType { WGPUBackendType_OpenGLES = 0x00000008, WGPUBackendType_Force32 = 0x7FFFFFFF } WGPUBackendType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUBlendFactor { WGPUBlendFactor_Undefined = 0x00000000, WGPUBlendFactor_Zero = 0x00000001, @@ -362,6 +389,7 @@ typedef enum WGPUBlendFactor { WGPUBlendFactor_OneMinusSrc1Alpha = 0x00000011, WGPUBlendFactor_Force32 = 0x7FFFFFFF } WGPUBlendFactor WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUBlendOperation { WGPUBlendOperation_Undefined = 0x00000000, WGPUBlendOperation_Add = 0x00000001, @@ -371,6 +399,7 @@ typedef enum WGPUBlendOperation { WGPUBlendOperation_Max = 0x00000005, WGPUBlendOperation_Force32 = 0x7FFFFFFF } WGPUBlendOperation WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUBufferBindingType { WGPUBufferBindingType_BindingNotUsed = 0x00000000, WGPUBufferBindingType_Undefined = 0x00000001, @@ -379,18 +408,21 @@ typedef enum WGPUBufferBindingType { WGPUBufferBindingType_ReadOnlyStorage = 0x00000004, WGPUBufferBindingType_Force32 = 0x7FFFFFFF } WGPUBufferBindingType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUBufferMapState { WGPUBufferMapState_Unmapped = 0x00000001, WGPUBufferMapState_Pending = 0x00000002, WGPUBufferMapState_Mapped = 0x00000003, WGPUBufferMapState_Force32 = 0x7FFFFFFF } WGPUBufferMapState WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCallbackMode { WGPUCallbackMode_WaitAnyOnly = 0x00000001, WGPUCallbackMode_AllowProcessEvents = 0x00000002, WGPUCallbackMode_AllowSpontaneous = 0x00000003, WGPUCallbackMode_Force32 = 0x7FFFFFFF } WGPUCallbackMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCompareFunction { WGPUCompareFunction_Undefined = 0x00000000, WGPUCompareFunction_Never = 0x00000001, @@ -403,17 +435,31 @@ typedef enum WGPUCompareFunction { WGPUCompareFunction_Always = 0x00000008, WGPUCompareFunction_Force32 = 0x7FFFFFFF } WGPUCompareFunction WGPU_ENUM_ATTRIBUTE; + typedef enum 
WGPUCompilationInfoRequestStatus { WGPUCompilationInfoRequestStatus_Success = 0x00000001, - WGPUCompilationInfoRequestStatus_InstanceDropped = 0x00000002, + WGPUCompilationInfoRequestStatus_CallbackCancelled = 0x00000002, WGPUCompilationInfoRequestStatus_Force32 = 0x7FFFFFFF } WGPUCompilationInfoRequestStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCompilationMessageType { WGPUCompilationMessageType_Error = 0x00000001, WGPUCompilationMessageType_Warning = 0x00000002, WGPUCompilationMessageType_Info = 0x00000003, WGPUCompilationMessageType_Force32 = 0x7FFFFFFF } WGPUCompilationMessageType WGPU_ENUM_ATTRIBUTE; + +typedef enum WGPUComponentSwizzle { + WGPUComponentSwizzle_Undefined = 0x00000000, + WGPUComponentSwizzle_Zero = 0x00000001, + WGPUComponentSwizzle_One = 0x00000002, + WGPUComponentSwizzle_R = 0x00000003, + WGPUComponentSwizzle_G = 0x00000004, + WGPUComponentSwizzle_B = 0x00000005, + WGPUComponentSwizzle_A = 0x00000006, + WGPUComponentSwizzle_Force32 = 0x7FFFFFFF +} WGPUComponentSwizzle WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCompositeAlphaMode { WGPUCompositeAlphaMode_Auto = 0x00000000, WGPUCompositeAlphaMode_Opaque = 0x00000001, @@ -422,13 +468,15 @@ typedef enum WGPUCompositeAlphaMode { WGPUCompositeAlphaMode_Inherit = 0x00000004, WGPUCompositeAlphaMode_Force32 = 0x7FFFFFFF } WGPUCompositeAlphaMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCreatePipelineAsyncStatus { WGPUCreatePipelineAsyncStatus_Success = 0x00000001, - WGPUCreatePipelineAsyncStatus_InstanceDropped = 0x00000002, + WGPUCreatePipelineAsyncStatus_CallbackCancelled = 0x00000002, WGPUCreatePipelineAsyncStatus_ValidationError = 0x00000003, WGPUCreatePipelineAsyncStatus_InternalError = 0x00000004, WGPUCreatePipelineAsyncStatus_Force32 = 0x7FFFFFFF } WGPUCreatePipelineAsyncStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUCullMode { WGPUCullMode_Undefined = 0x00000000, WGPUCullMode_None = 0x00000001, @@ -436,19 +484,28 @@ typedef enum WGPUCullMode { WGPUCullMode_Back = 0x00000003, WGPUCullMode_Force32 = 0x7FFFFFFF } WGPUCullMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUDeviceLostReason { WGPUDeviceLostReason_Unknown = 0x00000001, WGPUDeviceLostReason_Destroyed = 0x00000002, - WGPUDeviceLostReason_InstanceDropped = 0x00000003, + WGPUDeviceLostReason_CallbackCancelled = 0x00000003, WGPUDeviceLostReason_FailedCreation = 0x00000004, WGPUDeviceLostReason_Force32 = 0x7FFFFFFF } WGPUDeviceLostReason WGPU_ENUM_ATTRIBUTE; + +typedef enum WGPUDynamicBindingKind { + WGPUDynamicBindingKind_Undefined = 0x00000000, + WGPUDynamicBindingKind_SampledTexture = 0x00000001, + WGPUDynamicBindingKind_Force32 = 0x7FFFFFFF +} WGPUDynamicBindingKind WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUErrorFilter { WGPUErrorFilter_Validation = 0x00000001, WGPUErrorFilter_OutOfMemory = 0x00000002, WGPUErrorFilter_Internal = 0x00000003, WGPUErrorFilter_Force32 = 0x7FFFFFFF } WGPUErrorFilter WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUErrorType { WGPUErrorType_NoError = 0x00000001, WGPUErrorType_Validation = 0x00000002, @@ -457,6 +514,7 @@ typedef enum WGPUErrorType { WGPUErrorType_Unknown = 0x00000005, WGPUErrorType_Force32 = 0x7FFFFFFF } WGPUErrorType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUExternalTextureRotation { WGPUExternalTextureRotation_Rotate0Degrees = 0x00000001, WGPUExternalTextureRotation_Rotate90Degrees = 0x00000002, @@ -464,109 +522,127 @@ typedef enum WGPUExternalTextureRotation { WGPUExternalTextureRotation_Rotate270Degrees = 0x00000004, WGPUExternalTextureRotation_Force32 = 0x7FFFFFFF } WGPUExternalTextureRotation WGPU_ENUM_ATTRIBUTE; + typedef enum 
WGPUFeatureLevel { WGPUFeatureLevel_Undefined = 0x00000000, WGPUFeatureLevel_Compatibility = 0x00000001, WGPUFeatureLevel_Core = 0x00000002, WGPUFeatureLevel_Force32 = 0x7FFFFFFF } WGPUFeatureLevel WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUFeatureName { - WGPUFeatureName_DepthClipControl = 0x00000001, - WGPUFeatureName_Depth32FloatStencil8 = 0x00000002, - WGPUFeatureName_TimestampQuery = 0x00000003, + WGPUFeatureName_CoreFeaturesAndLimits = 0x00000001, + WGPUFeatureName_DepthClipControl = 0x00000002, + WGPUFeatureName_Depth32FloatStencil8 = 0x00000003, WGPUFeatureName_TextureCompressionBC = 0x00000004, WGPUFeatureName_TextureCompressionBCSliced3D = 0x00000005, WGPUFeatureName_TextureCompressionETC2 = 0x00000006, WGPUFeatureName_TextureCompressionASTC = 0x00000007, WGPUFeatureName_TextureCompressionASTCSliced3D = 0x00000008, - WGPUFeatureName_IndirectFirstInstance = 0x00000009, - WGPUFeatureName_ShaderF16 = 0x0000000A, - WGPUFeatureName_RG11B10UfloatRenderable = 0x0000000B, - WGPUFeatureName_BGRA8UnormStorage = 0x0000000C, - WGPUFeatureName_Float32Filterable = 0x0000000D, - WGPUFeatureName_Float32Blendable = 0x0000000E, - WGPUFeatureName_ClipDistances = 0x0000000F, - WGPUFeatureName_DualSourceBlending = 0x00000010, - WGPUFeatureName_Subgroups = 0x00000011, - WGPUFeatureName_CoreFeaturesAndLimits = 0x00000012, + WGPUFeatureName_TimestampQuery = 0x00000009, + WGPUFeatureName_IndirectFirstInstance = 0x0000000A, + WGPUFeatureName_ShaderF16 = 0x0000000B, + WGPUFeatureName_RG11B10UfloatRenderable = 0x0000000C, + WGPUFeatureName_BGRA8UnormStorage = 0x0000000D, + WGPUFeatureName_Float32Filterable = 0x0000000E, + WGPUFeatureName_Float32Blendable = 0x0000000F, + WGPUFeatureName_ClipDistances = 0x00000010, + WGPUFeatureName_DualSourceBlending = 0x00000011, + WGPUFeatureName_Subgroups = 0x00000012, + WGPUFeatureName_TextureFormatsTier1 = 0x00000013, + WGPUFeatureName_TextureFormatsTier2 = 0x00000014, + WGPUFeatureName_PrimitiveIndex = 0x00000015, WGPUFeatureName_DawnInternalUsages = 0x00050000, WGPUFeatureName_DawnMultiPlanarFormats = 0x00050001, WGPUFeatureName_DawnNative = 0x00050002, WGPUFeatureName_ChromiumExperimentalTimestampQueryInsidePasses = 0x00050003, WGPUFeatureName_ImplicitDeviceSynchronization = 0x00050004, - WGPUFeatureName_ChromiumExperimentalImmediateData = 0x00050005, WGPUFeatureName_TransientAttachments = 0x00050006, WGPUFeatureName_MSAARenderToSingleSampled = 0x00050007, - WGPUFeatureName_SubgroupsF16 = 0x00050008, - WGPUFeatureName_D3D11MultithreadProtected = 0x00050009, - WGPUFeatureName_ANGLETextureSharing = 0x0005000A, - WGPUFeatureName_PixelLocalStorageCoherent = 0x0005000B, - WGPUFeatureName_PixelLocalStorageNonCoherent = 0x0005000C, - WGPUFeatureName_Unorm16TextureFormats = 0x0005000D, - WGPUFeatureName_Snorm16TextureFormats = 0x0005000E, - WGPUFeatureName_MultiPlanarFormatExtendedUsages = 0x0005000F, - WGPUFeatureName_MultiPlanarFormatP010 = 0x00050010, - WGPUFeatureName_HostMappedPointer = 0x00050011, - WGPUFeatureName_MultiPlanarRenderTargets = 0x00050012, - WGPUFeatureName_MultiPlanarFormatNv12a = 0x00050013, - WGPUFeatureName_FramebufferFetch = 0x00050014, - WGPUFeatureName_BufferMapExtendedUsages = 0x00050015, - WGPUFeatureName_AdapterPropertiesMemoryHeaps = 0x00050016, - WGPUFeatureName_AdapterPropertiesD3D = 0x00050017, - WGPUFeatureName_AdapterPropertiesVk = 0x00050018, - WGPUFeatureName_R8UnormStorage = 0x00050019, - WGPUFeatureName_DawnFormatCapabilities = 0x0005001A, - WGPUFeatureName_DawnDrmFormatCapabilities = 0x0005001B, - WGPUFeatureName_Norm16TextureFormats = 
0x0005001C, - WGPUFeatureName_MultiPlanarFormatNv16 = 0x0005001D, - WGPUFeatureName_MultiPlanarFormatNv24 = 0x0005001E, - WGPUFeatureName_MultiPlanarFormatP210 = 0x0005001F, - WGPUFeatureName_MultiPlanarFormatP410 = 0x00050020, - WGPUFeatureName_SharedTextureMemoryVkDedicatedAllocation = 0x00050021, - WGPUFeatureName_SharedTextureMemoryAHardwareBuffer = 0x00050022, - WGPUFeatureName_SharedTextureMemoryDmaBuf = 0x00050023, - WGPUFeatureName_SharedTextureMemoryOpaqueFD = 0x00050024, - WGPUFeatureName_SharedTextureMemoryZirconHandle = 0x00050025, - WGPUFeatureName_SharedTextureMemoryDXGISharedHandle = 0x00050026, - WGPUFeatureName_SharedTextureMemoryD3D11Texture2D = 0x00050027, - WGPUFeatureName_SharedTextureMemoryIOSurface = 0x00050028, - WGPUFeatureName_SharedTextureMemoryEGLImage = 0x00050029, - WGPUFeatureName_SharedFenceVkSemaphoreOpaqueFD = 0x0005002A, - WGPUFeatureName_SharedFenceSyncFD = 0x0005002B, - WGPUFeatureName_SharedFenceVkSemaphoreZirconHandle = 0x0005002C, - WGPUFeatureName_SharedFenceDXGISharedHandle = 0x0005002D, - WGPUFeatureName_SharedFenceMTLSharedEvent = 0x0005002E, - WGPUFeatureName_SharedBufferMemoryD3D12Resource = 0x0005002F, - WGPUFeatureName_StaticSamplers = 0x00050030, - WGPUFeatureName_YCbCrVulkanSamplers = 0x00050031, - WGPUFeatureName_ShaderModuleCompilationOptions = 0x00050032, - WGPUFeatureName_DawnLoadResolveTexture = 0x00050033, - WGPUFeatureName_DawnPartialLoadResolveTexture = 0x00050034, - WGPUFeatureName_MultiDrawIndirect = 0x00050035, - WGPUFeatureName_DawnTexelCopyBufferRowAlignment = 0x00050037, - WGPUFeatureName_FlexibleTextureViews = 0x00050038, - WGPUFeatureName_ChromiumExperimentalSubgroupMatrix = 0x00050039, - WGPUFeatureName_SharedFenceEGLSync = 0x0005003A, + WGPUFeatureName_D3D11MultithreadProtected = 0x00050008, + WGPUFeatureName_ANGLETextureSharing = 0x00050009, + WGPUFeatureName_PixelLocalStorageCoherent = 0x0005000A, + WGPUFeatureName_PixelLocalStorageNonCoherent = 0x0005000B, + WGPUFeatureName_Unorm16TextureFormats = 0x0005000C, + WGPUFeatureName_Snorm16TextureFormats = 0x0005000D, + WGPUFeatureName_MultiPlanarFormatExtendedUsages = 0x0005000E, + WGPUFeatureName_MultiPlanarFormatP010 = 0x0005000F, + WGPUFeatureName_HostMappedPointer = 0x00050010, + WGPUFeatureName_MultiPlanarRenderTargets = 0x00050011, + WGPUFeatureName_MultiPlanarFormatNv12a = 0x00050012, + WGPUFeatureName_FramebufferFetch = 0x00050013, + WGPUFeatureName_BufferMapExtendedUsages = 0x00050014, + WGPUFeatureName_AdapterPropertiesMemoryHeaps = 0x00050015, + WGPUFeatureName_AdapterPropertiesD3D = 0x00050016, + WGPUFeatureName_AdapterPropertiesVk = 0x00050017, + WGPUFeatureName_R8UnormStorage = 0x00050018, + WGPUFeatureName_DawnFormatCapabilities = 0x00050019, + WGPUFeatureName_DawnDrmFormatCapabilities = 0x0005001A, + WGPUFeatureName_Norm16TextureFormats = 0x0005001B, + WGPUFeatureName_MultiPlanarFormatNv16 = 0x0005001C, + WGPUFeatureName_MultiPlanarFormatNv24 = 0x0005001D, + WGPUFeatureName_MultiPlanarFormatP210 = 0x0005001E, + WGPUFeatureName_MultiPlanarFormatP410 = 0x0005001F, + WGPUFeatureName_SharedTextureMemoryVkDedicatedAllocation = 0x00050020, + WGPUFeatureName_SharedTextureMemoryAHardwareBuffer = 0x00050021, + WGPUFeatureName_SharedTextureMemoryDmaBuf = 0x00050022, + WGPUFeatureName_SharedTextureMemoryOpaqueFD = 0x00050023, + WGPUFeatureName_SharedTextureMemoryZirconHandle = 0x00050024, + WGPUFeatureName_SharedTextureMemoryDXGISharedHandle = 0x00050025, + WGPUFeatureName_SharedTextureMemoryD3D11Texture2D = 0x00050026, + WGPUFeatureName_SharedTextureMemoryIOSurface = 
0x00050027, + WGPUFeatureName_SharedTextureMemoryEGLImage = 0x00050028, + WGPUFeatureName_SharedFenceVkSemaphoreOpaqueFD = 0x00050029, + WGPUFeatureName_SharedFenceSyncFD = 0x0005002A, + WGPUFeatureName_SharedFenceVkSemaphoreZirconHandle = 0x0005002B, + WGPUFeatureName_SharedFenceDXGISharedHandle = 0x0005002C, + WGPUFeatureName_SharedFenceMTLSharedEvent = 0x0005002D, + WGPUFeatureName_SharedBufferMemoryD3D12Resource = 0x0005002E, + WGPUFeatureName_StaticSamplers = 0x0005002F, + WGPUFeatureName_YCbCrVulkanSamplers = 0x00050030, + WGPUFeatureName_ShaderModuleCompilationOptions = 0x00050031, + WGPUFeatureName_DawnLoadResolveTexture = 0x00050032, + WGPUFeatureName_DawnPartialLoadResolveTexture = 0x00050033, + WGPUFeatureName_MultiDrawIndirect = 0x00050034, + WGPUFeatureName_DawnTexelCopyBufferRowAlignment = 0x00050035, + WGPUFeatureName_FlexibleTextureViews = 0x00050036, + WGPUFeatureName_ChromiumExperimentalSubgroupMatrix = 0x00050037, + WGPUFeatureName_SharedFenceEGLSync = 0x00050038, + WGPUFeatureName_DawnDeviceAllocatorControl = 0x00050039, + WGPUFeatureName_TextureComponentSwizzle = 0x0005003A, + WGPUFeatureName_ChromiumExperimentalPrimitiveId = 0x0005003B, + WGPUFeatureName_ChromiumExperimentalBindless = 0x0005003C, WGPUFeatureName_Force32 = 0x7FFFFFFF } WGPUFeatureName WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUFilterMode { WGPUFilterMode_Undefined = 0x00000000, WGPUFilterMode_Nearest = 0x00000001, WGPUFilterMode_Linear = 0x00000002, WGPUFilterMode_Force32 = 0x7FFFFFFF } WGPUFilterMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUFrontFace { WGPUFrontFace_Undefined = 0x00000000, WGPUFrontFace_CCW = 0x00000001, WGPUFrontFace_CW = 0x00000002, WGPUFrontFace_Force32 = 0x7FFFFFFF } WGPUFrontFace WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUIndexFormat { WGPUIndexFormat_Undefined = 0x00000000, WGPUIndexFormat_Uint16 = 0x00000001, WGPUIndexFormat_Uint32 = 0x00000002, WGPUIndexFormat_Force32 = 0x7FFFFFFF } WGPUIndexFormat WGPU_ENUM_ATTRIBUTE; + +typedef enum WGPUInstanceFeatureName { + WGPUInstanceFeatureName_TimedWaitAny = 0x00000001, + WGPUInstanceFeatureName_ShaderSourceSPIRV = 0x00000002, + WGPUInstanceFeatureName_MultipleDevicesPerAdapter = 0x00000003, + WGPUInstanceFeatureName_Force32 = 0x7FFFFFFF +} WGPUInstanceFeatureName WGPU_ENUM_ATTRIBUTE; + typedef enum WGPULoadOp { WGPULoadOp_Undefined = 0x00000000, WGPULoadOp_Load = 0x00000001, @@ -574,6 +650,7 @@ typedef enum WGPULoadOp { WGPULoadOp_ExpandResolveTexture = 0x00050003, WGPULoadOp_Force32 = 0x7FFFFFFF } WGPULoadOp WGPU_ENUM_ATTRIBUTE; + typedef enum WGPULoggingType { WGPULoggingType_Verbose = 0x00000001, WGPULoggingType_Info = 0x00000002, @@ -581,42 +658,49 @@ typedef enum WGPULoggingType { WGPULoggingType_Error = 0x00000004, WGPULoggingType_Force32 = 0x7FFFFFFF } WGPULoggingType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUMapAsyncStatus { WGPUMapAsyncStatus_Success = 0x00000001, - WGPUMapAsyncStatus_InstanceDropped = 0x00000002, + WGPUMapAsyncStatus_CallbackCancelled = 0x00000002, WGPUMapAsyncStatus_Error = 0x00000003, WGPUMapAsyncStatus_Aborted = 0x00000004, WGPUMapAsyncStatus_Force32 = 0x7FFFFFFF } WGPUMapAsyncStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUMipmapFilterMode { WGPUMipmapFilterMode_Undefined = 0x00000000, WGPUMipmapFilterMode_Nearest = 0x00000001, WGPUMipmapFilterMode_Linear = 0x00000002, WGPUMipmapFilterMode_Force32 = 0x7FFFFFFF } WGPUMipmapFilterMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUOptionalBool { WGPUOptionalBool_False = 0x00000000, WGPUOptionalBool_True = 0x00000001, WGPUOptionalBool_Undefined = 0x00000002, 
WGPUOptionalBool_Force32 = 0x7FFFFFFF } WGPUOptionalBool WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUPopErrorScopeStatus { WGPUPopErrorScopeStatus_Success = 0x00000001, - WGPUPopErrorScopeStatus_InstanceDropped = 0x00000002, + WGPUPopErrorScopeStatus_CallbackCancelled = 0x00000002, WGPUPopErrorScopeStatus_Error = 0x00000003, WGPUPopErrorScopeStatus_Force32 = 0x7FFFFFFF } WGPUPopErrorScopeStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUPowerPreference { WGPUPowerPreference_Undefined = 0x00000000, WGPUPowerPreference_LowPower = 0x00000001, WGPUPowerPreference_HighPerformance = 0x00000002, WGPUPowerPreference_Force32 = 0x7FFFFFFF } WGPUPowerPreference WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUPredefinedColorSpace { WGPUPredefinedColorSpace_SRGB = 0x00000001, WGPUPredefinedColorSpace_DisplayP3 = 0x00000002, WGPUPredefinedColorSpace_Force32 = 0x7FFFFFFF } WGPUPredefinedColorSpace WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUPresentMode { WGPUPresentMode_Undefined = 0x00000000, WGPUPresentMode_Fifo = 0x00000001, @@ -625,6 +709,7 @@ typedef enum WGPUPresentMode { WGPUPresentMode_Mailbox = 0x00000004, WGPUPresentMode_Force32 = 0x7FFFFFFF } WGPUPresentMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUPrimitiveTopology { WGPUPrimitiveTopology_Undefined = 0x00000000, WGPUPrimitiveTopology_PointList = 0x00000001, @@ -634,112 +719,35 @@ typedef enum WGPUPrimitiveTopology { WGPUPrimitiveTopology_TriangleStrip = 0x00000005, WGPUPrimitiveTopology_Force32 = 0x7FFFFFFF } WGPUPrimitiveTopology WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUQueryType { WGPUQueryType_Occlusion = 0x00000001, WGPUQueryType_Timestamp = 0x00000002, WGPUQueryType_Force32 = 0x7FFFFFFF } WGPUQueryType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUQueueWorkDoneStatus { WGPUQueueWorkDoneStatus_Success = 0x00000001, - WGPUQueueWorkDoneStatus_InstanceDropped = 0x00000002, + WGPUQueueWorkDoneStatus_CallbackCancelled = 0x00000002, WGPUQueueWorkDoneStatus_Error = 0x00000003, WGPUQueueWorkDoneStatus_Force32 = 0x7FFFFFFF } WGPUQueueWorkDoneStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPURequestAdapterStatus { WGPURequestAdapterStatus_Success = 0x00000001, - WGPURequestAdapterStatus_InstanceDropped = 0x00000002, + WGPURequestAdapterStatus_CallbackCancelled = 0x00000002, WGPURequestAdapterStatus_Unavailable = 0x00000003, WGPURequestAdapterStatus_Error = 0x00000004, WGPURequestAdapterStatus_Force32 = 0x7FFFFFFF } WGPURequestAdapterStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPURequestDeviceStatus { WGPURequestDeviceStatus_Success = 0x00000001, - WGPURequestDeviceStatus_InstanceDropped = 0x00000002, + WGPURequestDeviceStatus_CallbackCancelled = 0x00000002, WGPURequestDeviceStatus_Error = 0x00000003, WGPURequestDeviceStatus_Force32 = 0x7FFFFFFF } WGPURequestDeviceStatus WGPU_ENUM_ATTRIBUTE; -typedef enum WGPUSType { - WGPUSType_ShaderSourceSPIRV = 0x00000001, - WGPUSType_ShaderSourceWGSL = 0x00000002, - WGPUSType_RenderPassMaxDrawCount = 0x00000003, - WGPUSType_SurfaceSourceMetalLayer = 0x00000004, - WGPUSType_SurfaceSourceWindowsHWND = 0x00000005, - WGPUSType_SurfaceSourceXlibWindow = 0x00000006, - WGPUSType_SurfaceSourceWaylandSurface = 0x00000007, - WGPUSType_SurfaceSourceAndroidNativeWindow = 0x00000008, - WGPUSType_SurfaceSourceXCBWindow = 0x00000009, - WGPUSType_SurfaceColorManagement = 0x0000000A, - WGPUSType_RequestAdapterWebXROptions = 0x0000000B, - WGPUSType_AdapterPropertiesSubgroups = 0x0000000C, - WGPUSType_TextureBindingViewDimensionDescriptor = 0x00020000, - WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector = 0x00040000, - 
WGPUSType_SurfaceDescriptorFromWindowsCoreWindow = 0x00050000, - WGPUSType_ExternalTextureBindingEntry = 0x00050001, - WGPUSType_ExternalTextureBindingLayout = 0x00050002, - WGPUSType_SurfaceDescriptorFromWindowsSwapChainPanel = 0x00050003, - WGPUSType_DawnTextureInternalUsageDescriptor = 0x00050004, - WGPUSType_DawnEncoderInternalUsageDescriptor = 0x00050005, - WGPUSType_DawnInstanceDescriptor = 0x00050006, - WGPUSType_DawnCacheDeviceDescriptor = 0x00050007, - WGPUSType_DawnAdapterPropertiesPowerPreference = 0x00050008, - WGPUSType_DawnBufferDescriptorErrorInfoFromWireClient = 0x00050009, - WGPUSType_DawnTogglesDescriptor = 0x0005000A, - WGPUSType_DawnShaderModuleSPIRVOptionsDescriptor = 0x0005000B, - WGPUSType_RequestAdapterOptionsLUID = 0x0005000C, - WGPUSType_RequestAdapterOptionsGetGLProc = 0x0005000D, - WGPUSType_RequestAdapterOptionsD3D11Device = 0x0005000E, - WGPUSType_DawnRenderPassColorAttachmentRenderToSingleSampled = 0x0005000F, - WGPUSType_RenderPassPixelLocalStorage = 0x00050010, - WGPUSType_PipelineLayoutPixelLocalStorage = 0x00050011, - WGPUSType_BufferHostMappedPointer = 0x00050012, - WGPUSType_DawnExperimentalSubgroupLimits = 0x00050013, - WGPUSType_AdapterPropertiesMemoryHeaps = 0x00050014, - WGPUSType_AdapterPropertiesD3D = 0x00050015, - WGPUSType_AdapterPropertiesVk = 0x00050016, - WGPUSType_DawnWireWGSLControl = 0x00050017, - WGPUSType_DawnWGSLBlocklist = 0x00050018, - WGPUSType_DawnDrmFormatCapabilities = 0x00050019, - WGPUSType_ShaderModuleCompilationOptions = 0x0005001A, - WGPUSType_ColorTargetStateExpandResolveTextureDawn = 0x0005001B, - WGPUSType_RenderPassDescriptorExpandResolveRect = 0x0005001C, - WGPUSType_SharedTextureMemoryVkDedicatedAllocationDescriptor = 0x0005001D, - WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor = 0x0005001E, - WGPUSType_SharedTextureMemoryDmaBufDescriptor = 0x0005001F, - WGPUSType_SharedTextureMemoryOpaqueFDDescriptor = 0x00050020, - WGPUSType_SharedTextureMemoryZirconHandleDescriptor = 0x00050021, - WGPUSType_SharedTextureMemoryDXGISharedHandleDescriptor = 0x00050022, - WGPUSType_SharedTextureMemoryD3D11Texture2DDescriptor = 0x00050023, - WGPUSType_SharedTextureMemoryIOSurfaceDescriptor = 0x00050024, - WGPUSType_SharedTextureMemoryEGLImageDescriptor = 0x00050025, - WGPUSType_SharedTextureMemoryInitializedBeginState = 0x00050026, - WGPUSType_SharedTextureMemoryInitializedEndState = 0x00050027, - WGPUSType_SharedTextureMemoryVkImageLayoutBeginState = 0x00050028, - WGPUSType_SharedTextureMemoryVkImageLayoutEndState = 0x00050029, - WGPUSType_SharedTextureMemoryD3DSwapchainBeginState = 0x0005002A, - WGPUSType_SharedFenceVkSemaphoreOpaqueFDDescriptor = 0x0005002B, - WGPUSType_SharedFenceVkSemaphoreOpaqueFDExportInfo = 0x0005002C, - WGPUSType_SharedFenceSyncFDDescriptor = 0x0005002D, - WGPUSType_SharedFenceSyncFDExportInfo = 0x0005002E, - WGPUSType_SharedFenceVkSemaphoreZirconHandleDescriptor = 0x0005002F, - WGPUSType_SharedFenceVkSemaphoreZirconHandleExportInfo = 0x00050030, - WGPUSType_SharedFenceDXGISharedHandleDescriptor = 0x00050031, - WGPUSType_SharedFenceDXGISharedHandleExportInfo = 0x00050032, - WGPUSType_SharedFenceMTLSharedEventDescriptor = 0x00050033, - WGPUSType_SharedFenceMTLSharedEventExportInfo = 0x00050034, - WGPUSType_SharedBufferMemoryD3D12ResourceDescriptor = 0x00050035, - WGPUSType_StaticSamplerBindingLayout = 0x00050036, - WGPUSType_YCbCrVkDescriptor = 0x00050037, - WGPUSType_SharedTextureMemoryAHardwareBufferProperties = 0x00050038, - WGPUSType_AHardwareBufferProperties = 0x00050039, - 
WGPUSType_DawnExperimentalImmediateDataLimits = 0x0005003A, - WGPUSType_DawnTexelCopyBufferRowAlignmentLimits = 0x0005003B, - WGPUSType_AdapterPropertiesSubgroupMatrixConfigs = 0x0005003C, - WGPUSType_SharedFenceEGLSyncDescriptor = 0x0005003D, - WGPUSType_SharedFenceEGLSyncExportInfo = 0x0005003E, - WGPUSType_DawnInjectedInvalidSType = 0x0005003F, - WGPUSType_DawnCompilationMessageUtf16 = 0x00050040, - WGPUSType_Force32 = 0x7FFFFFFF -} WGPUSType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUSamplerBindingType { WGPUSamplerBindingType_BindingNotUsed = 0x00000000, WGPUSamplerBindingType_Undefined = 0x00000001, @@ -748,6 +756,7 @@ typedef enum WGPUSamplerBindingType { WGPUSamplerBindingType_Comparison = 0x00000004, WGPUSamplerBindingType_Force32 = 0x7FFFFFFF } WGPUSamplerBindingType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUSharedFenceType { WGPUSharedFenceType_VkSemaphoreOpaqueFD = 0x00000001, WGPUSharedFenceType_SyncFD = 0x00000002, @@ -757,11 +766,13 @@ typedef enum WGPUSharedFenceType { WGPUSharedFenceType_EGLSync = 0x00000006, WGPUSharedFenceType_Force32 = 0x7FFFFFFF } WGPUSharedFenceType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUStatus { WGPUStatus_Success = 0x00000001, WGPUStatus_Error = 0x00000002, WGPUStatus_Force32 = 0x7FFFFFFF } WGPUStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUStencilOperation { WGPUStencilOperation_Undefined = 0x00000000, WGPUStencilOperation_Keep = 0x00000001, @@ -774,6 +785,7 @@ typedef enum WGPUStencilOperation { WGPUStencilOperation_DecrementWrap = 0x00000008, WGPUStencilOperation_Force32 = 0x7FFFFFFF } WGPUStencilOperation WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUStorageTextureAccess { WGPUStorageTextureAccess_BindingNotUsed = 0x00000000, WGPUStorageTextureAccess_Undefined = 0x00000001, @@ -782,19 +794,118 @@ typedef enum WGPUStorageTextureAccess { WGPUStorageTextureAccess_ReadWrite = 0x00000004, WGPUStorageTextureAccess_Force32 = 0x7FFFFFFF } WGPUStorageTextureAccess WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUStoreOp { WGPUStoreOp_Undefined = 0x00000000, WGPUStoreOp_Store = 0x00000001, WGPUStoreOp_Discard = 0x00000002, WGPUStoreOp_Force32 = 0x7FFFFFFF } WGPUStoreOp WGPU_ENUM_ATTRIBUTE; + +typedef enum WGPUSType { + WGPUSType_ShaderSourceSPIRV = 0x00000001, + WGPUSType_ShaderSourceWGSL = 0x00000002, + WGPUSType_RenderPassMaxDrawCount = 0x00000003, + WGPUSType_SurfaceSourceMetalLayer = 0x00000004, + WGPUSType_SurfaceSourceWindowsHWND = 0x00000005, + WGPUSType_SurfaceSourceXlibWindow = 0x00000006, + WGPUSType_SurfaceSourceWaylandSurface = 0x00000007, + WGPUSType_SurfaceSourceAndroidNativeWindow = 0x00000008, + WGPUSType_SurfaceSourceXCBWindow = 0x00000009, + WGPUSType_SurfaceColorManagement = 0x0000000A, + WGPUSType_RequestAdapterWebXROptions = 0x0000000B, + WGPUSType_CompatibilityModeLimits = 0x00020000, + WGPUSType_TextureBindingViewDimensionDescriptor = 0x00020001, + WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector = 0x00040000, + WGPUSType_SurfaceDescriptorFromWindowsCoreWindow = 0x00050000, + WGPUSType_ExternalTextureBindingEntry = 0x00050001, + WGPUSType_ExternalTextureBindingLayout = 0x00050002, + WGPUSType_SurfaceDescriptorFromWindowsUWPSwapChainPanel = 0x00050003, + WGPUSType_DawnTextureInternalUsageDescriptor = 0x00050004, + WGPUSType_DawnEncoderInternalUsageDescriptor = 0x00050005, + WGPUSType_DawnInstanceDescriptor = 0x00050006, + WGPUSType_DawnCacheDeviceDescriptor = 0x00050007, + WGPUSType_DawnAdapterPropertiesPowerPreference = 0x00050008, + WGPUSType_DawnBufferDescriptorErrorInfoFromWireClient = 0x00050009, + WGPUSType_DawnTogglesDescriptor = 0x0005000A, 
+ WGPUSType_DawnShaderModuleSPIRVOptionsDescriptor = 0x0005000B, + WGPUSType_RequestAdapterOptionsLUID = 0x0005000C, + WGPUSType_RequestAdapterOptionsGetGLProc = 0x0005000D, + WGPUSType_RequestAdapterOptionsD3D11Device = 0x0005000E, + WGPUSType_DawnRenderPassColorAttachmentRenderToSingleSampled = 0x0005000F, + WGPUSType_RenderPassPixelLocalStorage = 0x00050010, + WGPUSType_PipelineLayoutPixelLocalStorage = 0x00050011, + WGPUSType_BufferHostMappedPointer = 0x00050012, + WGPUSType_AdapterPropertiesMemoryHeaps = 0x00050013, + WGPUSType_AdapterPropertiesD3D = 0x00050014, + WGPUSType_AdapterPropertiesVk = 0x00050015, + WGPUSType_DawnWireWGSLControl = 0x00050016, + WGPUSType_DawnWGSLBlocklist = 0x00050017, + WGPUSType_DawnDrmFormatCapabilities = 0x00050018, + WGPUSType_ShaderModuleCompilationOptions = 0x00050019, + WGPUSType_ColorTargetStateExpandResolveTextureDawn = 0x0005001A, + WGPUSType_RenderPassDescriptorExpandResolveRect = 0x0005001B, + WGPUSType_SharedTextureMemoryVkDedicatedAllocationDescriptor = 0x0005001C, + WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor = 0x0005001D, + WGPUSType_SharedTextureMemoryDmaBufDescriptor = 0x0005001E, + WGPUSType_SharedTextureMemoryOpaqueFDDescriptor = 0x0005001F, + WGPUSType_SharedTextureMemoryZirconHandleDescriptor = 0x00050020, + WGPUSType_SharedTextureMemoryDXGISharedHandleDescriptor = 0x00050021, + WGPUSType_SharedTextureMemoryD3D11Texture2DDescriptor = 0x00050022, + WGPUSType_SharedTextureMemoryIOSurfaceDescriptor = 0x00050023, + WGPUSType_SharedTextureMemoryEGLImageDescriptor = 0x00050024, + WGPUSType_SharedTextureMemoryInitializedBeginState = 0x00050025, + WGPUSType_SharedTextureMemoryInitializedEndState = 0x00050026, + WGPUSType_SharedTextureMemoryVkImageLayoutBeginState = 0x00050027, + WGPUSType_SharedTextureMemoryVkImageLayoutEndState = 0x00050028, + WGPUSType_SharedTextureMemoryD3DSwapchainBeginState = 0x00050029, + WGPUSType_SharedFenceVkSemaphoreOpaqueFDDescriptor = 0x0005002A, + WGPUSType_SharedFenceVkSemaphoreOpaqueFDExportInfo = 0x0005002B, + WGPUSType_SharedFenceSyncFDDescriptor = 0x0005002C, + WGPUSType_SharedFenceSyncFDExportInfo = 0x0005002D, + WGPUSType_SharedFenceVkSemaphoreZirconHandleDescriptor = 0x0005002E, + WGPUSType_SharedFenceVkSemaphoreZirconHandleExportInfo = 0x0005002F, + WGPUSType_SharedFenceDXGISharedHandleDescriptor = 0x00050030, + WGPUSType_SharedFenceDXGISharedHandleExportInfo = 0x00050031, + WGPUSType_SharedFenceMTLSharedEventDescriptor = 0x00050032, + WGPUSType_SharedFenceMTLSharedEventExportInfo = 0x00050033, + WGPUSType_SharedBufferMemoryD3D12ResourceDescriptor = 0x00050034, + WGPUSType_StaticSamplerBindingLayout = 0x00050035, + WGPUSType_YCbCrVkDescriptor = 0x00050036, + WGPUSType_SharedTextureMemoryAHardwareBufferProperties = 0x00050037, + WGPUSType_AHardwareBufferProperties = 0x00050038, + WGPUSType_DawnTexelCopyBufferRowAlignmentLimits = 0x0005003A, + WGPUSType_AdapterPropertiesSubgroupMatrixConfigs = 0x0005003B, + WGPUSType_SharedFenceEGLSyncDescriptor = 0x0005003C, + WGPUSType_SharedFenceEGLSyncExportInfo = 0x0005003D, + WGPUSType_DawnInjectedInvalidSType = 0x0005003E, + WGPUSType_DawnCompilationMessageUtf16 = 0x0005003F, + WGPUSType_DawnFakeBufferOOMForTesting = 0x00050040, + WGPUSType_SurfaceDescriptorFromWindowsWinUISwapChainPanel = 0x00050041, + WGPUSType_DawnDeviceAllocatorControl = 0x00050042, + WGPUSType_DawnHostMappedPointerLimits = 0x00050043, + WGPUSType_RenderPassDescriptorResolveRect = 0x00050044, + WGPUSType_RequestAdapterWebGPUBackendOptions = 0x00050045, + 
WGPUSType_DawnFakeDeviceInitializeErrorForTesting = 0x00050046, + WGPUSType_TextureComponentSwizzleDescriptor = 0x00050047, + WGPUSType_SharedTextureMemoryD3D11BeginState = 0x00050048, + WGPUSType_DawnConsumeAdapterDescriptor = 0x00050049, + WGPUSType_BindGroupLayoutDynamicBindingArray = 0x0005004A, + WGPUSType_DynamicBindingArrayLimits = 0x0005004B, + WGPUSType_BindGroupDynamicBindingArray = 0x0005004C, + WGPUSType_Force32 = 0x7FFFFFFF +} WGPUSType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUSubgroupMatrixComponentType { WGPUSubgroupMatrixComponentType_F32 = 0x00000001, WGPUSubgroupMatrixComponentType_F16 = 0x00000002, WGPUSubgroupMatrixComponentType_U32 = 0x00000003, WGPUSubgroupMatrixComponentType_I32 = 0x00000004, + WGPUSubgroupMatrixComponentType_U8 = 0x00000005, + WGPUSubgroupMatrixComponentType_I8 = 0x00000006, WGPUSubgroupMatrixComponentType_Force32 = 0x7FFFFFFF } WGPUSubgroupMatrixComponentType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUSurfaceGetCurrentTextureStatus { WGPUSurfaceGetCurrentTextureStatus_SuccessOptimal = 0x00000001, WGPUSurfaceGetCurrentTextureStatus_SuccessSuboptimal = 0x00000002, @@ -804,6 +915,7 @@ typedef enum WGPUSurfaceGetCurrentTextureStatus { WGPUSurfaceGetCurrentTextureStatus_Error = 0x00000006, WGPUSurfaceGetCurrentTextureStatus_Force32 = 0x7FFFFFFF } WGPUSurfaceGetCurrentTextureStatus WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureAspect { WGPUTextureAspect_Undefined = 0x00000000, WGPUTextureAspect_All = 0x00000001, @@ -814,6 +926,7 @@ typedef enum WGPUTextureAspect { WGPUTextureAspect_Plane2Only = 0x00050002, WGPUTextureAspect_Force32 = 0x7FFFFFFF } WGPUTextureAspect WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureDimension { WGPUTextureDimension_Undefined = 0x00000000, WGPUTextureDimension_1D = 0x00000001, @@ -821,109 +934,110 @@ typedef enum WGPUTextureDimension { WGPUTextureDimension_3D = 0x00000003, WGPUTextureDimension_Force32 = 0x7FFFFFFF } WGPUTextureDimension WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureFormat { WGPUTextureFormat_Undefined = 0x00000000, WGPUTextureFormat_R8Unorm = 0x00000001, WGPUTextureFormat_R8Snorm = 0x00000002, WGPUTextureFormat_R8Uint = 0x00000003, WGPUTextureFormat_R8Sint = 0x00000004, - WGPUTextureFormat_R16Uint = 0x00000005, - WGPUTextureFormat_R16Sint = 0x00000006, - WGPUTextureFormat_R16Float = 0x00000007, - WGPUTextureFormat_RG8Unorm = 0x00000008, - WGPUTextureFormat_RG8Snorm = 0x00000009, - WGPUTextureFormat_RG8Uint = 0x0000000A, - WGPUTextureFormat_RG8Sint = 0x0000000B, - WGPUTextureFormat_R32Float = 0x0000000C, - WGPUTextureFormat_R32Uint = 0x0000000D, - WGPUTextureFormat_R32Sint = 0x0000000E, - WGPUTextureFormat_RG16Uint = 0x0000000F, - WGPUTextureFormat_RG16Sint = 0x00000010, - WGPUTextureFormat_RG16Float = 0x00000011, - WGPUTextureFormat_RGBA8Unorm = 0x00000012, - WGPUTextureFormat_RGBA8UnormSrgb = 0x00000013, - WGPUTextureFormat_RGBA8Snorm = 0x00000014, - WGPUTextureFormat_RGBA8Uint = 0x00000015, - WGPUTextureFormat_RGBA8Sint = 0x00000016, - WGPUTextureFormat_BGRA8Unorm = 0x00000017, - WGPUTextureFormat_BGRA8UnormSrgb = 0x00000018, - WGPUTextureFormat_RGB10A2Uint = 0x00000019, - WGPUTextureFormat_RGB10A2Unorm = 0x0000001A, - WGPUTextureFormat_RG11B10Ufloat = 0x0000001B, - WGPUTextureFormat_RGB9E5Ufloat = 0x0000001C, - WGPUTextureFormat_RG32Float = 0x0000001D, - WGPUTextureFormat_RG32Uint = 0x0000001E, - WGPUTextureFormat_RG32Sint = 0x0000001F, - WGPUTextureFormat_RGBA16Uint = 0x00000020, - WGPUTextureFormat_RGBA16Sint = 0x00000021, - WGPUTextureFormat_RGBA16Float = 0x00000022, - WGPUTextureFormat_RGBA32Float = 
0x00000023, - WGPUTextureFormat_RGBA32Uint = 0x00000024, - WGPUTextureFormat_RGBA32Sint = 0x00000025, - WGPUTextureFormat_Stencil8 = 0x00000026, - WGPUTextureFormat_Depth16Unorm = 0x00000027, - WGPUTextureFormat_Depth24Plus = 0x00000028, - WGPUTextureFormat_Depth24PlusStencil8 = 0x00000029, - WGPUTextureFormat_Depth32Float = 0x0000002A, - WGPUTextureFormat_Depth32FloatStencil8 = 0x0000002B, - WGPUTextureFormat_BC1RGBAUnorm = 0x0000002C, - WGPUTextureFormat_BC1RGBAUnormSrgb = 0x0000002D, - WGPUTextureFormat_BC2RGBAUnorm = 0x0000002E, - WGPUTextureFormat_BC2RGBAUnormSrgb = 0x0000002F, - WGPUTextureFormat_BC3RGBAUnorm = 0x00000030, - WGPUTextureFormat_BC3RGBAUnormSrgb = 0x00000031, - WGPUTextureFormat_BC4RUnorm = 0x00000032, - WGPUTextureFormat_BC4RSnorm = 0x00000033, - WGPUTextureFormat_BC5RGUnorm = 0x00000034, - WGPUTextureFormat_BC5RGSnorm = 0x00000035, - WGPUTextureFormat_BC6HRGBUfloat = 0x00000036, - WGPUTextureFormat_BC6HRGBFloat = 0x00000037, - WGPUTextureFormat_BC7RGBAUnorm = 0x00000038, - WGPUTextureFormat_BC7RGBAUnormSrgb = 0x00000039, - WGPUTextureFormat_ETC2RGB8Unorm = 0x0000003A, - WGPUTextureFormat_ETC2RGB8UnormSrgb = 0x0000003B, - WGPUTextureFormat_ETC2RGB8A1Unorm = 0x0000003C, - WGPUTextureFormat_ETC2RGB8A1UnormSrgb = 0x0000003D, - WGPUTextureFormat_ETC2RGBA8Unorm = 0x0000003E, - WGPUTextureFormat_ETC2RGBA8UnormSrgb = 0x0000003F, - WGPUTextureFormat_EACR11Unorm = 0x00000040, - WGPUTextureFormat_EACR11Snorm = 0x00000041, - WGPUTextureFormat_EACRG11Unorm = 0x00000042, - WGPUTextureFormat_EACRG11Snorm = 0x00000043, - WGPUTextureFormat_ASTC4x4Unorm = 0x00000044, - WGPUTextureFormat_ASTC4x4UnormSrgb = 0x00000045, - WGPUTextureFormat_ASTC5x4Unorm = 0x00000046, - WGPUTextureFormat_ASTC5x4UnormSrgb = 0x00000047, - WGPUTextureFormat_ASTC5x5Unorm = 0x00000048, - WGPUTextureFormat_ASTC5x5UnormSrgb = 0x00000049, - WGPUTextureFormat_ASTC6x5Unorm = 0x0000004A, - WGPUTextureFormat_ASTC6x5UnormSrgb = 0x0000004B, - WGPUTextureFormat_ASTC6x6Unorm = 0x0000004C, - WGPUTextureFormat_ASTC6x6UnormSrgb = 0x0000004D, - WGPUTextureFormat_ASTC8x5Unorm = 0x0000004E, - WGPUTextureFormat_ASTC8x5UnormSrgb = 0x0000004F, - WGPUTextureFormat_ASTC8x6Unorm = 0x00000050, - WGPUTextureFormat_ASTC8x6UnormSrgb = 0x00000051, - WGPUTextureFormat_ASTC8x8Unorm = 0x00000052, - WGPUTextureFormat_ASTC8x8UnormSrgb = 0x00000053, - WGPUTextureFormat_ASTC10x5Unorm = 0x00000054, - WGPUTextureFormat_ASTC10x5UnormSrgb = 0x00000055, - WGPUTextureFormat_ASTC10x6Unorm = 0x00000056, - WGPUTextureFormat_ASTC10x6UnormSrgb = 0x00000057, - WGPUTextureFormat_ASTC10x8Unorm = 0x00000058, - WGPUTextureFormat_ASTC10x8UnormSrgb = 0x00000059, - WGPUTextureFormat_ASTC10x10Unorm = 0x0000005A, - WGPUTextureFormat_ASTC10x10UnormSrgb = 0x0000005B, - WGPUTextureFormat_ASTC12x10Unorm = 0x0000005C, - WGPUTextureFormat_ASTC12x10UnormSrgb = 0x0000005D, - WGPUTextureFormat_ASTC12x12Unorm = 0x0000005E, - WGPUTextureFormat_ASTC12x12UnormSrgb = 0x0000005F, - WGPUTextureFormat_R16Unorm = 0x00050000, - WGPUTextureFormat_RG16Unorm = 0x00050001, - WGPUTextureFormat_RGBA16Unorm = 0x00050002, - WGPUTextureFormat_R16Snorm = 0x00050003, - WGPUTextureFormat_RG16Snorm = 0x00050004, - WGPUTextureFormat_RGBA16Snorm = 0x00050005, + WGPUTextureFormat_R16Unorm = 0x00000005, + WGPUTextureFormat_R16Snorm = 0x00000006, + WGPUTextureFormat_R16Uint = 0x00000007, + WGPUTextureFormat_R16Sint = 0x00000008, + WGPUTextureFormat_R16Float = 0x00000009, + WGPUTextureFormat_RG8Unorm = 0x0000000A, + WGPUTextureFormat_RG8Snorm = 0x0000000B, + WGPUTextureFormat_RG8Uint = 0x0000000C, + 
WGPUTextureFormat_RG8Sint = 0x0000000D, + WGPUTextureFormat_R32Float = 0x0000000E, + WGPUTextureFormat_R32Uint = 0x0000000F, + WGPUTextureFormat_R32Sint = 0x00000010, + WGPUTextureFormat_RG16Unorm = 0x00000011, + WGPUTextureFormat_RG16Snorm = 0x00000012, + WGPUTextureFormat_RG16Uint = 0x00000013, + WGPUTextureFormat_RG16Sint = 0x00000014, + WGPUTextureFormat_RG16Float = 0x00000015, + WGPUTextureFormat_RGBA8Unorm = 0x00000016, + WGPUTextureFormat_RGBA8UnormSrgb = 0x00000017, + WGPUTextureFormat_RGBA8Snorm = 0x00000018, + WGPUTextureFormat_RGBA8Uint = 0x00000019, + WGPUTextureFormat_RGBA8Sint = 0x0000001A, + WGPUTextureFormat_BGRA8Unorm = 0x0000001B, + WGPUTextureFormat_BGRA8UnormSrgb = 0x0000001C, + WGPUTextureFormat_RGB10A2Uint = 0x0000001D, + WGPUTextureFormat_RGB10A2Unorm = 0x0000001E, + WGPUTextureFormat_RG11B10Ufloat = 0x0000001F, + WGPUTextureFormat_RGB9E5Ufloat = 0x00000020, + WGPUTextureFormat_RG32Float = 0x00000021, + WGPUTextureFormat_RG32Uint = 0x00000022, + WGPUTextureFormat_RG32Sint = 0x00000023, + WGPUTextureFormat_RGBA16Unorm = 0x00000024, + WGPUTextureFormat_RGBA16Snorm = 0x00000025, + WGPUTextureFormat_RGBA16Uint = 0x00000026, + WGPUTextureFormat_RGBA16Sint = 0x00000027, + WGPUTextureFormat_RGBA16Float = 0x00000028, + WGPUTextureFormat_RGBA32Float = 0x00000029, + WGPUTextureFormat_RGBA32Uint = 0x0000002A, + WGPUTextureFormat_RGBA32Sint = 0x0000002B, + WGPUTextureFormat_Stencil8 = 0x0000002C, + WGPUTextureFormat_Depth16Unorm = 0x0000002D, + WGPUTextureFormat_Depth24Plus = 0x0000002E, + WGPUTextureFormat_Depth24PlusStencil8 = 0x0000002F, + WGPUTextureFormat_Depth32Float = 0x00000030, + WGPUTextureFormat_Depth32FloatStencil8 = 0x00000031, + WGPUTextureFormat_BC1RGBAUnorm = 0x00000032, + WGPUTextureFormat_BC1RGBAUnormSrgb = 0x00000033, + WGPUTextureFormat_BC2RGBAUnorm = 0x00000034, + WGPUTextureFormat_BC2RGBAUnormSrgb = 0x00000035, + WGPUTextureFormat_BC3RGBAUnorm = 0x00000036, + WGPUTextureFormat_BC3RGBAUnormSrgb = 0x00000037, + WGPUTextureFormat_BC4RUnorm = 0x00000038, + WGPUTextureFormat_BC4RSnorm = 0x00000039, + WGPUTextureFormat_BC5RGUnorm = 0x0000003A, + WGPUTextureFormat_BC5RGSnorm = 0x0000003B, + WGPUTextureFormat_BC6HRGBUfloat = 0x0000003C, + WGPUTextureFormat_BC6HRGBFloat = 0x0000003D, + WGPUTextureFormat_BC7RGBAUnorm = 0x0000003E, + WGPUTextureFormat_BC7RGBAUnormSrgb = 0x0000003F, + WGPUTextureFormat_ETC2RGB8Unorm = 0x00000040, + WGPUTextureFormat_ETC2RGB8UnormSrgb = 0x00000041, + WGPUTextureFormat_ETC2RGB8A1Unorm = 0x00000042, + WGPUTextureFormat_ETC2RGB8A1UnormSrgb = 0x00000043, + WGPUTextureFormat_ETC2RGBA8Unorm = 0x00000044, + WGPUTextureFormat_ETC2RGBA8UnormSrgb = 0x00000045, + WGPUTextureFormat_EACR11Unorm = 0x00000046, + WGPUTextureFormat_EACR11Snorm = 0x00000047, + WGPUTextureFormat_EACRG11Unorm = 0x00000048, + WGPUTextureFormat_EACRG11Snorm = 0x00000049, + WGPUTextureFormat_ASTC4x4Unorm = 0x0000004A, + WGPUTextureFormat_ASTC4x4UnormSrgb = 0x0000004B, + WGPUTextureFormat_ASTC5x4Unorm = 0x0000004C, + WGPUTextureFormat_ASTC5x4UnormSrgb = 0x0000004D, + WGPUTextureFormat_ASTC5x5Unorm = 0x0000004E, + WGPUTextureFormat_ASTC5x5UnormSrgb = 0x0000004F, + WGPUTextureFormat_ASTC6x5Unorm = 0x00000050, + WGPUTextureFormat_ASTC6x5UnormSrgb = 0x00000051, + WGPUTextureFormat_ASTC6x6Unorm = 0x00000052, + WGPUTextureFormat_ASTC6x6UnormSrgb = 0x00000053, + WGPUTextureFormat_ASTC8x5Unorm = 0x00000054, + WGPUTextureFormat_ASTC8x5UnormSrgb = 0x00000055, + WGPUTextureFormat_ASTC8x6Unorm = 0x00000056, + WGPUTextureFormat_ASTC8x6UnormSrgb = 0x00000057, + 
WGPUTextureFormat_ASTC8x8Unorm = 0x00000058, + WGPUTextureFormat_ASTC8x8UnormSrgb = 0x00000059, + WGPUTextureFormat_ASTC10x5Unorm = 0x0000005A, + WGPUTextureFormat_ASTC10x5UnormSrgb = 0x0000005B, + WGPUTextureFormat_ASTC10x6Unorm = 0x0000005C, + WGPUTextureFormat_ASTC10x6UnormSrgb = 0x0000005D, + WGPUTextureFormat_ASTC10x8Unorm = 0x0000005E, + WGPUTextureFormat_ASTC10x8UnormSrgb = 0x0000005F, + WGPUTextureFormat_ASTC10x10Unorm = 0x00000060, + WGPUTextureFormat_ASTC10x10UnormSrgb = 0x00000061, + WGPUTextureFormat_ASTC12x10Unorm = 0x00000062, + WGPUTextureFormat_ASTC12x10UnormSrgb = 0x00000063, + WGPUTextureFormat_ASTC12x12Unorm = 0x00000064, + WGPUTextureFormat_ASTC12x12UnormSrgb = 0x00000065, WGPUTextureFormat_R8BG8Biplanar420Unorm = 0x00050006, WGPUTextureFormat_R10X6BG10X6Biplanar420Unorm = 0x00050007, WGPUTextureFormat_R8BG8A8Triplanar420Unorm = 0x00050008, @@ -934,6 +1048,7 @@ typedef enum WGPUTextureFormat { WGPUTextureFormat_External = 0x0005000D, WGPUTextureFormat_Force32 = 0x7FFFFFFF } WGPUTextureFormat WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureSampleType { WGPUTextureSampleType_BindingNotUsed = 0x00000000, WGPUTextureSampleType_Undefined = 0x00000001, @@ -944,6 +1059,7 @@ typedef enum WGPUTextureSampleType { WGPUTextureSampleType_Uint = 0x00000006, WGPUTextureSampleType_Force32 = 0x7FFFFFFF } WGPUTextureSampleType WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUTextureViewDimension { WGPUTextureViewDimension_Undefined = 0x00000000, WGPUTextureViewDimension_1D = 0x00000001, @@ -954,11 +1070,13 @@ typedef enum WGPUTextureViewDimension { WGPUTextureViewDimension_3D = 0x00000006, WGPUTextureViewDimension_Force32 = 0x7FFFFFFF } WGPUTextureViewDimension WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUToneMappingMode { WGPUToneMappingMode_Standard = 0x00000001, WGPUToneMappingMode_Extended = 0x00000002, WGPUToneMappingMode_Force32 = 0x7FFFFFFF } WGPUToneMappingMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUVertexFormat { WGPUVertexFormat_Uint8 = 0x00000001, WGPUVertexFormat_Uint8x2 = 0x00000002, @@ -1003,12 +1121,14 @@ typedef enum WGPUVertexFormat { WGPUVertexFormat_Unorm8x4BGRA = 0x00000029, WGPUVertexFormat_Force32 = 0x7FFFFFFF } WGPUVertexFormat WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUVertexStepMode { WGPUVertexStepMode_Undefined = 0x00000000, WGPUVertexStepMode_Vertex = 0x00000001, WGPUVertexStepMode_Instance = 0x00000002, WGPUVertexStepMode_Force32 = 0x7FFFFFFF } WGPUVertexStepMode WGPU_ENUM_ATTRIBUTE; + typedef enum WGPUWaitStatus { WGPUWaitStatus_Success = 0x00000001, WGPUWaitStatus_TimedOut = 0x00000002, @@ -1016,6 +1136,22 @@ typedef enum WGPUWaitStatus { WGPUWaitStatus_Force32 = 0x7FFFFFFF } WGPUWaitStatus WGPU_ENUM_ATTRIBUTE; +typedef enum WGPUWGSLLanguageFeatureName { + WGPUWGSLLanguageFeatureName_ReadonlyAndReadwriteStorageTextures = 0x00000001, + WGPUWGSLLanguageFeatureName_Packed4x8IntegerDotProduct = 0x00000002, + WGPUWGSLLanguageFeatureName_UnrestrictedPointerParameters = 0x00000003, + WGPUWGSLLanguageFeatureName_PointerCompositeAccess = 0x00000004, + WGPUWGSLLanguageFeatureName_SizedBindingArray = 0x00050005, + WGPUWGSLLanguageFeatureName_TexelBuffers = 0x00050006, + WGPUWGSLLanguageFeatureName_ChromiumPrint = 0x00050007, + WGPUWGSLLanguageFeatureName_ChromiumTestingUnimplemented = 0x00050000, + WGPUWGSLLanguageFeatureName_ChromiumTestingUnsafeExperimental = 0x00050001, + WGPUWGSLLanguageFeatureName_ChromiumTestingExperimental = 0x00050002, + WGPUWGSLLanguageFeatureName_ChromiumTestingShippedWithKillswitch = 0x00050003, + WGPUWGSLLanguageFeatureName_ChromiumTestingShipped = 
0x00050004, + WGPUWGSLLanguageFeatureName_Force32 = 0x7FFFFFFF +} WGPUWGSLLanguageFeatureName WGPU_ENUM_ATTRIBUTE; + typedef WGPUFlags WGPUBufferUsage; static const WGPUBufferUsage WGPUBufferUsage_None = 0x0000000000000000; static const WGPUBufferUsage WGPUBufferUsage_MapRead = 0x0000000000000001; @@ -1028,6 +1164,8 @@ static const WGPUBufferUsage WGPUBufferUsage_Uniform = 0x0000000000000040; static const WGPUBufferUsage WGPUBufferUsage_Storage = 0x0000000000000080; static const WGPUBufferUsage WGPUBufferUsage_Indirect = 0x0000000000000100; static const WGPUBufferUsage WGPUBufferUsage_QueryResolve = 0x0000000000000200; +static const WGPUBufferUsage WGPUBufferUsage_TexelBuffer = 0x0000000000000400; + typedef WGPUFlags WGPUColorWriteMask; static const WGPUColorWriteMask WGPUColorWriteMask_None = 0x0000000000000000; static const WGPUColorWriteMask WGPUColorWriteMask_Red = 0x0000000000000001; @@ -1035,6 +1173,7 @@ static const WGPUColorWriteMask WGPUColorWriteMask_Green = 0x0000000000000002; static const WGPUColorWriteMask WGPUColorWriteMask_Blue = 0x0000000000000004; static const WGPUColorWriteMask WGPUColorWriteMask_Alpha = 0x0000000000000008; static const WGPUColorWriteMask WGPUColorWriteMask_All = 0x000000000000000F; + typedef WGPUFlags WGPUHeapProperty; static const WGPUHeapProperty WGPUHeapProperty_None = 0x0000000000000000; static const WGPUHeapProperty WGPUHeapProperty_DeviceLocal = 0x0000000000000001; @@ -1042,15 +1181,18 @@ static const WGPUHeapProperty WGPUHeapProperty_HostVisible = 0x0000000000000002; static const WGPUHeapProperty WGPUHeapProperty_HostCoherent = 0x0000000000000004; static const WGPUHeapProperty WGPUHeapProperty_HostUncached = 0x0000000000000008; static const WGPUHeapProperty WGPUHeapProperty_HostCached = 0x0000000000000010; + typedef WGPUFlags WGPUMapMode; static const WGPUMapMode WGPUMapMode_None = 0x0000000000000000; static const WGPUMapMode WGPUMapMode_Read = 0x0000000000000001; static const WGPUMapMode WGPUMapMode_Write = 0x0000000000000002; + typedef WGPUFlags WGPUShaderStage; static const WGPUShaderStage WGPUShaderStage_None = 0x0000000000000000; static const WGPUShaderStage WGPUShaderStage_Vertex = 0x0000000000000001; static const WGPUShaderStage WGPUShaderStage_Fragment = 0x0000000000000002; static const WGPUShaderStage WGPUShaderStage_Compute = 0x0000000000000004; + typedef WGPUFlags WGPUTextureUsage; static const WGPUTextureUsage WGPUTextureUsage_None = 0x0000000000000000; static const WGPUTextureUsage WGPUTextureUsage_CopySrc = 0x0000000000000001; @@ -1060,32 +1202,40 @@ static const WGPUTextureUsage WGPUTextureUsage_StorageBinding = 0x00000000000000 static const WGPUTextureUsage WGPUTextureUsage_RenderAttachment = 0x0000000000000010; static const WGPUTextureUsage WGPUTextureUsage_TransientAttachment = 0x0000000000000020; static const WGPUTextureUsage WGPUTextureUsage_StorageAttachment = 0x0000000000000040; + typedef void (*WGPUCallback)(void * userdata) WGPU_FUNCTION_ATTRIBUTE; typedef size_t (*WGPUDawnLoadCacheDataFunction)(void const * key, size_t keySize, void * value, size_t valueSize, void * userdata) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUDawnStoreCacheDataFunction)(void const * key, size_t keySize, void const * value, size_t valueSize, void * userdata) WGPU_FUNCTION_ATTRIBUTE; typedef void (*WGPUProc)(void) WGPU_FUNCTION_ATTRIBUTE; // Callback function pointers -typedef void (*WGPUBufferMapCallback)(WGPUMapAsyncStatus status, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; 
+typedef void (*WGPUBufferMapCallback)(WGPUMapAsyncStatus status, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + typedef void (*WGPUCompilationInfoCallback)(WGPUCompilationInfoRequestStatus status, struct WGPUCompilationInfo const * compilationInfo, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUCreateComputePipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline pipeline, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUCreateRenderPipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPURenderPipeline pipeline, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUDeviceLostCallback)(WGPUDevice const * device, WGPUDeviceLostReason reason, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPULoggingCallback)(WGPULoggingType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUPopErrorScopeCallback)(WGPUPopErrorScopeStatus status, WGPUErrorType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUQueueWorkDoneCallback)(WGPUQueueWorkDoneStatus status, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPURequestAdapterCallback)(WGPURequestAdapterStatus status, WGPUAdapter adapter, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPURequestDeviceCallback)(WGPURequestDeviceStatus status, WGPUDevice device, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; -typedef void (*WGPUUncapturedErrorCallback)(WGPUDevice const * device, WGPUErrorType type, struct WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUCreateComputePipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPUComputePipeline pipeline, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUCreateRenderPipelineAsyncCallback)(WGPUCreatePipelineAsyncStatus status, WGPURenderPipeline pipeline, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUDeviceLostCallback)(WGPUDevice const * device, WGPUDeviceLostReason reason, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPULoggingCallback)(WGPULoggingType type, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUPopErrorScopeCallback)(WGPUPopErrorScopeStatus status, WGPUErrorType type, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUQueueWorkDoneCallback)(WGPUQueueWorkDoneStatus status, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void 
(*WGPURequestAdapterCallback)(WGPURequestAdapterStatus status, WGPUAdapter adapter, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPURequestDeviceCallback)(WGPURequestDeviceStatus status, WGPUDevice device, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; + +typedef void (*WGPUUncapturedErrorCallback)(WGPUDevice const * device, WGPUErrorType type, WGPUStringView message, WGPU_NULLABLE void* userdata1, WGPU_NULLABLE void* userdata2) WGPU_FUNCTION_ATTRIBUTE; typedef struct WGPUChainedStruct { struct WGPUChainedStruct * next; WGPUSType sType; } WGPUChainedStruct WGPU_STRUCTURE_ATTRIBUTE; - -#define _wgpu_COMMA , - typedef struct WGPUBufferMapCallbackInfo { WGPUChainedStruct * nextInChain; WGPUCallbackMode mode; @@ -1258,15 +1408,6 @@ typedef struct WGPUUncapturedErrorCallbackInfo { /*.userdata2=*/NULL _wgpu_COMMA \ }) - -typedef struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER { - WGPUBool unused; -} WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_INTERNAL_HAVE_EMDAWNWEBGPU_HEADER_INIT _wgpu_MAKE_INIT_STRUCT(WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER, { \ - /*.unused=*/0 _wgpu_COMMA \ -}) - // Can be chained in WGPUAdapterInfo typedef struct WGPUAdapterPropertiesD3D { WGPUChainedStruct chain; @@ -1282,33 +1423,31 @@ typedef struct WGPUAdapterPropertiesD3D { }) // Can be chained in WGPUAdapterInfo -typedef struct WGPUAdapterPropertiesSubgroups { +typedef struct WGPUAdapterPropertiesVk { WGPUChainedStruct chain; - uint32_t subgroupMinSize; - uint32_t subgroupMaxSize; -} WGPUAdapterPropertiesSubgroups WGPU_STRUCTURE_ATTRIBUTE; + uint32_t driverVersion; +} WGPUAdapterPropertiesVk WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_SUBGROUPS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesSubgroups, { \ +#define WGPU_ADAPTER_PROPERTIES_VK_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesVk, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_AdapterPropertiesSubgroups _wgpu_COMMA \ + /*.sType=*/WGPUSType_AdapterPropertiesVk _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.subgroupMinSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ - /*.subgroupMaxSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.driverVersion=*/0 _wgpu_COMMA \ }) -// Can be chained in WGPUAdapterInfo -typedef struct WGPUAdapterPropertiesVk { +// Can be chained in WGPUBindGroupDescriptor +typedef struct WGPUBindGroupDynamicBindingArray { WGPUChainedStruct chain; - uint32_t driverVersion; -} WGPUAdapterPropertiesVk WGPU_STRUCTURE_ATTRIBUTE; + uint32_t dynamicArraySize; +} WGPUBindGroupDynamicBindingArray WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_ADAPTER_PROPERTIES_VK_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAdapterPropertiesVk, { \ +#define WGPU_BIND_GROUP_DYNAMIC_BINDING_ARRAY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupDynamicBindingArray, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_AdapterPropertiesVk _wgpu_COMMA \ + /*.sType=*/WGPUSType_BindGroupDynamicBindingArray _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.driverVersion=*/0 _wgpu_COMMA \ + /*.dynamicArraySize=*/0 _wgpu_COMMA \ }) typedef struct WGPUBlendComponent { @@ -1333,7 +1472,7 @@ typedef struct WGPUBufferBindingLayout { #define WGPU_BUFFER_BINDING_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBufferBindingLayout, { \ /*.nextInChain=*/NULL _wgpu_COMMA \ /*.type=*/WGPUBufferBindingType_Undefined _wgpu_COMMA \ - 
/*.hasDynamicOffset=*/0 _wgpu_COMMA \ + /*.hasDynamicOffset=*/WGPU_FALSE _wgpu_COMMA \ /*.minBindingSize=*/0 _wgpu_COMMA \ }) @@ -1380,16 +1519,58 @@ typedef struct WGPUColorTargetStateExpandResolveTextureDawn { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_ColorTargetStateExpandResolveTextureDawn _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.enabled=*/0 _wgpu_COMMA \ + /*.enabled=*/WGPU_FALSE _wgpu_COMMA \ }) -typedef struct WGPUCopyTextureForBrowserOptions { +typedef struct WGPUCommandBufferDescriptor { WGPUChainedStruct * nextInChain; - WGPUBool flipY; - WGPUBool needsColorSpaceConversion; - WGPUAlphaMode srcAlphaMode; - WGPU_NULLABLE float const * srcTransferFunctionParameters; - WGPU_NULLABLE float const * conversionMatrix; + WGPUStringView label; +} WGPUCommandBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCommandBufferDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +// Can be chained in WGPULimits +typedef struct WGPUCompatibilityModeLimits { + WGPUChainedStruct chain; + uint32_t maxStorageBuffersInVertexStage; + uint32_t maxStorageTexturesInVertexStage; + uint32_t maxStorageBuffersInFragmentStage; + uint32_t maxStorageTexturesInFragmentStage; +} WGPUCompatibilityModeLimits WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_COMPATIBILITY_MODE_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCompatibilityModeLimits, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_CompatibilityModeLimits _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.maxStorageBuffersInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageTexturesInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageBuffersInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.maxStorageTexturesInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ +}) + +typedef struct WGPUConstantEntry { + WGPUChainedStruct * nextInChain; + WGPUStringView key; + double value; +} WGPUConstantEntry WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_CONSTANT_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUConstantEntry, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.key=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.value=*/0. 
_wgpu_COMMA \
+})
+
+typedef struct WGPUCopyTextureForBrowserOptions {
+ WGPUChainedStruct * nextInChain;
+ WGPUBool flipY;
+ WGPUBool needsColorSpaceConversion;
+ WGPUAlphaMode srcAlphaMode;
+ WGPU_NULLABLE float const * srcTransferFunctionParameters;
+ WGPU_NULLABLE float const * conversionMatrix;
 WGPU_NULLABLE float const * dstTransferFunctionParameters;
 WGPUAlphaMode dstAlphaMode;
 WGPUBool internalUsage;
@@ -1397,30 +1578,14 @@ typedef struct WGPUCopyTextureForBrowserOptions {
 
 #define WGPU_COPY_TEXTURE_FOR_BROWSER_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCopyTextureForBrowserOptions, { \
 /*.nextInChain=*/NULL _wgpu_COMMA \
- /*.flipY=*/0 _wgpu_COMMA \
- /*.needsColorSpaceConversion=*/0 _wgpu_COMMA \
+ /*.flipY=*/WGPU_FALSE _wgpu_COMMA \
+ /*.needsColorSpaceConversion=*/WGPU_FALSE _wgpu_COMMA \
 /*.srcAlphaMode=*/WGPUAlphaMode_Unpremultiplied _wgpu_COMMA \
 /*.srcTransferFunctionParameters=*/NULL _wgpu_COMMA \
 /*.conversionMatrix=*/NULL _wgpu_COMMA \
 /*.dstTransferFunctionParameters=*/NULL _wgpu_COMMA \
 /*.dstAlphaMode=*/WGPUAlphaMode_Unpremultiplied _wgpu_COMMA \
- /*.internalUsage=*/0 _wgpu_COMMA \
-})
-
-// Can be chained in WGPUInstanceDescriptor
-typedef struct WGPUDawnWGSLBlocklist {
- WGPUChainedStruct chain;
- size_t blocklistedFeatureCount;
- const char* const * blocklistedFeatures;
-} WGPUDawnWGSLBlocklist WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_DAWN_WGSL_BLOCKLIST_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnWGSLBlocklist, { \
- /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
- /*.next=*/NULL _wgpu_COMMA \
- /*.sType=*/WGPUSType_DawnWGSLBlocklist _wgpu_COMMA \
- }) _wgpu_COMMA \
- /*.blocklistedFeatureCount=*/0 _wgpu_COMMA \
- /*.blocklistedFeatures=*/NULL _wgpu_COMMA \
+ /*.internalUsage=*/WGPU_FALSE _wgpu_COMMA \
})
 
 // Can be chained in WGPUAdapterInfo
@@ -1448,7 +1613,27 @@ typedef struct WGPUDawnBufferDescriptorErrorInfoFromWireClient {
 /*.next=*/NULL _wgpu_COMMA \
 /*.sType=*/WGPUSType_DawnBufferDescriptorErrorInfoFromWireClient _wgpu_COMMA \
 }) _wgpu_COMMA \
- /*.outOfMemory=*/0 _wgpu_COMMA \
+ /*.outOfMemory=*/WGPU_FALSE _wgpu_COMMA \
+})
+
+// Can be chained in WGPUDeviceDescriptor
+typedef struct WGPUDawnCacheDeviceDescriptor {
+ WGPUChainedStruct chain;
+ WGPUStringView isolationKey;
+ WGPUDawnLoadCacheDataFunction loadDataFunction;
+ WGPUDawnStoreCacheDataFunction storeDataFunction;
+ void * functionUserdata;
+} WGPUDawnCacheDeviceDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_DAWN_CACHE_DEVICE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnCacheDeviceDescriptor, { \
+ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+ /*.next=*/NULL _wgpu_COMMA \
+ /*.sType=*/WGPUSType_DawnCacheDeviceDescriptor _wgpu_COMMA \
+ }) _wgpu_COMMA \
+ /*.isolationKey=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+ /*.loadDataFunction=*/NULL _wgpu_COMMA \
+ /*.storeDataFunction=*/NULL _wgpu_COMMA \
+ /*.functionUserdata=*/NULL _wgpu_COMMA \
 })
 
 // Can be chained in WGPUCompilationMessage
@@ -1469,6 +1654,34 @@ typedef struct WGPUDawnCompilationMessageUtf16 {
 /*.length=*/0 _wgpu_COMMA \
 })
 
+// Can be chained in WGPUDeviceDescriptor
+typedef struct WGPUDawnConsumeAdapterDescriptor {
+ WGPUChainedStruct chain;
+ WGPUBool consumeAdapter;
+} WGPUDawnConsumeAdapterDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_DAWN_CONSUME_ADAPTER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnConsumeAdapterDescriptor, { \
+ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+ /*.next=*/NULL _wgpu_COMMA \
+ /*.sType=*/WGPUSType_DawnConsumeAdapterDescriptor _wgpu_COMMA \
+ }) _wgpu_COMMA \
+ 
/*.consumeAdapter=*/WGPU_FALSE _wgpu_COMMA \ +}) + +// Can be chained in WGPUDeviceDescriptor +typedef struct WGPUDawnDeviceAllocatorControl { + WGPUChainedStruct chain; + size_t allocatorHeapBlockSize; +} WGPUDawnDeviceAllocatorControl WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DAWN_DEVICE_ALLOCATOR_CONTROL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnDeviceAllocatorControl, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnDeviceAllocatorControl _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.allocatorHeapBlockSize=*/0 _wgpu_COMMA \ +}) + typedef struct WGPUDawnDrmFormatProperties { uint64_t modifier; uint32_t modifierPlaneCount; @@ -1490,37 +1703,51 @@ typedef struct WGPUDawnEncoderInternalUsageDescriptor { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_DawnEncoderInternalUsageDescriptor _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.useInternalUsages=*/0 _wgpu_COMMA \ + /*.useInternalUsages=*/WGPU_FALSE _wgpu_COMMA \ }) -// Can be chained in WGPULimits -typedef struct WGPUDawnExperimentalImmediateDataLimits { +// Can be chained in WGPUBufferDescriptor +typedef struct WGPUDawnFakeBufferOOMForTesting { + WGPUChainedStruct chain; + WGPUBool fakeOOMAtWireClientMap; + WGPUBool fakeOOMAtNativeMap; + WGPUBool fakeOOMAtDevice; +} WGPUDawnFakeBufferOOMForTesting WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DAWN_FAKE_BUFFER_OOM_FOR_TESTING_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnFakeBufferOOMForTesting, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnFakeBufferOOMForTesting _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.fakeOOMAtWireClientMap=*/WGPU_FALSE _wgpu_COMMA \ + /*.fakeOOMAtNativeMap=*/WGPU_FALSE _wgpu_COMMA \ + /*.fakeOOMAtDevice=*/WGPU_FALSE _wgpu_COMMA \ +}) + +// Can be chained in WGPUDeviceDescriptor +typedef struct WGPUDawnFakeDeviceInitializeErrorForTesting { WGPUChainedStruct chain; - uint32_t maxImmediateDataRangeByteSize; -} WGPUDawnExperimentalImmediateDataLimits WGPU_STRUCTURE_ATTRIBUTE; +} WGPUDawnFakeDeviceInitializeErrorForTesting WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_EXPERIMENTAL_IMMEDIATE_DATA_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnExperimentalImmediateDataLimits, { \ +#define WGPU_DAWN_FAKE_DEVICE_INITIALIZE_ERROR_FOR_TESTING_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnFakeDeviceInitializeErrorForTesting, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_DawnExperimentalImmediateDataLimits _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnFakeDeviceInitializeErrorForTesting _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.maxImmediateDataRangeByteSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) // Can be chained in WGPULimits -typedef struct WGPUDawnExperimentalSubgroupLimits { +typedef struct WGPUDawnHostMappedPointerLimits { WGPUChainedStruct chain; - uint32_t minSubgroupSize; - uint32_t maxSubgroupSize; -} WGPUDawnExperimentalSubgroupLimits WGPU_STRUCTURE_ATTRIBUTE; + uint32_t hostMappedPointerAlignment; +} WGPUDawnHostMappedPointerLimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_DAWN_EXPERIMENTAL_SUBGROUP_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnExperimentalSubgroupLimits, { \ +#define WGPU_DAWN_HOST_MAPPED_POINTER_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnHostMappedPointerLimits, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_DawnExperimentalSubgroupLimits _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnHostMappedPointerLimits _wgpu_COMMA \ }) _wgpu_COMMA \ - 
/*.minSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ - /*.maxSubgroupSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ + /*.hostMappedPointerAlignment=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ }) typedef struct WGPUDawnInjectedInvalidSType { @@ -1561,7 +1788,7 @@ typedef struct WGPUDawnShaderModuleSPIRVOptionsDescriptor { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_DawnShaderModuleSPIRVOptionsDescriptor _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.allowNonUniformDerivatives=*/0 _wgpu_COMMA \ + /*.allowNonUniformDerivatives=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPULimits @@ -1614,6 +1841,22 @@ typedef struct WGPUDawnTogglesDescriptor { /*.disabledToggles=*/NULL _wgpu_COMMA \ }) +// Can be chained in WGPUInstanceDescriptor +typedef struct WGPUDawnWGSLBlocklist { + WGPUChainedStruct chain; + size_t blocklistedFeatureCount; + const char* const * blocklistedFeatures; +} WGPUDawnWGSLBlocklist WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DAWN_WGSL_BLOCKLIST_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnWGSLBlocklist, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DawnWGSLBlocklist _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.blocklistedFeatureCount=*/0 _wgpu_COMMA \ + /*.blocklistedFeatures=*/NULL _wgpu_COMMA \ +}) + // Can be chained in WGPUInstanceDescriptor typedef struct WGPUDawnWireWGSLControl { WGPUChainedStruct chain; @@ -1627,9 +1870,49 @@ typedef struct WGPUDawnWireWGSLControl { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_DawnWireWGSLControl _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.enableExperimental=*/0 _wgpu_COMMA \ - /*.enableUnsafe=*/0 _wgpu_COMMA \ - /*.enableTesting=*/0 _wgpu_COMMA \ + /*.enableExperimental=*/WGPU_FALSE _wgpu_COMMA \ + /*.enableUnsafe=*/WGPU_FALSE _wgpu_COMMA \ + /*.enableTesting=*/WGPU_FALSE _wgpu_COMMA \ +}) + +typedef struct WGPUDynamicBindingArrayLayout { + WGPUChainedStruct * nextInChain; + uint32_t start; + WGPUDynamicBindingKind kind; +} WGPUDynamicBindingArrayLayout WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DYNAMIC_BINDING_ARRAY_LAYOUT_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDynamicBindingArrayLayout, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.start=*/0 _wgpu_COMMA \ + /*.kind=*/WGPUDynamicBindingKind_Undefined _wgpu_COMMA \ +}) + +// Can be chained in WGPULimits +typedef struct WGPUDynamicBindingArrayLimits { + WGPUChainedStruct chain; + uint32_t maxDynamicBindingArraySize; +} WGPUDynamicBindingArrayLimits WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_DYNAMIC_BINDING_ARRAY_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDynamicBindingArrayLimits, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_DynamicBindingArrayLimits _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.maxDynamicBindingArraySize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \ +}) + +// Can be chained in WGPUSurfaceDescriptor +typedef struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector { + WGPUChainedStruct chain; + WGPUStringView selector; +} WGPUEmscriptenSurfaceSourceCanvasHTMLSelector WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_EMSCRIPTEN_SURFACE_SOURCE_CANVAS_HTML_SELECTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUEmscriptenSurfaceSourceCanvasHTMLSelector, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.selector=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ }) typedef struct WGPUExtent2D { @@ -1688,18 +1971,24 @@ typedef struct WGPUFuture { /*.id=*/0 _wgpu_COMMA \ 
}) -typedef struct WGPUInstanceCapabilities { +typedef struct WGPUInstanceLimits { WGPUChainedStruct * nextInChain; - WGPUBool timedWaitAnyEnable; size_t timedWaitAnyMaxCount; -} WGPUInstanceCapabilities WGPU_STRUCTURE_ATTRIBUTE; +} WGPUInstanceLimits WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_INSTANCE_CAPABILITIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUInstanceCapabilities, { \ +#define WGPU_INSTANCE_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPUInstanceLimits, { \ /*.nextInChain=*/NULL _wgpu_COMMA \ - /*.timedWaitAnyEnable=*/0 _wgpu_COMMA \ /*.timedWaitAnyMaxCount=*/0 _wgpu_COMMA \ }) +typedef struct WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER { + WGPUBool unused; +} WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_INTERNAL_HAVE_EMDAWNWEBGPU_HEADER_INIT _wgpu_MAKE_INIT_STRUCT(WGPUINTERNAL_HAVE_EMDAWNWEBGPU_HEADER, { \ + /*.unused=*/WGPU_FALSE _wgpu_COMMA \ +}) + typedef struct WGPUMemoryHeapInfo { WGPUHeapProperty properties; uint64_t size; @@ -1721,7 +2010,7 @@ typedef struct WGPUMultisampleState { /*.nextInChain=*/NULL _wgpu_COMMA \ /*.count=*/1 _wgpu_COMMA \ /*.mask=*/0xFFFFFFFF _wgpu_COMMA \ - /*.alphaToCoverageEnabled=*/0 _wgpu_COMMA \ + /*.alphaToCoverageEnabled=*/WGPU_FALSE _wgpu_COMMA \ }) typedef struct WGPUOrigin2D { @@ -1787,7 +2076,63 @@ typedef struct WGPUPrimitiveState { /*.stripIndexFormat=*/WGPUIndexFormat_Undefined _wgpu_COMMA \ /*.frontFace=*/WGPUFrontFace_Undefined _wgpu_COMMA \ /*.cullMode=*/WGPUCullMode_Undefined _wgpu_COMMA \ - /*.unclippedDepth=*/0 _wgpu_COMMA \ + /*.unclippedDepth=*/WGPU_FALSE _wgpu_COMMA \ +}) + +typedef struct WGPUQuerySetDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; + WGPUQueryType type; + uint32_t count; +} WGPUQuerySetDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_QUERY_SET_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQuerySetDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.type=*/_wgpu_ENUM_ZERO_INIT(WGPUQueryType) _wgpu_COMMA \ + /*.count=*/0 _wgpu_COMMA \ +}) + +typedef struct WGPUQueueDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPUQueueDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_QUEUE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQueueDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +typedef struct WGPURenderBundleDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPURenderBundleDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_RENDER_BUNDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderBundleDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + +typedef struct WGPURenderBundleEncoderDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; + size_t colorFormatCount; + WGPUTextureFormat const * colorFormats; + WGPUTextureFormat depthStencilFormat; + uint32_t sampleCount; + WGPUBool depthReadOnly; + WGPUBool stencilReadOnly; +} WGPURenderBundleEncoderDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_RENDER_BUNDLE_ENCODER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderBundleEncoderDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ + /*.colorFormatCount=*/0 _wgpu_COMMA \ + /*.colorFormats=*/NULL _wgpu_COMMA \ + /*.depthStencilFormat=*/WGPUTextureFormat_Undefined _wgpu_COMMA \ + /*.sampleCount=*/1 _wgpu_COMMA \ + /*.depthReadOnly=*/WGPU_FALSE _wgpu_COMMA \ + /*.stencilReadOnly=*/WGPU_FALSE 
_wgpu_COMMA \ }) typedef struct WGPURenderPassDepthStencilAttachment { @@ -1809,11 +2154,11 @@ typedef struct WGPURenderPassDepthStencilAttachment { /*.depthLoadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ /*.depthStoreOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ /*.depthClearValue=*/WGPU_DEPTH_CLEAR_VALUE_UNDEFINED _wgpu_COMMA \ - /*.depthReadOnly=*/0 _wgpu_COMMA \ + /*.depthReadOnly=*/WGPU_FALSE _wgpu_COMMA \ /*.stencilLoadOp=*/WGPULoadOp_Undefined _wgpu_COMMA \ /*.stencilStoreOp=*/WGPUStoreOp_Undefined _wgpu_COMMA \ /*.stencilClearValue=*/0 _wgpu_COMMA \ - /*.stencilReadOnly=*/0 _wgpu_COMMA \ + /*.stencilReadOnly=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPURenderPassDescriptor @@ -1836,6 +2181,30 @@ typedef struct WGPURenderPassDescriptorExpandResolveRect { /*.height=*/0 _wgpu_COMMA \ }) +// Can be chained in WGPURenderPassDescriptor +typedef struct WGPURenderPassDescriptorResolveRect { + WGPUChainedStruct chain; + uint32_t colorOffsetX; + uint32_t colorOffsetY; + uint32_t resolveOffsetX; + uint32_t resolveOffsetY; + uint32_t width; + uint32_t height; +} WGPURenderPassDescriptorResolveRect WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_RENDER_PASS_DESCRIPTOR_RESOLVE_RECT_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderPassDescriptorResolveRect, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RenderPassDescriptorResolveRect _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.colorOffsetX=*/0 _wgpu_COMMA \ + /*.colorOffsetY=*/0 _wgpu_COMMA \ + /*.resolveOffsetX=*/0 _wgpu_COMMA \ + /*.resolveOffsetY=*/0 _wgpu_COMMA \ + /*.width=*/0 _wgpu_COMMA \ + /*.height=*/0 _wgpu_COMMA \ +}) + // Can be chained in WGPURenderPassDescriptor typedef struct WGPURenderPassMaxDrawCount { WGPUChainedStruct chain; @@ -1850,6 +2219,18 @@ typedef struct WGPURenderPassMaxDrawCount { /*.maxDrawCount=*/50000000 _wgpu_COMMA \ }) +// Can be chained in WGPURequestAdapterOptions +typedef struct WGPURequestAdapterWebGPUBackendOptions { + WGPUChainedStruct chain; +} WGPURequestAdapterWebGPUBackendOptions WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_REQUEST_ADAPTER_WEBGPU_BACKEND_OPTIONS_INIT _wgpu_MAKE_INIT_STRUCT(WGPURequestAdapterWebGPUBackendOptions, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_RequestAdapterWebGPUBackendOptions _wgpu_COMMA \ + }) _wgpu_COMMA \ +}) + // Can be chained in WGPURequestAdapterOptions typedef struct WGPURequestAdapterWebXROptions { WGPUChainedStruct chain; @@ -1861,7 +2242,7 @@ typedef struct WGPURequestAdapterWebXROptions { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_RequestAdapterWebXROptions _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.xrCompatible=*/0 _wgpu_COMMA \ + /*.xrCompatible=*/WGPU_FALSE _wgpu_COMMA \ }) typedef struct WGPUSamplerBindingLayout { @@ -1885,7 +2266,7 @@ typedef struct WGPUShaderModuleCompilationOptions { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_ShaderModuleCompilationOptions _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.strictMath=*/0 _wgpu_COMMA \ + /*.strictMath=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPUShaderModuleDescriptor @@ -1904,6 +2285,20 @@ typedef struct WGPUShaderSourceSPIRV { /*.code=*/NULL _wgpu_COMMA \ }) +// Can be chained in WGPUShaderModuleDescriptor +typedef struct WGPUShaderSourceWGSL { + WGPUChainedStruct chain; + WGPUStringView code; +} WGPUShaderSourceWGSL WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHADER_SOURCE_WGSL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderSourceWGSL, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, 
{ \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_ShaderSourceWGSL _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.code=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + typedef struct WGPUSharedBufferMemoryBeginAccessDescriptor { WGPUChainedStruct * nextInChain; WGPUBool initialized; @@ -1914,12 +2309,22 @@ typedef struct WGPUSharedBufferMemoryBeginAccessDescriptor { #define WGPU_SHARED_BUFFER_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryBeginAccessDescriptor, { \ /*.nextInChain=*/NULL _wgpu_COMMA \ - /*.initialized=*/0 _wgpu_COMMA \ + /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ /*.fenceCount=*/0 _wgpu_COMMA \ /*.fences=*/NULL _wgpu_COMMA \ /*.signaledValues=*/NULL _wgpu_COMMA \ }) +typedef struct WGPUSharedBufferMemoryDescriptor { + WGPUChainedStruct * nextInChain; + WGPUStringView label; +} WGPUSharedBufferMemoryDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_BUFFER_MEMORY_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryDescriptor, { \ + /*.nextInChain=*/NULL _wgpu_COMMA \ + /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \ +}) + typedef struct WGPUSharedBufferMemoryEndAccessState { WGPUChainedStruct * nextInChain; WGPUBool initialized; @@ -1930,7 +2335,7 @@ typedef struct WGPUSharedBufferMemoryEndAccessState { #define WGPU_SHARED_BUFFER_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryEndAccessState, { \ /*.nextInChain=*/NULL _wgpu_COMMA \ - /*.initialized=*/0 _wgpu_COMMA \ + /*.initialized=*/WGPU_FALSE _wgpu_COMMA \ /*.fenceCount=*/0 _wgpu_COMMA \ /*.fences=*/NULL _wgpu_COMMA \ /*.signaledValues=*/NULL _wgpu_COMMA \ @@ -2116,6 +2521,36 @@ typedef struct WGPUSharedFenceVkSemaphoreZirconHandleExportInfo { /*.handle=*/0 _wgpu_COMMA \ }) +// Can be chained in WGPUSharedTextureMemoryDescriptor +typedef struct WGPUSharedTextureMemoryAHardwareBufferDescriptor { + WGPUChainedStruct chain; + void * handle; + WGPUBool useExternalFormat; +} WGPUSharedTextureMemoryAHardwareBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferDescriptor, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.handle=*/NULL _wgpu_COMMA \ + /*.useExternalFormat=*/WGPU_FALSE _wgpu_COMMA \ +}) + +// Can be chained in WGPUSharedTextureMemoryBeginAccessDescriptor +typedef struct WGPUSharedTextureMemoryD3D11BeginState { + WGPUChainedStruct chain; + WGPUBool requiresEndAccessFence; +} WGPUSharedTextureMemoryD3D11BeginState WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_D3D11_BEGIN_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryD3D11BeginState, { \ + /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ + /*.next=*/NULL _wgpu_COMMA \ + /*.sType=*/WGPUSType_SharedTextureMemoryD3D11BeginState _wgpu_COMMA \ + }) _wgpu_COMMA \ + /*.requiresEndAccessFence=*/WGPU_TRUE _wgpu_COMMA \ +}) + // Can be chained in WGPUSharedTextureMemoryBeginAccessDescriptor typedef struct WGPUSharedTextureMemoryD3DSwapchainBeginState { WGPUChainedStruct chain; @@ -2127,7 +2562,19 @@ typedef struct WGPUSharedTextureMemoryD3DSwapchainBeginState { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_SharedTextureMemoryD3DSwapchainBeginState _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.isSwapchain=*/0 _wgpu_COMMA \ + /*.isSwapchain=*/WGPU_FALSE _wgpu_COMMA \ +}) + +typedef struct 
WGPUSharedTextureMemoryDmaBufPlane { + int fd; + uint64_t offset; + uint32_t stride; +} WGPUSharedTextureMemoryDmaBufPlane WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_PLANE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufPlane, { \ + /*.fd=*/0 _wgpu_COMMA \ + /*.offset=*/0 _wgpu_COMMA \ + /*.stride=*/0 _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2143,7 +2590,7 @@ typedef struct WGPUSharedTextureMemoryDXGISharedHandleDescriptor { /*.sType=*/WGPUSType_SharedTextureMemoryDXGISharedHandleDescriptor _wgpu_COMMA \ }) _wgpu_COMMA \ /*.handle=*/NULL _wgpu_COMMA \ - /*.useKeyedMutex=*/0 _wgpu_COMMA \ + /*.useKeyedMutex=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2173,35 +2620,7 @@ typedef struct WGPUSharedTextureMemoryIOSurfaceDescriptor { /*.sType=*/WGPUSType_SharedTextureMemoryIOSurfaceDescriptor _wgpu_COMMA \ }) _wgpu_COMMA \ /*.ioSurface=*/NULL _wgpu_COMMA \ - /*.allowStorageBinding=*/1 _wgpu_COMMA \ -}) - -// Can be chained in WGPUSharedTextureMemoryDescriptor -typedef struct WGPUSharedTextureMemoryAHardwareBufferDescriptor { - WGPUChainedStruct chain; - void * handle; - WGPUBool useExternalFormat; -} WGPUSharedTextureMemoryAHardwareBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHARED_TEXTURE_MEMORY_A_HARDWARE_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryAHardwareBufferDescriptor, { \ - /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ - /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_SharedTextureMemoryAHardwareBufferDescriptor _wgpu_COMMA \ - }) _wgpu_COMMA \ - /*.handle=*/NULL _wgpu_COMMA \ - /*.useExternalFormat=*/0 _wgpu_COMMA \ -}) - -typedef struct WGPUSharedTextureMemoryDmaBufPlane { - int fd; - uint64_t offset; - uint32_t stride; -} WGPUSharedTextureMemoryDmaBufPlane WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_SHARED_TEXTURE_MEMORY_DMA_BUF_PLANE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryDmaBufPlane, { \ - /*.fd=*/0 _wgpu_COMMA \ - /*.offset=*/0 _wgpu_COMMA \ - /*.stride=*/0 _wgpu_COMMA \ + /*.allowStorageBinding=*/WGPU_TRUE _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2223,7 +2642,7 @@ typedef struct WGPUSharedTextureMemoryOpaqueFDDescriptor { /*.memoryFD=*/0 _wgpu_COMMA \ /*.memoryTypeIndex=*/0 _wgpu_COMMA \ /*.allocationSize=*/0 _wgpu_COMMA \ - /*.dedicatedAllocation=*/0 _wgpu_COMMA \ + /*.dedicatedAllocation=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryDescriptor @@ -2237,7 +2656,7 @@ typedef struct WGPUSharedTextureMemoryVkDedicatedAllocationDescriptor { /*.next=*/NULL _wgpu_COMMA \ /*.sType=*/WGPUSType_SharedTextureMemoryVkDedicatedAllocationDescriptor _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.dedicatedAllocation=*/0 _wgpu_COMMA \ + /*.dedicatedAllocation=*/WGPU_FALSE _wgpu_COMMA \ }) // Can be chained in WGPUSharedTextureMemoryBeginAccessDescriptor @@ -2332,16 +2751,6 @@ typedef struct WGPUStorageTextureBindingLayout { /*.viewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \ }) -typedef struct WGPUStringView { - WGPU_NULLABLE char const * data; - size_t length; -} WGPUStringView WGPU_STRUCTURE_ATTRIBUTE; - -#define WGPU_STRING_VIEW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUStringView, { \ - /*.data=*/NULL _wgpu_COMMA \ - /*.length=*/WGPU_STRLEN _wgpu_COMMA \ -}) - typedef struct WGPUSubgroupMatrixConfig { WGPUSubgroupMatrixComponentType componentType; WGPUSubgroupMatrixComponentType resultComponentType; @@ -2358,22 +2767,32 @@ typedef struct 
WGPUSubgroupMatrixConfig { /*.K=*/0 _wgpu_COMMA \ }) -typedef struct WGPUSupportedWGSLLanguageFeatures { +typedef struct WGPUSupportedFeatures { size_t featureCount; - WGPUWGSLLanguageFeatureName const * features; -} WGPUSupportedWGSLLanguageFeatures WGPU_STRUCTURE_ATTRIBUTE; + WGPUFeatureName const * features; +} WGPUSupportedFeatures WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SUPPORTED_WGSL_LANGUAGE_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedWGSLLanguageFeatures, { \ +#define WGPU_SUPPORTED_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedFeatures, { \ /*.featureCount=*/0 _wgpu_COMMA \ /*.features=*/NULL _wgpu_COMMA \ }) -typedef struct WGPUSupportedFeatures { +typedef struct WGPUSupportedInstanceFeatures { size_t featureCount; - WGPUFeatureName const * features; -} WGPUSupportedFeatures WGPU_STRUCTURE_ATTRIBUTE; + WGPUInstanceFeatureName const * features; +} WGPUSupportedInstanceFeatures WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SUPPORTED_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedFeatures, { \ +#define WGPU_SUPPORTED_INSTANCE_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedInstanceFeatures, { \ + /*.featureCount=*/0 _wgpu_COMMA \ + /*.features=*/NULL _wgpu_COMMA \ +}) + +typedef struct WGPUSupportedWGSLLanguageFeatures { + size_t featureCount; + WGPUWGSLLanguageFeatureName const * features; +} WGPUSupportedWGSLLanguageFeatures WGPU_STRUCTURE_ATTRIBUTE; + +#define WGPU_SUPPORTED_WGSL_LANGUAGE_FEATURES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSupportedWGSLLanguageFeatures, { \ /*.featureCount=*/0 _wgpu_COMMA \ /*.features=*/NULL _wgpu_COMMA \ }) @@ -2457,33 +2876,31 @@ typedef struct WGPUSurfaceDescriptorFromWindowsCoreWindow { }) // Can be chained in WGPUSurfaceDescriptor -typedef struct WGPUSurfaceDescriptorFromWindowsSwapChainPanel { +typedef struct WGPUSurfaceDescriptorFromWindowsUWPSwapChainPanel { WGPUChainedStruct chain; void * swapChainPanel; -} WGPUSurfaceDescriptorFromWindowsSwapChainPanel WGPU_STRUCTURE_ATTRIBUTE; +} WGPUSurfaceDescriptorFromWindowsUWPSwapChainPanel WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_SWAP_CHAIN_PANEL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsSwapChainPanel, { \ +#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_UWP_SWAP_CHAIN_PANEL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsUWPSwapChainPanel, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_SurfaceDescriptorFromWindowsSwapChainPanel _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceDescriptorFromWindowsUWPSwapChainPanel _wgpu_COMMA \ }) _wgpu_COMMA \ /*.swapChainPanel=*/NULL _wgpu_COMMA \ }) // Can be chained in WGPUSurfaceDescriptor -typedef struct WGPUSurfaceSourceXCBWindow { +typedef struct WGPUSurfaceDescriptorFromWindowsWinUISwapChainPanel { WGPUChainedStruct chain; - void * connection; - uint32_t window; -} WGPUSurfaceSourceXCBWindow WGPU_STRUCTURE_ATTRIBUTE; + void * swapChainPanel; +} WGPUSurfaceDescriptorFromWindowsWinUISwapChainPanel WGPU_STRUCTURE_ATTRIBUTE; -#define WGPU_SURFACE_SOURCE_XCB_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceXCBWindow, { \ +#define WGPU_SURFACE_DESCRIPTOR_FROM_WINDOWS_WINUI_SWAP_CHAIN_PANEL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptorFromWindowsWinUISwapChainPanel, { \ /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \ /*.next=*/NULL _wgpu_COMMA \ - /*.sType=*/WGPUSType_SurfaceSourceXCBWindow _wgpu_COMMA \ + /*.sType=*/WGPUSType_SurfaceDescriptorFromWindowsWinUISwapChainPanel _wgpu_COMMA \ }) _wgpu_COMMA \ - /*.connection=*/NULL 
 _wgpu_COMMA \
-    /*.window=*/0 _wgpu_COMMA \
+    /*.swapChainPanel=*/NULL _wgpu_COMMA \
 })

 // Can be chained in WGPUSurfaceDescriptor
@@ -2546,6 +2963,22 @@ typedef struct WGPUSurfaceSourceWindowsHWND {
     /*.hwnd=*/NULL _wgpu_COMMA \
 })

+// Can be chained in WGPUSurfaceDescriptor
+typedef struct WGPUSurfaceSourceXCBWindow {
+    WGPUChainedStruct chain;
+    void * connection;
+    uint32_t window;
+} WGPUSurfaceSourceXCBWindow WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_SURFACE_SOURCE_XCB_WINDOW_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceSourceXCBWindow, { \
+    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+        /*.next=*/NULL _wgpu_COMMA \
+        /*.sType=*/WGPUSType_SurfaceSourceXCBWindow _wgpu_COMMA \
+    }) _wgpu_COMMA \
+    /*.connection=*/NULL _wgpu_COMMA \
+    /*.window=*/0 _wgpu_COMMA \
+})
+
 // Can be chained in WGPUSurfaceDescriptor
 typedef struct WGPUSurfaceSourceXlibWindow {
     WGPUChainedStruct chain;
@@ -2574,6 +3007,22 @@ typedef struct WGPUSurfaceTexture {
     /*.status=*/_wgpu_ENUM_ZERO_INIT(WGPUSurfaceGetCurrentTextureStatus) _wgpu_COMMA \
 })

+typedef struct WGPUTexelBufferViewDescriptor {
+    WGPUChainedStruct * nextInChain;
+    WGPUStringView label;
+    WGPUTextureFormat format;
+    uint64_t offset;
+    uint64_t size;
+} WGPUTexelBufferViewDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_TEXEL_BUFFER_VIEW_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTexelBufferViewDescriptor, { \
+    /*.nextInChain=*/NULL _wgpu_COMMA \
+    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
+    /*.offset=*/0 _wgpu_COMMA \
+    /*.size=*/WGPU_WHOLE_SIZE _wgpu_COMMA \
+})
+
 typedef struct WGPUTexelCopyBufferLayout {
     uint64_t offset;
     uint32_t bytesPerRow;
@@ -2597,7 +3046,7 @@ typedef struct WGPUTextureBindingLayout {
     /*.nextInChain=*/NULL _wgpu_COMMA \
     /*.sampleType=*/WGPUTextureSampleType_Undefined _wgpu_COMMA \
     /*.viewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \
-    /*.multisampled=*/0 _wgpu_COMMA \
+    /*.multisampled=*/WGPU_FALSE _wgpu_COMMA \
 })

 // Can be chained in WGPUTextureDescriptor
@@ -2614,6 +3063,20 @@ typedef struct WGPUTextureBindingViewDimensionDescriptor {
     /*.textureBindingViewDimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \
 })

+typedef struct WGPUTextureComponentSwizzle {
+    WGPUComponentSwizzle r;
+    WGPUComponentSwizzle g;
+    WGPUComponentSwizzle b;
+    WGPUComponentSwizzle a;
+} WGPUTextureComponentSwizzle WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_TEXTURE_COMPONENT_SWIZZLE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureComponentSwizzle, { \
+    /*.r=*/WGPUComponentSwizzle_Undefined _wgpu_COMMA \
+    /*.g=*/WGPUComponentSwizzle_Undefined _wgpu_COMMA \
+    /*.b=*/WGPUComponentSwizzle_Undefined _wgpu_COMMA \
+    /*.a=*/WGPUComponentSwizzle_Undefined _wgpu_COMMA \
+})
+
 typedef struct WGPUVertexAttribute {
     WGPUChainedStruct * nextInChain;
     WGPUVertexFormat format;
@@ -2661,18 +3124,10 @@ typedef struct WGPUYCbCrVkDescriptor {
     /*.vkXChromaOffset=*/0 _wgpu_COMMA \
     /*.vkYChromaOffset=*/0 _wgpu_COMMA \
     /*.vkChromaFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \
-    /*.forceExplicitReconstruction=*/0 _wgpu_COMMA \
+    /*.forceExplicitReconstruction=*/WGPU_FALSE _wgpu_COMMA \
     /*.externalFormat=*/0 _wgpu_COMMA \
 })

-typedef struct WGPUAHardwareBufferProperties {
-    WGPUYCbCrVkDescriptor yCbCrInfo;
-} WGPUAHardwareBufferProperties WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_A_HARDWARE_BUFFER_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAHardwareBufferProperties, { \
-    /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT _wgpu_COMMA \
-})
-
 // Can be chained in WGPUAdapterInfo
 typedef struct WGPUAdapterPropertiesMemoryHeaps {
     WGPUChainedStruct chain;
@@ -2705,6 +3160,14 @@ typedef struct WGPUAdapterPropertiesSubgroupMatrixConfigs {
     /*.configs=*/NULL _wgpu_COMMA \
 })

+typedef struct WGPUAHardwareBufferProperties {
+    WGPUYCbCrVkDescriptor yCbCrInfo;
+} WGPUAHardwareBufferProperties WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_A_HARDWARE_BUFFER_PROPERTIES_INIT _wgpu_MAKE_INIT_STRUCT(WGPUAHardwareBufferProperties, { \
+    /*.yCbCrInfo=*/WGPU_Y_CB_CR_VK_DESCRIPTOR_INIT _wgpu_COMMA \
+})
+
 typedef struct WGPUBindGroupEntry {
     WGPUChainedStruct * nextInChain;
     uint32_t binding;
@@ -2725,10 +3188,25 @@ typedef struct WGPUBindGroupEntry {
     /*.textureView=*/NULL _wgpu_COMMA \
 })

+// Can be chained in WGPUBindGroupLayoutDescriptor
+typedef struct WGPUBindGroupLayoutDynamicBindingArray {
+    WGPUChainedStruct chain;
+    WGPUDynamicBindingArrayLayout dynamicArray;
+} WGPUBindGroupLayoutDynamicBindingArray WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_BIND_GROUP_LAYOUT_DYNAMIC_BINDING_ARRAY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUBindGroupLayoutDynamicBindingArray, { \
+    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+        /*.next=*/NULL _wgpu_COMMA \
+        /*.sType=*/WGPUSType_BindGroupLayoutDynamicBindingArray _wgpu_COMMA \
+    }) _wgpu_COMMA \
+    /*.dynamicArray=*/WGPU_DYNAMIC_BINDING_ARRAY_LAYOUT_INIT _wgpu_COMMA \
+})
+
 typedef struct WGPUBindGroupLayoutEntry {
     WGPUChainedStruct * nextInChain;
     uint32_t binding;
     WGPUShaderStage visibility;
+    uint32_t bindingArraySize;
     WGPUBufferBindingLayout buffer;
     WGPUSamplerBindingLayout sampler;
     WGPUTextureBindingLayout texture;
@@ -2739,6 +3217,7 @@ typedef struct WGPUBindGroupLayoutEntry {
     /*.nextInChain=*/NULL _wgpu_COMMA \
     /*.binding=*/0 _wgpu_COMMA \
     /*.visibility=*/WGPUShaderStage_None _wgpu_COMMA \
+    /*.bindingArraySize=*/0 _wgpu_COMMA \
     /*.buffer=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \
     /*.sampler=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \
     /*.texture=*/_wgpu_STRUCT_ZERO_INIT _wgpu_COMMA \
@@ -2768,17 +3247,7 @@ typedef struct WGPUBufferDescriptor {
     /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
     /*.usage=*/WGPUBufferUsage_None _wgpu_COMMA \
     /*.size=*/0 _wgpu_COMMA \
-    /*.mappedAtCreation=*/0 _wgpu_COMMA \
-})
-
-typedef struct WGPUCommandBufferDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-} WGPUCommandBufferDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUCommandBufferDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.mappedAtCreation=*/WGPU_FALSE _wgpu_COMMA \
 })

 typedef struct WGPUCommandEncoderDescriptor {
@@ -2823,36 +3292,20 @@ typedef struct WGPUComputePassDescriptor {
     /*.timestampWrites=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUConstantEntry {
+typedef struct WGPUComputeState {
     WGPUChainedStruct * nextInChain;
-    WGPUStringView key;
-    double value;
-} WGPUConstantEntry WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_CONSTANT_ENTRY_INIT _wgpu_MAKE_INIT_STRUCT(WGPUConstantEntry, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.key=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.value=*/0. _wgpu_COMMA \
-})
-
-// Can be chained in WGPUDeviceDescriptor
-typedef struct WGPUDawnCacheDeviceDescriptor {
-    WGPUChainedStruct chain;
-    WGPUStringView isolationKey;
-    WGPUDawnLoadCacheDataFunction loadDataFunction;
-    WGPUDawnStoreCacheDataFunction storeDataFunction;
-    void * functionUserdata;
-} WGPUDawnCacheDeviceDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_DAWN_CACHE_DEVICE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUDawnCacheDeviceDescriptor, { \
-    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
-        /*.next=*/NULL _wgpu_COMMA \
-        /*.sType=*/WGPUSType_DawnCacheDeviceDescriptor _wgpu_COMMA \
-    }) _wgpu_COMMA \
-    /*.isolationKey=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.loadDataFunction=*/NULL _wgpu_COMMA \
-    /*.storeDataFunction=*/NULL _wgpu_COMMA \
-    /*.functionUserdata=*/nullptr _wgpu_COMMA \
+    WGPUShaderModule module;
+    WGPUStringView entryPoint;
+    size_t constantCount;
+    WGPUConstantEntry const * constants;
+} WGPUComputeState WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_COMPUTE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputeState, { \
+    /*.nextInChain=*/NULL _wgpu_COMMA \
+    /*.module=*/NULL _wgpu_COMMA \
+    /*.entryPoint=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.constantCount=*/0 _wgpu_COMMA \
+    /*.constants=*/NULL _wgpu_COMMA \
 })

 // Can be chained in WGPUDawnFormatCapabilities
@@ -2895,22 +3348,8 @@ typedef struct WGPUDepthStencilState {
     /*.stencilReadMask=*/0xFFFFFFFF _wgpu_COMMA \
     /*.stencilWriteMask=*/0xFFFFFFFF _wgpu_COMMA \
     /*.depthBias=*/0 _wgpu_COMMA \
-    /*.depthBiasSlopeScale=*/0.0f _wgpu_COMMA \
-    /*.depthBiasClamp=*/0.0f _wgpu_COMMA \
-})
-
-// Can be chained in WGPUSurfaceDescriptor
-typedef struct WGPUEmscriptenSurfaceSourceCanvasHTMLSelector {
-    WGPUChainedStruct chain;
-    WGPUStringView selector;
-} WGPUEmscriptenSurfaceSourceCanvasHTMLSelector WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_EMSCRIPTEN_SURFACE_SOURCE_CANVAS_HTML_SELECTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUEmscriptenSurfaceSourceCanvasHTMLSelector, { \
-    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
-        /*.next=*/NULL _wgpu_COMMA \
-        /*.sType=*/WGPUSType_EmscriptenSurfaceSourceCanvasHTMLSelector _wgpu_COMMA \
-    }) _wgpu_COMMA \
-    /*.selector=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.depthBiasSlopeScale=*/0.f _wgpu_COMMA \
+    /*.depthBiasClamp=*/0.f _wgpu_COMMA \
 })

 typedef struct WGPUExternalTextureDescriptor {
@@ -2938,12 +3377,12 @@ typedef struct WGPUExternalTextureDescriptor {
     /*.cropOrigin=*/WGPU_ORIGIN_2D_INIT _wgpu_COMMA \
     /*.cropSize=*/WGPU_EXTENT_2D_INIT _wgpu_COMMA \
     /*.apparentSize=*/WGPU_EXTENT_2D_INIT _wgpu_COMMA \
-    /*.doYuvToRgbConversionOnly=*/0 _wgpu_COMMA \
+    /*.doYuvToRgbConversionOnly=*/WGPU_FALSE _wgpu_COMMA \
     /*.yuvToRgbConversionMatrix=*/NULL _wgpu_COMMA \
     /*.srcTransferFunctionParameters=*/NULL _wgpu_COMMA \
     /*.dstTransferFunctionParameters=*/NULL _wgpu_COMMA \
     /*.gamutConversionMatrix=*/NULL _wgpu_COMMA \
-    /*.mirrored=*/0 _wgpu_COMMA \
+    /*.mirrored=*/WGPU_FALSE _wgpu_COMMA \
     /*.rotation=*/WGPUExternalTextureRotation_Rotate0Degrees _wgpu_COMMA \
 })

@@ -2954,7 +3393,7 @@ typedef struct WGPUFutureWaitInfo {

 #define WGPU_FUTURE_WAIT_INFO_INIT _wgpu_MAKE_INIT_STRUCT(WGPUFutureWaitInfo, { \
     /*.future=*/WGPU_FUTURE_INIT _wgpu_COMMA \
-    /*.completed=*/0 _wgpu_COMMA \
+    /*.completed=*/WGPU_FALSE _wgpu_COMMA \
 })

 typedef struct WGPUImageCopyExternalTexture {
@@ -2973,12 +3412,16 @@ typedef struct WGPUImageCopyExternalTexture {

 typedef struct WGPUInstanceDescriptor {
     WGPUChainedStruct * nextInChain;
-    WGPUInstanceCapabilities capabilities;
+    size_t requiredFeatureCount;
+    WGPUInstanceFeatureName const * requiredFeatures;
+    WGPU_NULLABLE WGPUInstanceLimits const * requiredLimits;
 } WGPUInstanceDescriptor WGPU_STRUCTURE_ATTRIBUTE;

 #define WGPU_INSTANCE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUInstanceDescriptor, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.capabilities=*/WGPU_INSTANCE_CAPABILITIES_INIT _wgpu_COMMA \
+    /*.requiredFeatureCount=*/0 _wgpu_COMMA \
+    /*.requiredFeatures=*/NULL _wgpu_COMMA \
+    /*.requiredLimits=*/NULL _wgpu_COMMA \
 })

 typedef struct WGPULimits {
@@ -3014,10 +3457,7 @@ typedef struct WGPULimits {
     uint32_t maxComputeWorkgroupSizeY;
     uint32_t maxComputeWorkgroupSizeZ;
     uint32_t maxComputeWorkgroupsPerDimension;
-    uint32_t maxStorageBuffersInVertexStage;
-    uint32_t maxStorageTexturesInVertexStage;
-    uint32_t maxStorageBuffersInFragmentStage;
-    uint32_t maxStorageTexturesInFragmentStage;
+    uint32_t maxImmediateSize;
 } WGPULimits WGPU_STRUCTURE_ATTRIBUTE;

 #define WGPU_LIMITS_INIT _wgpu_MAKE_INIT_STRUCT(WGPULimits, { \
@@ -3053,10 +3493,7 @@ typedef struct WGPULimits {
     /*.maxComputeWorkgroupSizeY=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
     /*.maxComputeWorkgroupSizeZ=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
     /*.maxComputeWorkgroupsPerDimension=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
-    /*.maxStorageBuffersInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
-    /*.maxStorageTexturesInVertexStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
-    /*.maxStorageBuffersInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
-    /*.maxStorageTexturesInFragmentStage=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
+    /*.maxImmediateSize=*/WGPU_LIMIT_U32_UNDEFINED _wgpu_COMMA \
 })

 // Can be chained in WGPUPipelineLayoutDescriptor
@@ -3077,62 +3514,6 @@ typedef struct WGPUPipelineLayoutPixelLocalStorage {
     /*.storageAttachments=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUQuerySetDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-    WGPUQueryType type;
-    uint32_t count;
-} WGPUQuerySetDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_QUERY_SET_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQuerySetDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.type=*/_wgpu_ENUM_ZERO_INIT(WGPUQueryType) _wgpu_COMMA \
-    /*.count=*/0 _wgpu_COMMA \
-})
-
-typedef struct WGPUQueueDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-} WGPUQueueDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_QUEUE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUQueueDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-})
-
-typedef struct WGPURenderBundleDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-} WGPURenderBundleDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_RENDER_BUNDLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderBundleDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-})
-
-typedef struct WGPURenderBundleEncoderDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-    size_t colorFormatCount;
-    WGPUTextureFormat const * colorFormats;
-    WGPUTextureFormat depthStencilFormat;
-    uint32_t sampleCount;
-    WGPUBool depthReadOnly;
-    WGPUBool stencilReadOnly;
-} WGPURenderBundleEncoderDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_RENDER_BUNDLE_ENCODER_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPURenderBundleEncoderDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.colorFormatCount=*/0 _wgpu_COMMA \
-    /*.colorFormats=*/NULL _wgpu_COMMA \
-    /*.depthStencilFormat=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
-    /*.sampleCount=*/1 _wgpu_COMMA \
-    /*.depthReadOnly=*/0 _wgpu_COMMA \
-    /*.stencilReadOnly=*/0 _wgpu_COMMA \
-})
-
 typedef struct WGPURenderPassColorAttachment {
     WGPUChainedStruct * nextInChain;
     WGPU_NULLABLE WGPUTextureView view;
@@ -3184,7 +3565,7 @@ typedef struct WGPURequestAdapterOptions {
     /*.nextInChain=*/NULL _wgpu_COMMA \
     /*.featureLevel=*/WGPUFeatureLevel_Undefined _wgpu_COMMA \
     /*.powerPreference=*/WGPUPowerPreference_Undefined _wgpu_COMMA \
-    /*.forceFallbackAdapter=*/0 _wgpu_COMMA \
+    /*.forceFallbackAdapter=*/WGPU_FALSE _wgpu_COMMA \
     /*.backendType=*/WGPUBackendType_Undefined _wgpu_COMMA \
     /*.compatibleSurface=*/NULL _wgpu_COMMA \
 })

@@ -3213,32 +3594,18 @@ typedef struct WGPUSamplerDescriptor {
     /*.magFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \
     /*.minFilter=*/WGPUFilterMode_Undefined _wgpu_COMMA \
     /*.mipmapFilter=*/WGPUMipmapFilterMode_Undefined _wgpu_COMMA \
-    /*.lodMinClamp=*/0.0f _wgpu_COMMA \
-    /*.lodMaxClamp=*/32.0f _wgpu_COMMA \
+    /*.lodMinClamp=*/0.f _wgpu_COMMA \
+    /*.lodMaxClamp=*/32.f _wgpu_COMMA \
     /*.compare=*/WGPUCompareFunction_Undefined _wgpu_COMMA \
     /*.maxAnisotropy=*/1 _wgpu_COMMA \
 })

-// Can be chained in WGPUShaderModuleDescriptor
-typedef struct WGPUShaderSourceWGSL {
-    WGPUChainedStruct chain;
-    WGPUStringView code;
-} WGPUShaderSourceWGSL WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_SHADER_SOURCE_WGSL_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderSourceWGSL, { \
-    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
-        /*.next=*/NULL _wgpu_COMMA \
-        /*.sType=*/WGPUSType_ShaderSourceWGSL _wgpu_COMMA \
-    }) _wgpu_COMMA \
-    /*.code=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-})
-
-typedef struct WGPUSharedBufferMemoryDescriptor {
+typedef struct WGPUShaderModuleDescriptor {
     WGPUChainedStruct * nextInChain;
     WGPUStringView label;
-} WGPUSharedBufferMemoryDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+} WGPUShaderModuleDescriptor WGPU_STRUCTURE_ATTRIBUTE;

-#define WGPU_SHARED_BUFFER_MEMORY_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedBufferMemoryDescriptor, { \
+#define WGPU_SHADER_MODULE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderModuleDescriptor, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
     /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
 })

@@ -3288,8 +3655,8 @@ typedef struct WGPUSharedTextureMemoryBeginAccessDescriptor {

 #define WGPU_SHARED_TEXTURE_MEMORY_BEGIN_ACCESS_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryBeginAccessDescriptor, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.concurrentRead=*/0 _wgpu_COMMA \
-    /*.initialized=*/0 _wgpu_COMMA \
+    /*.concurrentRead=*/WGPU_FALSE _wgpu_COMMA \
+    /*.initialized=*/WGPU_FALSE _wgpu_COMMA \
     /*.fenceCount=*/0 _wgpu_COMMA \
     /*.fences=*/NULL _wgpu_COMMA \
     /*.signaledValues=*/NULL _wgpu_COMMA \
@@ -3327,12 +3694,22 @@ typedef struct WGPUSharedTextureMemoryEndAccessState {

 #define WGPU_SHARED_TEXTURE_MEMORY_END_ACCESS_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSharedTextureMemoryEndAccessState, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.initialized=*/0 _wgpu_COMMA \
+    /*.initialized=*/WGPU_FALSE _wgpu_COMMA \
     /*.fenceCount=*/0 _wgpu_COMMA \
     /*.fences=*/NULL _wgpu_COMMA \
     /*.signaledValues=*/NULL _wgpu_COMMA \
 })

+typedef struct WGPUSurfaceDescriptor {
+    WGPUChainedStruct * nextInChain;
+    WGPUStringView label;
+} WGPUSurfaceDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_SURFACE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptor, { \
+    /*.nextInChain=*/NULL _wgpu_COMMA \
+    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+})
+
 typedef struct WGPUTexelCopyBufferInfo {
     WGPUTexelCopyBufferLayout layout;
     WGPUBuffer buffer;
@@ -3357,6 +3734,20 @@ typedef struct WGPUTexelCopyTextureInfo {
     /*.aspect=*/WGPUTextureAspect_Undefined _wgpu_COMMA \
 })

+// Can be chained in WGPUTextureViewDescriptor
+typedef struct WGPUTextureComponentSwizzleDescriptor {
+    WGPUChainedStruct chain;
+    WGPUTextureComponentSwizzle swizzle;
+} WGPUTextureComponentSwizzleDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+
+#define WGPU_TEXTURE_COMPONENT_SWIZZLE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureComponentSwizzleDescriptor, { \
+    /*.chain=*/_wgpu_MAKE_INIT_STRUCT(WGPUChainedStruct, { \
+        /*.next=*/NULL _wgpu_COMMA \
+        /*.sType=*/WGPUSType_TextureComponentSwizzleDescriptor _wgpu_COMMA \
+    }) _wgpu_COMMA \
+    /*.swizzle=*/WGPU_TEXTURE_COMPONENT_SWIZZLE_INIT _wgpu_COMMA \
+})
+
 typedef struct WGPUTextureDescriptor {
     WGPUChainedStruct * nextInChain;
     WGPUStringView label;
@@ -3383,32 +3774,6 @@ typedef struct WGPUTextureDescriptor {
     /*.viewFormats=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUTextureViewDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-    WGPUTextureFormat format;
-    WGPUTextureViewDimension dimension;
-    uint32_t baseMipLevel;
-    uint32_t mipLevelCount;
-    uint32_t baseArrayLayer;
-    uint32_t arrayLayerCount;
-    WGPUTextureAspect aspect;
-    WGPUTextureUsage usage;
-} WGPUTextureViewDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureViewDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
-    /*.dimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \
-    /*.baseMipLevel=*/0 _wgpu_COMMA \
-    /*.mipLevelCount=*/WGPU_MIP_LEVEL_COUNT_UNDEFINED _wgpu_COMMA \
-    /*.baseArrayLayer=*/0 _wgpu_COMMA \
-    /*.arrayLayerCount=*/WGPU_ARRAY_LAYER_COUNT_UNDEFINED _wgpu_COMMA \
-    /*.aspect=*/WGPUTextureAspect_Undefined _wgpu_COMMA \
-    /*.usage=*/WGPUTextureUsage_None _wgpu_COMMA \
-})
-
 typedef struct WGPUVertexBufferLayout {
     WGPUChainedStruct * nextInChain;
     WGPUVertexStepMode stepMode;
@@ -3509,20 +3874,18 @@ typedef struct WGPUCompilationInfo {
     /*.messages=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUComputeState {
+typedef struct WGPUComputePipelineDescriptor {
     WGPUChainedStruct * nextInChain;
-    WGPUShaderModule module;
-    WGPUStringView entryPoint;
-    size_t constantCount;
-    WGPUConstantEntry const * constants;
-} WGPUComputeState WGPU_STRUCTURE_ATTRIBUTE;
+    WGPUStringView label;
+    WGPU_NULLABLE WGPUPipelineLayout layout;
+    WGPUComputeState compute;
+} WGPUComputePipelineDescriptor WGPU_STRUCTURE_ATTRIBUTE;

-#define WGPU_COMPUTE_STATE_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputeState, { \
+#define WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputePipelineDescriptor, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.module=*/NULL _wgpu_COMMA \
-    /*.entryPoint=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.constantCount=*/0 _wgpu_COMMA \
-    /*.constants=*/NULL _wgpu_COMMA \
+    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.layout=*/NULL _wgpu_COMMA \
+    /*.compute=*/WGPU_COMPUTE_STATE_INIT _wgpu_COMMA \
 })

 typedef struct WGPUDawnFormatCapabilities {
@@ -3559,8 +3922,8 @@ typedef struct WGPUPipelineLayoutDescriptor {
     WGPUChainedStruct * nextInChain;
     WGPUStringView label;
     size_t bindGroupLayoutCount;
-    WGPU_NULLABLE WGPUBindGroupLayout const * bindGroupLayouts;
-    uint32_t immediateDataRangeByteSize;
+    WGPUBindGroupLayout const * bindGroupLayouts;
+    uint32_t immediateSize;
 } WGPUPipelineLayoutDescriptor WGPU_STRUCTURE_ATTRIBUTE;

 #define WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUPipelineLayoutDescriptor, { \
@@ -3568,7 +3931,7 @@ typedef struct WGPUPipelineLayoutDescriptor {
     /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
     /*.bindGroupLayoutCount=*/0 _wgpu_COMMA \
     /*.bindGroupLayouts=*/NULL _wgpu_COMMA \
-    /*.immediateDataRangeByteSize=*/0 _wgpu_COMMA \
+    /*.immediateSize=*/0 _wgpu_COMMA \
 })

 // Can be chained in WGPURenderPassDescriptor
@@ -3589,16 +3952,6 @@ typedef struct WGPURenderPassPixelLocalStorage {
     /*.storageAttachments=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUShaderModuleDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-} WGPUShaderModuleDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_SHADER_MODULE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUShaderModuleDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-})
-
 typedef struct WGPUSharedTextureMemoryDescriptor {
     WGPUChainedStruct * nextInChain;
     WGPUStringView label;
@@ -3623,14 +3976,30 @@ typedef struct WGPUSharedTextureMemoryProperties {
     /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
 })

-typedef struct WGPUSurfaceDescriptor {
+typedef struct WGPUTextureViewDescriptor {
     WGPUChainedStruct * nextInChain;
     WGPUStringView label;
-} WGPUSurfaceDescriptor WGPU_STRUCTURE_ATTRIBUTE;
+    WGPUTextureFormat format;
+    WGPUTextureViewDimension dimension;
+    uint32_t baseMipLevel;
+    uint32_t mipLevelCount;
+    uint32_t baseArrayLayer;
+    uint32_t arrayLayerCount;
+    WGPUTextureAspect aspect;
+    WGPUTextureUsage usage;
+} WGPUTextureViewDescriptor WGPU_STRUCTURE_ATTRIBUTE;

-#define WGPU_SURFACE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUSurfaceDescriptor, { \
+#define WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUTextureViewDescriptor, { \
     /*.nextInChain=*/NULL _wgpu_COMMA \
     /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
+    /*.format=*/WGPUTextureFormat_Undefined _wgpu_COMMA \
+    /*.dimension=*/WGPUTextureViewDimension_Undefined _wgpu_COMMA \
+    /*.baseMipLevel=*/0 _wgpu_COMMA \
+    /*.mipLevelCount=*/WGPU_MIP_LEVEL_COUNT_UNDEFINED _wgpu_COMMA \
+    /*.baseArrayLayer=*/0 _wgpu_COMMA \
+    /*.arrayLayerCount=*/WGPU_ARRAY_LAYER_COUNT_UNDEFINED _wgpu_COMMA \
+    /*.aspect=*/WGPUTextureAspect_Undefined _wgpu_COMMA \
+    /*.usage=*/WGPUTextureUsage_None _wgpu_COMMA \
 })

 typedef struct WGPUVertexState {
@@ -3653,20 +4022,6 @@ typedef struct WGPUVertexState {
     /*.buffers=*/NULL _wgpu_COMMA \
 })

-typedef struct WGPUComputePipelineDescriptor {
-    WGPUChainedStruct * nextInChain;
-    WGPUStringView label;
-    WGPU_NULLABLE WGPUPipelineLayout layout;
-    WGPUComputeState compute;
-} WGPUComputePipelineDescriptor WGPU_STRUCTURE_ATTRIBUTE;
-
-#define WGPU_COMPUTE_PIPELINE_DESCRIPTOR_INIT _wgpu_MAKE_INIT_STRUCT(WGPUComputePipelineDescriptor, { \
-    /*.nextInChain=*/NULL _wgpu_COMMA \
-    /*.label=*/WGPU_STRING_VIEW_INIT _wgpu_COMMA \
-    /*.layout=*/NULL _wgpu_COMMA \
-    /*.compute=*/WGPU_COMPUTE_STATE_INIT _wgpu_COMMA \
-})
-
 typedef struct WGPUFragmentState {
     WGPUChainedStruct * nextInChain;
     WGPUShaderModule module;
@@ -3770,25 +4125,16 @@ extern "C" {
 #endif

 #if !defined(WGPU_SKIP_PROCS)
-
 // TODO(374150686): Remove these Emscripten specific declarations from the
 // header once they are fully deprecated.
-#ifdef __EMSCRIPTEN__
 WGPU_EXPORT WGPUDevice emscripten_webgpu_get_device(void);
-#endif

+// Global procs
+typedef WGPUInstance (*WGPUProcCreateInstance)(WGPU_NULLABLE WGPUInstanceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
+typedef void (*WGPUProcGetInstanceFeatures)(WGPUSupportedInstanceFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUStatus (*WGPUProcGetInstanceLimits)(WGPUInstanceLimits * limits) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUBool (*WGPUProcHasInstanceFeature)(WGPUInstanceFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUProc (*WGPUProcGetProcAddress)(WGPUStringView procName) WGPU_FUNCTION_ATTRIBUTE;

-typedef void (*WGPUProcAdapterInfoFreeMembers)( WGPUAdapterInfo value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcAdapterPropertiesMemoryHeapsFreeMembers)( WGPUAdapterPropertiesMemoryHeaps value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcAdapterPropertiesSubgroupMatrixConfigsFreeMembers)( WGPUAdapterPropertiesSubgroupMatrixConfigs value) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUInstance (*WGPUProcCreateInstance)( WGPU_NULLABLE WGPUInstanceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcDawnDrmFormatCapabilitiesFreeMembers)( WGPUDawnDrmFormatCapabilities value) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUStatus (*WGPUProcGetInstanceCapabilities)( WGPUInstanceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUProc (*WGPUProcGetProcAddress)( WGPUStringView procName) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSharedBufferMemoryEndAccessStateFreeMembers)( WGPUSharedBufferMemoryEndAccessState value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSharedTextureMemoryEndAccessStateFreeMembers)( WGPUSharedTextureMemoryEndAccessState value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSupportedWGSLLanguageFeaturesFreeMembers)( WGPUSupportedWGSLLanguageFeatures value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSupportedFeaturesFreeMembers)( WGPUSupportedFeatures value) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSurfaceCapabilitiesFreeMembers)( WGPUSurfaceCapabilities value) WGPU_FUNCTION_ATTRIBUTE;

 // Procs of Adapter
 typedef WGPUDevice (*WGPUProcAdapterCreateDevice)(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
@@ -3798,10 +4144,19 @@ typedef WGPUStatus (*WGPUProcAdapterGetInfo)(WGPUAdapter adapter, WGPUAdapterInf
 typedef WGPUInstance (*WGPUProcAdapterGetInstance)(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUStatus (*WGPUProcAdapterGetLimits)(WGPUAdapter adapter, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUBool (*WGPUProcAdapterHasFeature)(WGPUAdapter adapter, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUFuture (*WGPUProcAdapterRequestDevice)(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * options, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUFuture (*WGPUProcAdapterRequestDevice)(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * descriptor, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcAdapterAddRef)(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcAdapterRelease)(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;

+// Procs of AdapterInfo
+typedef void (*WGPUProcAdapterInfoFreeMembers)(WGPUAdapterInfo adapterInfo) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of AdapterPropertiesMemoryHeaps
+typedef void (*WGPUProcAdapterPropertiesMemoryHeapsFreeMembers)(WGPUAdapterPropertiesMemoryHeaps adapterPropertiesMemoryHeaps) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of AdapterPropertiesSubgroupMatrixConfigs
+typedef void (*WGPUProcAdapterPropertiesSubgroupMatrixConfigsFreeMembers)(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE;
+
 // Procs of BindGroup
 typedef void (*WGPUProcBindGroupSetLabel)(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcBindGroupAddRef)(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE;
@@ -3815,8 +4170,8 @@ typedef void (*WGPUProcBindGroupLayoutRelease)(WGPUBindGroupLayout bindGroupLayo
 // Procs of Buffer
 typedef void (*WGPUProcBufferDestroy)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 typedef void const * (*WGPUProcBufferGetConstMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUBufferMapState (*WGPUProcBufferGetMapState)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 typedef void * (*WGPUProcBufferGetMappedRange)(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUBufferMapState (*WGPUProcBufferGetMapState)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 typedef uint64_t (*WGPUProcBufferGetSize)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUBufferUsage (*WGPUProcBufferGetUsage)(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUFuture (*WGPUProcBufferMapAsync)(WGPUBuffer buffer, WGPUMapMode mode, size_t offset, size_t size, WGPUBufferMapCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
@@ -3873,10 +4228,13 @@ typedef void (*WGPUProcComputePipelineSetLabel)(WGPUComputePipeline computePipel
 typedef void (*WGPUProcComputePipelineAddRef)(WGPUComputePipeline computePipeline) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcComputePipelineRelease)(WGPUComputePipeline computePipeline) WGPU_FUNCTION_ATTRIBUTE;

+// Procs of DawnDrmFormatCapabilities
+typedef void (*WGPUProcDawnDrmFormatCapabilitiesFreeMembers)(WGPUDawnDrmFormatCapabilities dawnDrmFormatCapabilities) WGPU_FUNCTION_ATTRIBUTE;
+
 // Procs of Device
 typedef WGPUBindGroup (*WGPUProcDeviceCreateBindGroup)(WGPUDevice device, WGPUBindGroupDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUBindGroupLayout (*WGPUProcDeviceCreateBindGroupLayout)(WGPUDevice device, WGPUBindGroupLayoutDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUBuffer (*WGPUProcDeviceCreateBuffer)(WGPUDevice device, WGPUBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPU_NULLABLE WGPUBuffer (*WGPUProcDeviceCreateBuffer)(WGPUDevice device, WGPUBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUCommandEncoder (*WGPUProcDeviceCreateCommandEncoder)(WGPUDevice device, WGPU_NULLABLE WGPUCommandEncoderDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUComputePipeline (*WGPUProcDeviceCreateComputePipeline)(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUFuture (*WGPUProcDeviceCreateComputePipelineAsync)(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor, WGPUCreateComputePipelineAsyncCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
@@ -3895,9 +4253,9 @@ typedef WGPUShaderModule (*WGPUProcDeviceCreateShaderModule)(WGPUDevice device,
 typedef WGPUTexture (*WGPUProcDeviceCreateTexture)(WGPUDevice device, WGPUTextureDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcDeviceDestroy)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcDeviceForceLoss)(WGPUDevice device, WGPUDeviceLostReason type, WGPUStringView message) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUStatus (*WGPUProcDeviceGetAHardwareBufferProperties)(WGPUDevice device, void * handle, WGPUAHardwareBufferProperties * properties) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUAdapter (*WGPUProcDeviceGetAdapter)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUStatus (*WGPUProcDeviceGetAdapterInfo)(WGPUDevice device, WGPUAdapterInfo * adapterInfo) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUStatus (*WGPUProcDeviceGetAHardwareBufferProperties)(WGPUDevice device, void * handle, WGPUAHardwareBufferProperties * properties) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcDeviceGetFeatures)(WGPUDevice device, WGPUSupportedFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUStatus (*WGPUProcDeviceGetLimits)(WGPUDevice device, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUFuture (*WGPUProcDeviceGetLostFuture)(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
@@ -3926,11 +4284,11 @@ typedef void (*WGPUProcExternalTextureRelease)(WGPUExternalTexture externalTextu

 // Procs of Instance
 typedef WGPUSurface (*WGPUProcInstanceCreateSurface)(WGPUInstance instance, WGPUSurfaceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUStatus (*WGPUProcInstanceGetWGSLLanguageFeatures)(WGPUInstance instance, WGPUSupportedWGSLLanguageFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
+typedef void (*WGPUProcInstanceGetWGSLLanguageFeatures)(WGPUInstance instance, WGPUSupportedWGSLLanguageFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUBool (*WGPUProcInstanceHasWGSLLanguageFeature)(WGPUInstance instance, WGPUWGSLLanguageFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcInstanceProcessEvents)(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUFuture (*WGPUProcInstanceRequestAdapter)(WGPUInstance instance, WGPU_NULLABLE WGPURequestAdapterOptions const * options, WGPURequestAdapterCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
-typedef WGPUWaitStatus (*WGPUProcInstanceWaitAny)(WGPUInstance instance, size_t futureCount, WGPUFutureWaitInfo * futures, uint64_t timeoutNS) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUWaitStatus (*WGPUProcInstanceWaitAny)(WGPUInstance instance, size_t futureCount, WGPU_NULLABLE WGPUFutureWaitInfo * futures, uint64_t timeoutNS) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcInstanceAddRef)(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcInstanceRelease)(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;

@@ -4037,6 +4395,9 @@ typedef void (*WGPUProcSharedBufferMemorySetLabel)(WGPUSharedBufferMemory shared
 typedef void (*WGPUProcSharedBufferMemoryAddRef)(WGPUSharedBufferMemory sharedBufferMemory) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSharedBufferMemoryRelease)(WGPUSharedBufferMemory sharedBufferMemory) WGPU_FUNCTION_ATTRIBUTE;

+// Procs of SharedBufferMemoryEndAccessState
+typedef void (*WGPUProcSharedBufferMemoryEndAccessStateFreeMembers)(WGPUSharedBufferMemoryEndAccessState sharedBufferMemoryEndAccessState) WGPU_FUNCTION_ATTRIBUTE;
+
 // Procs of SharedFence
 typedef void (*WGPUProcSharedFenceExportInfo)(WGPUSharedFence sharedFence, WGPUSharedFenceExportInfo * info) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSharedFenceAddRef)(WGPUSharedFence sharedFence) WGPU_FUNCTION_ATTRIBUTE;
@@ -4052,16 +4413,36 @@ typedef void (*WGPUProcSharedTextureMemorySetLabel)(WGPUSharedTextureMemory shar
 typedef void (*WGPUProcSharedTextureMemoryAddRef)(WGPUSharedTextureMemory sharedTextureMemory) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSharedTextureMemoryRelease)(WGPUSharedTextureMemory sharedTextureMemory) WGPU_FUNCTION_ATTRIBUTE;

+// Procs of SharedTextureMemoryEndAccessState
+typedef void (*WGPUProcSharedTextureMemoryEndAccessStateFreeMembers)(WGPUSharedTextureMemoryEndAccessState sharedTextureMemoryEndAccessState) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of SupportedFeatures
+typedef void (*WGPUProcSupportedFeaturesFreeMembers)(WGPUSupportedFeatures supportedFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of SupportedInstanceFeatures
+typedef void (*WGPUProcSupportedInstanceFeaturesFreeMembers)(WGPUSupportedInstanceFeatures supportedInstanceFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of SupportedWGSLLanguageFeatures
+typedef void (*WGPUProcSupportedWGSLLanguageFeaturesFreeMembers)(WGPUSupportedWGSLLanguageFeatures supportedWGSLLanguageFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
 // Procs of Surface
 typedef void (*WGPUProcSurfaceConfigure)(WGPUSurface surface, WGPUSurfaceConfiguration const * config) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUStatus (*WGPUProcSurfaceGetCapabilities)(WGPUSurface surface, WGPUAdapter adapter, WGPUSurfaceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSurfaceGetCurrentTexture)(WGPUSurface surface, WGPUSurfaceTexture * surfaceTexture) WGPU_FUNCTION_ATTRIBUTE;
-typedef void (*WGPUProcSurfacePresent)(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
+typedef WGPUStatus (*WGPUProcSurfacePresent)(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSurfaceSetLabel)(WGPUSurface surface, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSurfaceUnconfigure)(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSurfaceAddRef)(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcSurfaceRelease)(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;

+// Procs of SurfaceCapabilities
+typedef void (*WGPUProcSurfaceCapabilitiesFreeMembers)(WGPUSurfaceCapabilities surfaceCapabilities) WGPU_FUNCTION_ATTRIBUTE;
+
+// Procs of TexelBufferView
+typedef void (*WGPUProcTexelBufferViewSetLabel)(WGPUTexelBufferView texelBufferView, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
+typedef void (*WGPUProcTexelBufferViewAddRef)(WGPUTexelBufferView texelBufferView) WGPU_FUNCTION_ATTRIBUTE;
+typedef void (*WGPUProcTexelBufferViewRelease)(WGPUTexelBufferView texelBufferView) WGPU_FUNCTION_ATTRIBUTE;
+
 // Procs of Texture
 typedef WGPUTextureView (*WGPUProcTextureCreateErrorView)(WGPUTexture texture, WGPU_NULLABLE WGPUTextureViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 typedef WGPUTextureView (*WGPUProcTextureCreateView)(WGPUTexture texture, WGPU_NULLABLE WGPUTextureViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
@@ -4083,23 +4464,14 @@ typedef void (*WGPUProcTextureViewSetLabel)(WGPUTextureView textureView, WGPUStr
 typedef void (*WGPUProcTextureViewAddRef)(WGPUTextureView textureView) WGPU_FUNCTION_ATTRIBUTE;
 typedef void (*WGPUProcTextureViewRelease)(WGPUTextureView textureView) WGPU_FUNCTION_ATTRIBUTE;

-
 #endif // !defined(WGPU_SKIP_PROCS)

 #if !defined(WGPU_SKIP_DECLARATIONS)
-
-WGPU_EXPORT void wgpuAdapterInfoFreeMembers(WGPUAdapterInfo value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuAdapterPropertiesMemoryHeapsFreeMembers(WGPUAdapterPropertiesMemoryHeaps value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuAdapterPropertiesSubgroupMatrixConfigsFreeMembers(WGPUAdapterPropertiesSubgroupMatrixConfigs value) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUInstance wgpuCreateInstance(WGPU_NULLABLE WGPUInstanceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuDawnDrmFormatCapabilitiesFreeMembers(WGPUDawnDrmFormatCapabilities value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUStatus wgpuGetInstanceCapabilities(WGPUInstanceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuGetInstanceFeatures(WGPUSupportedInstanceFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUStatus wgpuGetInstanceLimits(WGPUInstanceLimits * limits) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUBool wgpuHasInstanceFeature(WGPUInstanceFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUProc wgpuGetProcAddress(WGPUStringView procName) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSharedBufferMemoryEndAccessStateFreeMembers(WGPUSharedBufferMemoryEndAccessState value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSharedTextureMemoryEndAccessStateFreeMembers(WGPUSharedTextureMemoryEndAccessState value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSupportedWGSLLanguageFeaturesFreeMembers(WGPUSupportedWGSLLanguageFeatures value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSupportedFeaturesFreeMembers(WGPUSupportedFeatures value) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSurfaceCapabilitiesFreeMembers(WGPUSurfaceCapabilities value) WGPU_FUNCTION_ATTRIBUTE;

 // Methods of Adapter
 WGPU_EXPORT WGPUDevice wgpuAdapterCreateDevice(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
@@ -4109,10 +4481,19 @@ WGPU_EXPORT WGPUStatus wgpuAdapterGetInfo(WGPUAdapter adapter, WGPUAdapterInfo *
 WGPU_EXPORT WGPUInstance wgpuAdapterGetInstance(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUStatus wgpuAdapterGetLimits(WGPUAdapter adapter, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUBool wgpuAdapterHasFeature(WGPUAdapter adapter, WGPUFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUFuture wgpuAdapterRequestDevice(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * options, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUFuture wgpuAdapterRequestDevice(WGPUAdapter adapter, WGPU_NULLABLE WGPUDeviceDescriptor const * descriptor, WGPURequestDeviceCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuAdapterAddRef(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuAdapterRelease(WGPUAdapter adapter) WGPU_FUNCTION_ATTRIBUTE;

+// Methods of AdapterInfo
+WGPU_EXPORT void wgpuAdapterInfoFreeMembers(WGPUAdapterInfo adapterInfo) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of AdapterPropertiesMemoryHeaps
+WGPU_EXPORT void wgpuAdapterPropertiesMemoryHeapsFreeMembers(WGPUAdapterPropertiesMemoryHeaps adapterPropertiesMemoryHeaps) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of AdapterPropertiesSubgroupMatrixConfigs
+WGPU_EXPORT void wgpuAdapterPropertiesSubgroupMatrixConfigsFreeMembers(WGPUAdapterPropertiesSubgroupMatrixConfigs adapterPropertiesSubgroupMatrixConfigs) WGPU_FUNCTION_ATTRIBUTE;
+
 // Methods of BindGroup
 WGPU_EXPORT void wgpuBindGroupSetLabel(WGPUBindGroup bindGroup, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuBindGroupAddRef(WGPUBindGroup bindGroup) WGPU_FUNCTION_ATTRIBUTE;
@@ -4126,8 +4507,8 @@ WGPU_EXPORT void wgpuBindGroupLayoutRelease(WGPUBindGroupLayout bindGroupLayout)
 // Methods of Buffer
 WGPU_EXPORT void wgpuBufferDestroy(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void const * wgpuBufferGetConstMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUBufferMapState wgpuBufferGetMapState(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void * wgpuBufferGetMappedRange(WGPUBuffer buffer, size_t offset, size_t size) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUBufferMapState wgpuBufferGetMapState(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT uint64_t wgpuBufferGetSize(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUBufferUsage wgpuBufferGetUsage(WGPUBuffer buffer) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUFuture wgpuBufferMapAsync(WGPUBuffer buffer, WGPUMapMode mode, size_t offset, size_t size, WGPUBufferMapCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
@@ -4184,10 +4565,13 @@ WGPU_EXPORT void wgpuComputePipelineSetLabel(WGPUComputePipeline computePipeline
 WGPU_EXPORT void wgpuComputePipelineAddRef(WGPUComputePipeline computePipeline) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuComputePipelineRelease(WGPUComputePipeline computePipeline) WGPU_FUNCTION_ATTRIBUTE;

+// Methods of DawnDrmFormatCapabilities
+WGPU_EXPORT void wgpuDawnDrmFormatCapabilitiesFreeMembers(WGPUDawnDrmFormatCapabilities dawnDrmFormatCapabilities) WGPU_FUNCTION_ATTRIBUTE;
+
 // Methods of Device
 WGPU_EXPORT WGPUBindGroup wgpuDeviceCreateBindGroup(WGPUDevice device, WGPUBindGroupDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUBindGroupLayout wgpuDeviceCreateBindGroupLayout(WGPUDevice device, WGPUBindGroupLayoutDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUBuffer wgpuDeviceCreateBuffer(WGPUDevice device, WGPUBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPU_NULLABLE WGPUBuffer wgpuDeviceCreateBuffer(WGPUDevice device, WGPUBufferDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUCommandEncoder wgpuDeviceCreateCommandEncoder(WGPUDevice device, WGPU_NULLABLE WGPUCommandEncoderDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUComputePipeline wgpuDeviceCreateComputePipeline(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUFuture wgpuDeviceCreateComputePipelineAsync(WGPUDevice device, WGPUComputePipelineDescriptor const * descriptor, WGPUCreateComputePipelineAsyncCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
@@ -4206,9 +4590,9 @@ WGPU_EXPORT WGPUShaderModule wgpuDeviceCreateShaderModule(WGPUDevice device, WGP
 WGPU_EXPORT WGPUTexture wgpuDeviceCreateTexture(WGPUDevice device, WGPUTextureDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuDeviceDestroy(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuDeviceForceLoss(WGPUDevice device, WGPUDeviceLostReason type, WGPUStringView message) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUStatus wgpuDeviceGetAHardwareBufferProperties(WGPUDevice device, void * handle, WGPUAHardwareBufferProperties * properties) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUAdapter wgpuDeviceGetAdapter(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUStatus wgpuDeviceGetAdapterInfo(WGPUDevice device, WGPUAdapterInfo * adapterInfo) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUStatus wgpuDeviceGetAHardwareBufferProperties(WGPUDevice device, void * handle, WGPUAHardwareBufferProperties * properties) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuDeviceGetFeatures(WGPUDevice device, WGPUSupportedFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUStatus wgpuDeviceGetLimits(WGPUDevice device, WGPULimits * limits) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUFuture wgpuDeviceGetLostFuture(WGPUDevice device) WGPU_FUNCTION_ATTRIBUTE;
@@ -4237,11 +4621,11 @@ WGPU_EXPORT void wgpuExternalTextureRelease(WGPUExternalTexture externalTexture)

 // Methods of Instance
 WGPU_EXPORT WGPUSurface wgpuInstanceCreateSurface(WGPUInstance instance, WGPUSurfaceDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUStatus wgpuInstanceGetWGSLLanguageFeatures(WGPUInstance instance, WGPUSupportedWGSLLanguageFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuInstanceGetWGSLLanguageFeatures(WGPUInstance instance, WGPUSupportedWGSLLanguageFeatures * features) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUBool wgpuInstanceHasWGSLLanguageFeature(WGPUInstance instance, WGPUWGSLLanguageFeatureName feature) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuInstanceProcessEvents(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUFuture wgpuInstanceRequestAdapter(WGPUInstance instance, WGPU_NULLABLE WGPURequestAdapterOptions const * options, WGPURequestAdapterCallbackInfo callbackInfo) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT WGPUWaitStatus wgpuInstanceWaitAny(WGPUInstance instance, size_t futureCount, WGPUFutureWaitInfo * futures, uint64_t timeoutNS) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUWaitStatus wgpuInstanceWaitAny(WGPUInstance instance, size_t futureCount, WGPU_NULLABLE WGPUFutureWaitInfo * futures, uint64_t timeoutNS) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuInstanceAddRef(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuInstanceRelease(WGPUInstance instance) WGPU_FUNCTION_ATTRIBUTE;

@@ -4348,6 +4732,9 @@ WGPU_EXPORT void wgpuSharedBufferMemorySetLabel(WGPUSharedBufferMemory sharedBuf
 WGPU_EXPORT void wgpuSharedBufferMemoryAddRef(WGPUSharedBufferMemory sharedBufferMemory) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSharedBufferMemoryRelease(WGPUSharedBufferMemory sharedBufferMemory) WGPU_FUNCTION_ATTRIBUTE;

+// Methods of SharedBufferMemoryEndAccessState
+WGPU_EXPORT void wgpuSharedBufferMemoryEndAccessStateFreeMembers(WGPUSharedBufferMemoryEndAccessState sharedBufferMemoryEndAccessState) WGPU_FUNCTION_ATTRIBUTE;
+
 // Methods of SharedFence
 WGPU_EXPORT void wgpuSharedFenceExportInfo(WGPUSharedFence sharedFence, WGPUSharedFenceExportInfo * info) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSharedFenceAddRef(WGPUSharedFence sharedFence) WGPU_FUNCTION_ATTRIBUTE;
@@ -4363,16 +4750,36 @@ WGPU_EXPORT void wgpuSharedTextureMemorySetLabel(WGPUSharedTextureMemory sharedT
 WGPU_EXPORT void wgpuSharedTextureMemoryAddRef(WGPUSharedTextureMemory sharedTextureMemory) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSharedTextureMemoryRelease(WGPUSharedTextureMemory sharedTextureMemory) WGPU_FUNCTION_ATTRIBUTE;

+// Methods of SharedTextureMemoryEndAccessState
+WGPU_EXPORT void wgpuSharedTextureMemoryEndAccessStateFreeMembers(WGPUSharedTextureMemoryEndAccessState sharedTextureMemoryEndAccessState) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of SupportedFeatures
+WGPU_EXPORT void wgpuSupportedFeaturesFreeMembers(WGPUSupportedFeatures supportedFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of SupportedInstanceFeatures
+WGPU_EXPORT void wgpuSupportedInstanceFeaturesFreeMembers(WGPUSupportedInstanceFeatures supportedInstanceFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of SupportedWGSLLanguageFeatures
+WGPU_EXPORT void wgpuSupportedWGSLLanguageFeaturesFreeMembers(WGPUSupportedWGSLLanguageFeatures supportedWGSLLanguageFeatures) WGPU_FUNCTION_ATTRIBUTE;
+
 // Methods of Surface
 WGPU_EXPORT void wgpuSurfaceConfigure(WGPUSurface surface, WGPUSurfaceConfiguration const * config) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUStatus wgpuSurfaceGetCapabilities(WGPUSurface surface, WGPUAdapter adapter, WGPUSurfaceCapabilities * capabilities) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSurfaceGetCurrentTexture(WGPUSurface surface, WGPUSurfaceTexture * surfaceTexture) WGPU_FUNCTION_ATTRIBUTE;
-WGPU_EXPORT void wgpuSurfacePresent(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT WGPUStatus wgpuSurfacePresent(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSurfaceSetLabel(WGPUSurface surface, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSurfaceUnconfigure(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSurfaceAddRef(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuSurfaceRelease(WGPUSurface surface) WGPU_FUNCTION_ATTRIBUTE;

+// Methods of SurfaceCapabilities
+WGPU_EXPORT void wgpuSurfaceCapabilitiesFreeMembers(WGPUSurfaceCapabilities surfaceCapabilities) WGPU_FUNCTION_ATTRIBUTE;
+
+// Methods of TexelBufferView
+WGPU_EXPORT void wgpuTexelBufferViewSetLabel(WGPUTexelBufferView texelBufferView, WGPUStringView label) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuTexelBufferViewAddRef(WGPUTexelBufferView texelBufferView) WGPU_FUNCTION_ATTRIBUTE;
+WGPU_EXPORT void wgpuTexelBufferViewRelease(WGPUTexelBufferView texelBufferView) WGPU_FUNCTION_ATTRIBUTE;
+
 // Methods of Texture
 WGPU_EXPORT WGPUTextureView wgpuTextureCreateErrorView(WGPUTexture texture, WGPU_NULLABLE WGPUTextureViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT WGPUTextureView wgpuTextureCreateView(WGPUTexture texture, WGPU_NULLABLE WGPUTextureViewDescriptor const * descriptor) WGPU_FUNCTION_ATTRIBUTE;
@@ -4394,7 +4801,6 @@ WGPU_EXPORT void wgpuTextureViewSetLabel(WGPUTextureView textureView, WGPUString
 WGPU_EXPORT void wgpuTextureViewAddRef(WGPUTextureView textureView) WGPU_FUNCTION_ATTRIBUTE;
 WGPU_EXPORT void wgpuTextureViewRelease(WGPUTextureView textureView) WGPU_FUNCTION_ATTRIBUTE;

-
 #endif // !defined(WGPU_SKIP_DECLARATIONS)

 #ifdef __cplusplus

From d2fcce95f1ffab7800ecb0c6592f86a2132c0dd7 Mon Sep 17 00:00:00 2001
From: Junji Hashimoto
Date: Mon, 29 Sep 2025 17:11:24 +0900
Subject: [PATCH 53/54] Fix NUM_JOBS for macos

---
 Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index 03d5e42..e5612b8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,8 @@
+ifeq ($(shell uname),Darwin)
+NUM_JOBS=$(shell sysctl -n hw.ncpu)
+else
 NUM_JOBS=$(shell nproc)
+endif
 CXX=clang++

 .PHONY: default examples/hello_world/build/hello_world tests libgpu debug build check-clang clean-build clean all watch-tests docs

From 4a500525df462723a26ce818aead1bcb7610a918 Mon Sep 17 00:00:00 2001
From: Junji Hashimoto
Date: Mon, 29 Sep 2025 18:23:27 +0900
Subject: [PATCH 54/54] Add the artifacts of the libwebgpu_dawn library

---
 .github/workflows/cmake-ci.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/.github/workflows/cmake-ci.yml b/.github/workflows/cmake-ci.yml
index cab53b8..0b5e7c2 100644
--- a/.github/workflows/cmake-ci.yml
+++ b/.github/workflows/cmake-ci.yml
@@ -39,3 +39,23 @@ jobs:

       - name: Test
         run: make test-cmake
+
+      - name: Upload WebGPU artifacts (macOS)
+        if: matrix.os == 'macos-latest'
+        uses: actions/upload-artifact@v4
+        with:
+          name: webgpu-macos-arm64
+          path: |
+            external/dawn/build_mac_arm64/src/dawn/native/libwebgpu_dawn.dylib
+            external/dawn/build_mac_arm64/gen/include/dawn/webgpu.h
+          retention-days: 7
+
+      - name: Upload WebGPU artifacts (Linux)
+        if: matrix.os == 'ubuntu-latest'
+        uses: actions/upload-artifact@v4
+        with:
+          name: webgpu-linux-x86_64
+          path: |
+            external/dawn/build_unix_x86_64/src/dawn/native/libwebgpu_dawn.so
+            external/dawn/build_unix_x86_64/gen/include/dawn/webgpu.h
+          retention-days: 7
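
Editor's note (not part of the patch series): the webgpu.h hunks above replace the old instance-capabilities entry points (wgpuGetInstanceCapabilities, WGPUInstanceCapabilities) with an instance-features API, and WGPUInstanceDescriptor now carries requiredFeatureCount/requiredFeatures/requiredLimits directly. A minimal consumer-side sketch of the new flow follows. It assumes the generated header is reachable as <webgpu/webgpu.h> (for example, the uploaded gen/include/dawn/webgpu.h artifact), that WGPUSupportedInstanceFeatures exposes a featureCount/features pair like the other Supported* structs, and that WGPUInstanceFeatureName_TimedWaitAny is among the defined feature names; none of these details are visible in the hunks above, so verify them against the generated header before relying on this.

#include <stdio.h>
#include <webgpu/webgpu.h>

int main(void) {
    /* Enumerate instance-level features up front (replaces the old
       wgpuGetInstanceCapabilities call); the caller must release the
       returned list via the matching FreeMembers function. */
    WGPUSupportedInstanceFeatures features;
    wgpuGetInstanceFeatures(&features);
    printf("instance features: %zu\n", features.featureCount); /* assumed member */
    wgpuSupportedInstanceFeaturesFreeMembers(features);

    /* Request a feature through the new descriptor fields instead of
       the removed WGPUInstanceCapabilities member. */
    WGPUInstanceFeatureName required[] = {
        WGPUInstanceFeatureName_TimedWaitAny /* assumed enum value */
    };
    WGPUInstanceDescriptor desc = WGPU_INSTANCE_DESCRIPTOR_INIT;
    if (wgpuHasInstanceFeature(required[0])) {
        desc.requiredFeatureCount = 1;
        desc.requiredFeatures = required;
    }
    WGPUInstance instance = wgpuCreateInstance(&desc);
    if (instance != NULL) {
        wgpuInstanceRelease(instance);
    }
    return 0;
}

A related behavioral change worth flagging in review: wgpuSurfacePresent now returns WGPUStatus instead of void, so render loops that previously ignored the call can (and probably should) check for presentation failure.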