add tracy
@ -7,6 +7,8 @@ set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
|
||||
|
||||
option(TRACY_ENABLE "Enable Tracy profiling" OFF)
|
||||
|
||||
# SDL
|
||||
set(SDL_SHARED OFF)
|
||||
set(SDL_STATIC ON)
|
||||
@ -38,6 +40,10 @@ find_program(SLANGC
|
||||
add_library(stb_image STATIC libs/stb/stb_image.c)
|
||||
target_include_directories(stb_image INTERFACE libs/stb)
|
||||
|
||||
# tracy
# TRACY_ONLY_LOCALHOST has to be cached before add_subdirectory(): Tracy's own
# CMakeLists declares the same option with a default of OFF, and option() leaves
# an already-cached value untouched, so the ON chosen here is what takes effect.
option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" ON)
add_subdirectory(libs/tracy)
|
||||
|
||||
function(add_shader name)
|
||||
set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/assets/shader/${name}.slang)
|
||||
set(OUTPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/assets/shader/${name}.h)
|
||||
@ -77,6 +83,7 @@ target_link_libraries(mikemon
|
||||
SDL3::SDL3
|
||||
stb_image
|
||||
imgui
|
||||
TracyClient
|
||||
)
|
||||
add_dependencies(mikemon
|
||||
SHADER_basic
|
||||
|
||||
18
libs/tracy/.clang-format
Normal file
@ -0,0 +1,18 @@
|
||||
# Empirical format config, based on observed style guide
|
||||
# Use this only as an aid to fit the surrounding code style - don't reformat whole files at once
|
||||
---
|
||||
BasedOnStyle: LLVM
|
||||
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
BreakBeforeBraces: Allman
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakStringLiterals: false
|
||||
ColumnLimit: 120
|
||||
FixNamespaceComments: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentWidth: 4
|
||||
PointerAlignment: Left
|
||||
SpaceBeforeParens: Never
|
||||
SpacesInParentheses: true
|
||||
TabWidth: 4
|
||||
62
libs/tracy/.clang-tidy
Normal file
@ -0,0 +1,62 @@
|
||||
---
|
||||
Checks:
|
||||
'
|
||||
clang-diagnostic-*,
|
||||
clang-analyzer-*,
|
||||
bugprone-*,
|
||||
google-*,
|
||||
misc-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-reserved-identifier,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-casting,
|
||||
-google-readability-function-size,
|
||||
-google-readability-todo,
|
||||
-google-readability-namespace-comments,
|
||||
-misc-confusable-identifiers,
|
||||
-misc-no-recursion,
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-deprecated-headers,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-trailing-return-type,
|
||||
-performance-no-int-to-ptr,
|
||||
-readability-braces-around-statements,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-identifier-length,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-qualified-auto,
|
||||
-readability-uppercase-literal-suffix
|
||||
'
|
||||
WarningsAsErrors: ''
|
||||
HeaderFilterRegex: ''
|
||||
AnalyzeTemporaryDtors: false
|
||||
FormatStyle: none
|
||||
CheckOptions:
|
||||
llvm-else-after-return.WarnOnConditionVariables: 'false'
|
||||
modernize-loop-convert.MinConfidence: reasonable
|
||||
modernize-replace-auto-ptr.IncludeStyle: llvm
|
||||
modernize-pass-by-value.IncludeStyle: llvm
|
||||
google-readability-namespace-comments.ShortNamespaceLines: '10'
|
||||
google-readability-namespace-comments.SpacesBeforeComments: '2'
|
||||
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
|
||||
google-readability-braces-around-statements.ShortStatementLines: '1'
|
||||
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
|
||||
modernize-loop-convert.MaxCopySize: '16'
|
||||
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
|
||||
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
|
||||
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
|
||||
modernize-use-nullptr.NullMacros: 'NULL'
|
||||
llvm-qualified-auto.AddConstToQualified: 'false'
|
||||
modernize-loop-convert.NamingStyle: CamelCase
|
||||
llvm-else-after-return.WarnOnUnfixable: 'false'
|
||||
google-readability-function-size.StatementThreshold: '800'
|
||||
...
|
||||
|
||||
1
libs/tracy/.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1 @@
|
||||
github: wolfpld
|
||||
BIN
libs/tracy/.github/sponsor.png
vendored
Normal file
|
After Width: | Height: | Size: 1.0 KiB |
78
libs/tracy/.github/workflows/build.yml
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
name: build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ windows-latest, macos-latest ]
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
run: pip install meson ninja
|
||||
- if: startsWith(matrix.os, 'macos')
|
||||
name: Install macos dependencies
|
||||
run: brew install pkg-config glfw meson
|
||||
- name: Profiler GUI
|
||||
run: |
|
||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build profiler/build --parallel --config Release
|
||||
- name: Update utility
|
||||
run: |
|
||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build update/build --parallel --config Release
|
||||
- name: Capture utility
|
||||
run: |
|
||||
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build capture/build --parallel --config Release
|
||||
- name: Csvexport utility
|
||||
run: |
|
||||
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build csvexport/build --parallel --config Release
|
||||
- name: Import utilities
|
||||
run: |
|
||||
cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build import/build --parallel --config Release
|
||||
- if: ${{ !startsWith(matrix.os, 'windows') }}
|
||||
name: Library
|
||||
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
|
||||
- if: ${{ !startsWith(matrix.os, 'windows') }}
|
||||
name: Find Artifacts
|
||||
id: find_artifacts
|
||||
run: |
|
||||
mkdir -p bin
|
||||
cp profiler/build/tracy-profiler bin
|
||||
cp update/build/tracy-update bin
|
||||
cp capture/build/tracy-capture bin
|
||||
cp csvexport/build/tracy-csvexport bin
|
||||
cp import/build/tracy-import-chrome bin
|
||||
cp import/build/tracy-import-fuchsia bin
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
name: Find Artifacts
|
||||
id: find_artifacts_windows
|
||||
run: |
|
||||
mkdir bin
|
||||
copy profiler\build\Release\tracy-profiler.exe bin
|
||||
copy update\build\Release\tracy-update.exe bin
|
||||
copy capture\build\Release\tracy-capture.exe bin
|
||||
copy csvexport\build\Release\tracy-csvexport.exe bin
|
||||
copy import\build\Release\tracy-import-chrome.exe bin
|
||||
copy import\build\Release\tracy-import-fuchsia.exe bin
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.os }}
|
||||
path: bin
|
||||
27
libs/tracy/.github/workflows/latex.yml
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
name: Manual
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fix stupidity
|
||||
run: |
|
||||
cp LICENSE LICENSE.
|
||||
- name: Compile LaTeX
|
||||
uses: xu-cheng/latex-action@v3
|
||||
with:
|
||||
working_directory: manual
|
||||
root_file: tracy.tex
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: manual
|
||||
path: manual/tracy.pdf
|
||||
75
libs/tracy/.github/workflows/linux.yml
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
name: linux
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
container: archlinux:base-devel
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
|
||||
- uses: actions/checkout@v4
|
||||
- name: Profiler GUI
|
||||
run: |
|
||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build profiler/build --parallel
|
||||
- name: Update utility
|
||||
run: |
|
||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build update/build --parallel
|
||||
- name: Capture utility
|
||||
run: |
|
||||
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build capture/build --parallel
|
||||
- name: Csvexport utility
|
||||
run: |
|
||||
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build csvexport/build --parallel
|
||||
- name: Import utilities
|
||||
run: |
|
||||
cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build import/build --parallel
|
||||
- name: Library
|
||||
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
|
||||
- name: Test application
|
||||
run: |
|
||||
# test compilation with different flags
|
||||
# we clean the build folder to reset cached variables between runs
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_ON_DEMAND
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_DEMANGLE
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
- name: Find Artifacts
|
||||
id: find_artifacts
|
||||
run: |
|
||||
mkdir -p bin
|
||||
cp profiler/build/tracy-profiler bin
|
||||
cp update/build/tracy-update bin
|
||||
cp capture/build/tracy-capture bin
|
||||
cp csvexport/build/tracy-csvexport bin
|
||||
cp import/build/tracy-import-chrome bin
|
||||
cp import/build/tracy-import-fuchsia bin
|
||||
strip bin/tracy-*
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: arch-linux
|
||||
path: bin
|
||||
36
libs/tracy/.gitignore
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
.vs
|
||||
_build
|
||||
_compiler
|
||||
tools/*
|
||||
*.d
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
*.obj
|
||||
imgui.ini
|
||||
test/tracy_test
|
||||
test/tracy_test.exe
|
||||
*/build/unix/*-*
|
||||
manual/t*.aux
|
||||
manual/t*.log
|
||||
manual/t*.out
|
||||
manual/t*.pdf
|
||||
manual/t*.synctex.gz
|
||||
manual/t*.toc
|
||||
manual/t*.bbl
|
||||
manual/t*.blg
|
||||
manual/t*.fdb_latexmk
|
||||
manual/t*.fls
|
||||
profiler/build/win32/packages
|
||||
profiler/build/win32/Tracy.aps
|
||||
.deps/
|
||||
.dirstamp
|
||||
/_*/**
|
||||
/**/__pycache__/**
|
||||
extra/vswhere.exe
|
||||
extra/tracy-build
|
||||
.cache
|
||||
compile_commands.json
|
||||
profiler/build/wasm/Tracy-release.*
|
||||
profiler/build/wasm/Tracy-debug.*
|
||||
profiler/build/wasm/embed.tracy
|
||||
7
libs/tracy/.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"vadimcn.vscode-lldb",
|
||||
"ms-vscode.cmake-tools"
|
||||
]
|
||||
}
|
||||
14
libs/tracy/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${command:cmake.launchTargetPath}",
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"terminal": "console"
|
||||
}
|
||||
]
|
||||
}
|
||||
20
libs/tracy/.vscode/settings.json
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
{
|
||||
"cmake.configureOnOpen": true,
|
||||
"cmake.sourceDirectory": [
|
||||
"${workspaceFolder}/profiler",
|
||||
"${workspaceFolder}/capture",
|
||||
"${workspaceFolder}/csvexport",
|
||||
"${workspaceFolder}/import",
|
||||
"${workspaceFolder}/update",
|
||||
"${workspaceFolder}/test",
|
||||
"${workspaceFolder}",
|
||||
],
|
||||
"cmake.buildDirectory": "${sourceDirectory}/build",
|
||||
"cmake.autoSelectActiveFolder": false,
|
||||
"cmake.options.advanced": {
|
||||
"folder": { "statusBarVisibility": "visible" },
|
||||
"variant": { "statusBarVisibility": "compact" }
|
||||
},
|
||||
"cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json",
|
||||
"lldb.launch.initCommands": ["command script import ${workspaceRoot}/extra/natvis.py"],
|
||||
}
|
||||
195
libs/tracy/CMakeLists.txt
Normal file
@ -0,0 +1,195 @@
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
# Run version helper script
|
||||
include(cmake/version.cmake)
|
||||
|
||||
project(Tracy LANGUAGES CXX VERSION ${TRACY_VERSION_STRING})
|
||||
|
||||
file(GENERATE OUTPUT .gitignore CONTENT "*")
|
||||
|
||||
# Default Tracy to a static library unless the enclosing build requests shared
# libraries. BUILD_SHARED_LIBS is passed to if() by name so that if() evaluates
# the variable itself; expanding it with ${} would hand if() the raw value,
# which is then re-interpreted as a constant or as another variable's name.
if(BUILD_SHARED_LIBS)
    set(DEFAULT_STATIC OFF)
else()
    set(DEFAULT_STATIC ON)
endif()

# User-facing switch; its default follows BUILD_SHARED_LIBS as computed above.
option(TRACY_STATIC "Whether to build Tracy as a static library" ${DEFAULT_STATIC})
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
set(TRACY_PUBLIC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/public)
|
||||
|
||||
if(TRACY_STATIC)
|
||||
set(TRACY_VISIBILITY "STATIC")
|
||||
else()
|
||||
set(TRACY_VISIBILITY "SHARED")
|
||||
endif()
|
||||
|
||||
add_library(TracyClient ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.cpp")
|
||||
target_compile_features(TracyClient PUBLIC cxx_std_11)
|
||||
target_include_directories(TracyClient SYSTEM PUBLIC
|
||||
$<BUILD_INTERFACE:${TRACY_PUBLIC_DIR}>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
target_link_libraries(
|
||||
TracyClient
|
||||
PUBLIC
|
||||
Threads::Threads
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
|
||||
# Public dependency on some libraries required when using Mingw
# CMAKE_CXX_COMPILER_ID is referenced by name instead of being expanded with
# ${}: an expanded empty value would leave MATCHES without a left operand, and
# an expanded value that happens to name another variable would be dereferenced
# a second time by if().
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
|
||||
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(TRACY_LIBUNWIND_BACKTRACE)
|
||||
include(FindPkgConfig)
|
||||
pkg_check_modules(unwind REQUIRED libunwind)
|
||||
target_include_directories(TracyClient INTERFACE ${unwind_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyClient INTERFACE ${unwind_LINK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
add_library(Tracy::TracyClient ALIAS TracyClient)
|
||||
|
||||
# set_option(<option> <help> <value>)
#
# Declares the boolean cache option <option> with help text <help> and default
# <value>, reports its resulting state with a STATUS message, and, when the
# option is ON, adds <option> as a PUBLIC compile definition on TracyClient.
#
# Example:
#   set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
macro(set_option option help value)
    # The help text is quoted so it always reaches option() as one argument,
    # even if it contains a semicolon.
    option(${option} "${help}" ${value})
    if(${option})
        message(STATUS "${option}: ON")
        target_compile_definitions(TracyClient PUBLIC ${option})
    else()
        message(STATUS "${option}: OFF")
    endif()
endmacro()
|
||||
|
||||
set_option(TRACY_ENABLE "Enable profiling" ON)
|
||||
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
|
||||
set_option(TRACY_CALLSTACK "Enforce callstack collection for tracy regions" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
|
||||
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
|
||||
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
|
||||
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
|
||||
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
|
||||
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
|
||||
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
|
||||
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
|
||||
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
|
||||
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
|
||||
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
|
||||
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
|
||||
set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
|
||||
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
|
||||
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
|
||||
set_option(TRACY_FIBERS "Enable fibers support" OFF)
|
||||
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
|
||||
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
|
||||
set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF)
|
||||
set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF)
|
||||
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution after the first symbol resolve operation" OFF)
|
||||
|
||||
# advanced
|
||||
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF)
|
||||
mark_as_advanced(TRACY_VERBOSE)
|
||||
set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF)
|
||||
mark_as_advanced(TRACY_DEMANGLE)
|
||||
|
||||
if(NOT TRACY_STATIC)
|
||||
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
|
||||
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set_target_properties(TracyClient PROPERTIES VERSION ${PROJECT_VERSION})
|
||||
|
||||
set(tracy_includes
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyC.h
|
||||
${TRACY_PUBLIC_DIR}/tracy/Tracy.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyD3D11.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyD3D12.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyLua.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyOpenCL.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyOpenGL.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyVulkan.hpp)
|
||||
|
||||
set(client_includes
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_concurrentqueue.h
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_rpmalloc.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_SPSCQueue.h
|
||||
${TRACY_PUBLIC_DIR}/client/TracyKCore.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyArmCpuTable.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCallstack.h
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCallstack.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCpuid.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyDebug.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyDxt1.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyFastVector.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyLock.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyProfiler.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyRingBuffer.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyScoped.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyStringHelpers.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysPower.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysTime.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysTrace.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyThread.hpp)
|
||||
|
||||
set(common_includes
|
||||
${TRACY_PUBLIC_DIR}/common/tracy_lz4.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/tracy_lz4hc.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyAlign.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyAlloc.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyApi.h
|
||||
${TRACY_PUBLIC_DIR}/common/TracyColor.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyForceInline.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyMutex.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyProtocol.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyQueue.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracySocket.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyStackFrames.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracySystem.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyUwp.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyYield.hpp)
|
||||
|
||||
install(TARGETS TracyClient
|
||||
EXPORT TracyConfig
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT lib)
|
||||
# Export targets to build tree root
|
||||
export(TARGETS TracyClient
|
||||
NAMESPACE Tracy::
|
||||
FILE ${CMAKE_BINARY_DIR}/TracyTargets.cmake)
|
||||
install(FILES ${tracy_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy)
|
||||
install(FILES ${client_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/client)
|
||||
install(FILES ${common_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common)
|
||||
install(EXPORT TracyConfig
|
||||
NAMESPACE Tracy::
|
||||
FILE TracyTargets.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
|
||||
|
||||
option(TRACY_CLIENT_PYTHON "Whether to build Tracy python client library" OFF)
|
||||
|
||||
if(TRACY_CLIENT_PYTHON)
|
||||
if(TRACY_STATIC)
|
||||
message(FATAL_ERROR "Python-bindings require a shared client library")
|
||||
endif()
|
||||
|
||||
add_subdirectory(python)
|
||||
endif()
|
||||
6
libs/tracy/Config.cmake.in
Normal file
@ -0,0 +1,6 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
find_dependency(Threads REQUIRED)
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/TracyTargets.cmake")
|
||||
27
libs/tracy/LICENSE
Normal file
@ -0,0 +1,27 @@
|
||||
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
|
||||
3-clause BSD license.
|
||||
|
||||
Copyright (c) 2017-2024, Bartosz Taudul <wolf@nereid.pl>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the <organization> nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
1239
libs/tracy/NEWS
Normal file
28
libs/tracy/README.md
Normal file
@ -0,0 +1,28 @@
|
||||
# Tracy Profiler
|
||||
|
||||
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
|
||||
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua and Python integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/nektro/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, OpenCL.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
|
||||
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
||||
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
||||
- [Changelog](NEWS)
|
||||
- [Interactive demo](https://tracy.nereid.pl/)
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
[An Introduction to Tracy Profiler in C++ - Marcos Slomp - CppCon 2023](https://youtu.be/ghXk3Bk5F2U?t=37)
|
||||
|
||||
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
|
||||
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
|
||||
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
|
||||
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)
|
||||
27
libs/tracy/capture/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_STATISTICS "Disable calculation of statistics" ON)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
project(
|
||||
tracy-capture
|
||||
LANGUAGES C CXX
|
||||
VERSION ${TRACY_VERSION_STRING}
|
||||
)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
|
||||
|
||||
set(PROGRAM_FILES
|
||||
src/capture.cpp
|
||||
)
|
||||
|
||||
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
|
||||
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
|
||||
364
libs/tracy/capture/src/capture.cpp
Normal file
@ -0,0 +1,364 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
# include <io.h>
|
||||
#else
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <inttypes.h>
|
||||
#include <mutex>
|
||||
#include <signal.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "../../public/common/TracyProtocol.hpp"
|
||||
#include "../../public/common/TracyStackFrames.hpp"
|
||||
#include "../../server/TracyFileWrite.hpp"
|
||||
#include "../../server/TracyMemory.hpp"
|
||||
#include "../../server/TracyPrint.hpp"
|
||||
#include "../../server/TracySysUtil.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include "../../getopt/getopt.h"
|
||||
#endif
|
||||
|
||||
|
||||
// Flag written by the SigInt signal handler below. The traditional type for a
// variable touched from a signal handler is `volatile sig_atomic_t`; `bool`
// was formally not permitted there even though it worked in practice.
// C++11 atomics allow atomic<bool> instead, which keeps this on a supported path.
static std::atomic<bool> s_disconnect { false };

// Signal handler: sets s_disconnect to true. The signal number is not used.
void SigInt( int /*signum*/ )
{
    // A relaxed store is the nearest equivalent of a plain `volatile` write.
    // Stronger ordering is unnecessary because the handler does nothing else
    // that would have to be ordered relative to this store.
    constexpr std::memory_order order = std::memory_order_relaxed;
    s_disconnect.store( true, order );
}
|
||||
|
||||
// Cached answer to "is stdout a terminal?"; remains false until
// InitIsStdoutATerminal() has been called.
static bool s_isStdoutATerminal = false;

// Checks whether stdout refers to a terminal and stores the result in
// s_isStdoutATerminal.
void InitIsStdoutATerminal()
{
    const int fd = fileno( stdout );
#ifdef _WIN32
    const int tty = _isatty( fd );
#else
    const int tty = isatty( fd );
#endif
    s_isStdoutATerminal = ( tty != 0 );
}

// Returns the value stored by InitIsStdoutATerminal().
bool IsStdoutATerminal()
{
    return s_isStdoutATerminal;
}
|
||||
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_BOLD "\033[1m"
|
||||
#define ANSI_BLACK "\033[30m"
|
||||
#define ANSI_RED "\033[31m"
|
||||
#define ANSI_GREEN "\033[32m"
|
||||
#define ANSI_YELLOW "\033[33m"
|
||||
#define ANSI_BLUE "\033[34m"
|
||||
#define ANSI_MAGENTA "\033[35m"
|
||||
#define ANSI_CYAN "\033[36m"
|
||||
#define ANSI_ERASE_LINE "\033[2K"
|
||||
|
||||
// Like printf, but if stdout is a terminal, prepends the output with
|
||||
// the given `ansiEscape` and appends ANSI_RESET.
|
||||
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
|
||||
if( IsStdoutATerminal() )
|
||||
{
|
||||
// Prepend ansiEscape and append ANSI_RESET.
|
||||
char buf[256];
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vsnprintf( buf, sizeof buf, format, args );
|
||||
va_end( args );
|
||||
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Just a normal printf.
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vfprintf( stdout, format, args );
|
||||
va_end( args );
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the command line synopsis to stdout and terminates the process
// with exit code 1. Never returns.
[[noreturn]] void Usage()
{
    fputs( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds] [-m memlimit]\n", stdout );
    exit( 1 );
}
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if( !AttachConsole( ATTACH_PARENT_PROCESS ) )
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode( GetStdHandle( STD_OUTPUT_HANDLE ), 0x07 );
|
||||
}
|
||||
#endif
|
||||
|
||||
InitIsStdoutATerminal();
|
||||
|
||||
bool overwrite = false;
|
||||
const char* address = "127.0.0.1";
|
||||
const char* output = nullptr;
|
||||
int port = 8086;
|
||||
int seconds = -1;
|
||||
int64_t memoryLimit = -1;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "a:o:p:fs:m:" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
case 'a':
|
||||
address = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
output = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
port = atoi( optarg );
|
||||
break;
|
||||
case 'f':
|
||||
overwrite = true;
|
||||
break;
|
||||
case 's':
|
||||
seconds = atoi(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
memoryLimit = std::clamp( atoll( optarg ), 1ll, 999ll ) * tracy::GetPhysicalMemorySize() / 100;
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( !address || !output ) Usage();
|
||||
|
||||
struct stat st;
|
||||
if( stat( output, &st ) == 0 && !overwrite )
|
||||
{
|
||||
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
|
||||
return 4;
|
||||
}
|
||||
|
||||
FILE* test = fopen( output, "wb" );
|
||||
if( !test )
|
||||
{
|
||||
printf( "Cannot open output file %s for writing!\n", output );
|
||||
return 5;
|
||||
}
|
||||
fclose( test );
|
||||
unlink( output );
|
||||
|
||||
printf( "Connecting to %s:%i...", address, port );
|
||||
fflush( stdout );
|
||||
tracy::Worker worker( address, port, memoryLimit );
|
||||
while( !worker.HasData() )
|
||||
{
|
||||
const auto handshake = worker.GetHandshakeStatus();
|
||||
if( handshake == tracy::HandshakeProtocolMismatch )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
|
||||
return 1;
|
||||
}
|
||||
if( handshake == tracy::HandshakeNotAvailable )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to is no longer able to sent profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n 1. Restart the client application.\n 2. Rebuild the client application with on-demand mode enabled.\n" );
|
||||
return 2;
|
||||
}
|
||||
if( handshake == tracy::HandshakeDropped )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
|
||||
return 3;
|
||||
}
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
}
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
|
||||
|
||||
#ifdef _WIN32
|
||||
signal( SIGINT, SigInt );
|
||||
#else
|
||||
struct sigaction sigint, oldsigint;
|
||||
memset( &sigint, 0, sizeof( sigint ) );
|
||||
sigint.sa_handler = SigInt;
|
||||
sigaction( SIGINT, &sigint, &oldsigint );
|
||||
#endif
|
||||
|
||||
const auto firstTime = worker.GetFirstTime();
|
||||
auto& lock = worker.GetMbpsDataLock();
|
||||
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
while( worker.IsConnected() )
|
||||
{
|
||||
// Relaxed order is sufficient here because `s_disconnect` is only ever
|
||||
// set by this thread or by the SigInt handler, and that handler does
|
||||
// nothing else than storing `s_disconnect`.
|
||||
if( s_disconnect.load( std::memory_order_relaxed ) )
|
||||
{
|
||||
worker.Disconnect();
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(false, std::memory_order_relaxed );
|
||||
break;
|
||||
}
|
||||
|
||||
lock.lock();
|
||||
const auto mbps = worker.GetMbpsData().back();
|
||||
const auto compRatio = worker.GetCompRatio();
|
||||
const auto netTotal = worker.GetDataTransferred();
|
||||
lock.unlock();
|
||||
|
||||
// Output progress info only if destination is a TTY to avoid bloating
|
||||
// log files (so this is not just about usage of ANSI color codes).
|
||||
if( IsStdoutATerminal() )
|
||||
{
|
||||
const char* unit = "Mbps";
|
||||
float unitsPerMbps = 1.f;
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
unit = "Kbps";
|
||||
unitsPerMbps = 1000.f;
|
||||
}
|
||||
AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
|
||||
printf( " /");
|
||||
AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
|
||||
printf( " =");
|
||||
AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_YELLOW, "Tx: ");
|
||||
AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
|
||||
if( memoryLimit > 0 )
|
||||
{
|
||||
printf( " / " );
|
||||
AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
|
||||
}
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
|
||||
fflush( stdout );
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
if( seconds != -1 )
|
||||
{
|
||||
const auto dur = std::chrono::high_resolution_clock::now() - t0;
|
||||
if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
|
||||
{
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(true, std::memory_order_relaxed );
|
||||
}
|
||||
}
|
||||
}
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const auto& failure = worker.GetFailureType();
|
||||
if( failure != tracy::Worker::Failure::None )
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
|
||||
auto& fd = worker.GetFailureData();
|
||||
if( !fd.message.empty() )
|
||||
{
|
||||
printf( "\nContext: %s", fd.message.c_str() );
|
||||
}
|
||||
if( fd.callstack != 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_BOLD, "\nFailure callstack:\n" );
|
||||
auto& cs = worker.GetCallstack( fd.callstack );
|
||||
int fidx = 0;
|
||||
for( auto& entry : cs )
|
||||
{
|
||||
auto frameData = worker.GetCallstackFrame( entry );
|
||||
if( !frameData )
|
||||
{
|
||||
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto fsz = frameData->size;
|
||||
for( uint8_t f=0; f<fsz; f++ )
|
||||
{
|
||||
const auto& frame = frameData->data[f];
|
||||
auto txt = worker.GetString( frame.name );
|
||||
|
||||
if( fidx == 0 && f != fsz-1 )
|
||||
{
|
||||
auto test = tracy::s_tracyStackFrames;
|
||||
bool match = false;
|
||||
do
|
||||
{
|
||||
if( strcmp( txt, *test ) == 0 )
|
||||
{
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
while( *++test );
|
||||
if( match ) continue;
|
||||
}
|
||||
|
||||
if( f == fsz-1 )
|
||||
{
|
||||
printf( "%3i. ", fidx++ );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
|
||||
}
|
||||
AnsiPrintf( ANSI_CYAN, "%s ", txt );
|
||||
txt = worker.GetString( frame.file );
|
||||
if( frame.line == 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
|
||||
}
|
||||
if( frameData->imageName.Active() )
|
||||
{
|
||||
AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\n" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - firstTime ), tracy::RealToString( worker.GetZoneCount() ),
|
||||
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
|
||||
fflush( stdout );
|
||||
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, tracy::FileCompression::Zstd, 3, 4 ) );
|
||||
if( f )
|
||||
{
|
||||
worker.Write( *f, false );
|
||||
AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
|
||||
f->Finish();
|
||||
const auto stats = f->GetCompressionStatistics();
|
||||
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
1269
libs/tracy/cmake/CPM.cmake
Normal file
300
libs/tracy/cmake/ECMFindModuleHelpers.cmake
Normal file
@ -0,0 +1,300 @@
|
||||
#.rst:
|
||||
# ECMFindModuleHelpers
|
||||
# --------------------
|
||||
#
|
||||
# Helper macros for find modules: ecm_find_package_version_check(),
|
||||
# ecm_find_package_parse_components() and
|
||||
# ecm_find_package_handle_library_components().
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_version_check(<name>)
|
||||
#
|
||||
# Prints warnings if the CMake version or the project's required CMake version
|
||||
# is older than that required by extra-cmake-modules.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_parse_components(<name>
|
||||
# RESULT_VAR <variable>
|
||||
# KNOWN_COMPONENTS <component1> [<component2> [...]]
|
||||
# [SKIP_DEPENDENCY_HANDLING])
|
||||
#
|
||||
# This macro will populate <variable> with a list of components found in
|
||||
# <name>_FIND_COMPONENTS, after checking that all those components are in the
|
||||
# list of KNOWN_COMPONENTS; if there are any unknown components, it will print
|
||||
# an error or warning (depending on the value of <name>_FIND_REQUIRED) and call
|
||||
# return().
|
||||
#
|
||||
# The order of components in <variable> is guaranteed to match the order they
|
||||
# are listed in the KNOWN_COMPONENTS argument.
|
||||
#
|
||||
# If SKIP_DEPENDENCY_HANDLING is not set, for each component the variable
|
||||
# <name>_<component>_component_deps will be checked for dependent components.
|
||||
# If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive)
|
||||
# dependencies will also be added to <variable>.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_handle_library_components(<name>
|
||||
# COMPONENTS <component> [<component> [...]]
|
||||
#                              [SKIP_DEPENDENCY_HANDLING]
|
||||
# [SKIP_PKG_CONFIG])
|
||||
#
|
||||
# Creates an imported library target for each component. The operation of this
|
||||
# macro depends on the presence of a number of CMake variables.
|
||||
#
|
||||
# The <name>_<component>_lib variable should contain the name of this library,
|
||||
# and <name>_<component>_header variable should contain the name of a header
|
||||
# file associated with it (whatever relative path is normally passed to
|
||||
# '#include'). <name>_<component>_header_subdir variable can be used to specify
|
||||
# which subdirectory of the include path the headers will be found in.
|
||||
# ecm_find_package_handle_library_components() will then search for the library
|
||||
# and include directory (creating appropriate cache variables) and create an
|
||||
# imported library target named <name>::<component>.
|
||||
#
|
||||
# Additional variables can be used to provide additional information:
|
||||
#
|
||||
# If SKIP_PKG_CONFIG is not given, the <name>_<component>_pkg_config variable is set, and
|
||||
# pkg-config is found, the pkg-config module given by
|
||||
# <name>_<component>_pkg_config will be searched for and used to help locate the
|
||||
# library and header file. It will also be used to set
|
||||
# <name>_<component>_VERSION.
|
||||
#
|
||||
# Note that if version information is found via pkg-config,
|
||||
# <name>_<component>_FIND_VERSION can be set to require a particular version
|
||||
# for each component.
|
||||
#
|
||||
# If SKIP_DEPENDENCY_HANDLING is not set, the INTERFACE_LINK_LIBRARIES property
|
||||
# of the imported target for <component> will be set to contain the imported
|
||||
# targets for the components listed in <name>_<component>_component_deps.
|
||||
# <name>_<component>_FOUND will also be set to false if any of the components in
|
||||
# <name>_<component>_component_deps are not found. This requires the components
|
||||
# in <name>_<component>_component_deps to be listed before <component> in the
|
||||
# COMPONENTS argument.
|
||||
#
|
||||
# The following variables will be set:
|
||||
#
|
||||
# ``<name>_TARGETS``
|
||||
# the imported targets
|
||||
# ``<name>_LIBRARIES``
|
||||
# the found libraries
|
||||
# ``<name>_INCLUDE_DIRS``
|
||||
# the combined required include directories for the components
|
||||
# ``<name>_DEFINITIONS``
|
||||
# the "other" CFLAGS provided by pkg-config, if any
|
||||
# ``<name>_VERSION``
|
||||
# the value of ``<name>_<component>_VERSION`` for the first component that
|
||||
# has this variable set (note that components are searched for in the order
|
||||
# they are passed to the macro), although if it is already set, it will not
|
||||
# be altered
|
||||
#
|
||||
# Note that these variables are never cleared, so if
|
||||
# ecm_find_package_handle_library_components() is called multiple times with
|
||||
# different components (typically because of multiple find_package() calls) then
|
||||
# ``<name>_TARGETS``, for example, will contain all the targets found in any
|
||||
# call (although no duplicates).
|
||||
#
|
||||
# Since pre-1.0.0.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2014 Alex Merry <alex.merry@kde.org>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# ecm_find_package_version_check(<name>)
#
# Sanity check used at the top of Find<name>.cmake:
#  - aborts with FATAL_ERROR when the running CMake is older than 2.8.12,
#  - emits an AUTHOR_WARNING when the project's declared minimum CMake version
#    is older than 2.8.12.
macro(ecm_find_package_version_check module_name)
    if(CMAKE_VERSION VERSION_LESS "2.8.12")
        message(FATAL_ERROR
            "CMake 2.8.12 is required by Find${module_name}.cmake")
    endif()

    if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS "2.8.12")
        message(AUTHOR_WARNING
            "Your project should require at least CMake 2.8.12 to use Find${module_name}.cmake")
    endif()
endmacro()
|
||||
|
||||
# ecm_find_package_parse_components(<name>
#     RESULT_VAR <variable>
#     KNOWN_COMPONENTS <component1> [<component2> [...]]
#     [DEFAULT_COMPONENTS <component1> [...]]
#     [SKIP_DEPENDENCY_HANDLING])
#
# Stores in <variable> the components to look for:
#  - if <name>_FIND_COMPONENTS is empty: DEFAULT_COMPONENTS (which itself
#    defaults to KNOWN_COMPONENTS);
#  - otherwise: the requested components, plus (unless
#    SKIP_DEPENDENCY_HANDLING is given) the entries of
#    <name>_<component>_component_deps of every requested component, listed
#    in KNOWN_COMPONENTS order.
# Requesting a component that is not in KNOWN_COMPONENTS is reported
# (FATAL_ERROR if <name>_FIND_REQUIRED, STATUS otherwise, nothing if
# <name>_FIND_QUIETLY) and followed by return(). As this is a macro, that
# return() leaves the *calling* file or function.
macro(ecm_find_package_parse_components module_name)
    set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING)
    set(ecm_fppc_oneValueArgs RESULT_VAR)
    set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS)
    cmake_parse_arguments(ECM_FPPC
        "${ecm_fppc_options}"
        "${ecm_fppc_oneValueArgs}"
        "${ecm_fppc_multiValueArgs}"
        ${ARGN})

    # Argument validation.
    if(ECM_FPPC_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}")
    endif()
    if(NOT ECM_FPPC_RESULT_VAR)
        message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components")
    endif()
    if(NOT ECM_FPPC_KNOWN_COMPONENTS)
        message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components")
    endif()
    if(NOT ECM_FPPC_DEFAULT_COMPONENTS)
        set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS})
    endif()

    if(NOT ${module_name}_FIND_COMPONENTS)
        # Nothing was requested explicitly: fall back to the defaults.
        set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS})
    else()
        set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS})

        if(ECM_FPPC_SKIP_DEPENDENCY_HANDLING)
            message(STATUS "Skipping dependency handling for ${module_name}")
        else()
            # Add the declared dependencies of every requested component.
            foreach(ecm_fppc_comp ${ecm_fppc_requestedComps})
                foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps})
                    list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index)
                    if(ecm_fppc_index EQUAL -1)
                        if(NOT ${module_name}_FIND_QUIETLY)
                            message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}")
                        endif()
                        list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}")
                    endif()
                endforeach()
            endforeach()
        endif()
        list(REMOVE_DUPLICATES ecm_fppc_requestedComps)

        # Emit the result in KNOWN_COMPONENTS order (potentially important
        # for inter-dependencies), consuming the request list on the way.
        set(${ECM_FPPC_RESULT_VAR})
        foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS})
            list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index)
            if(NOT ecm_fppc_index EQUAL -1)
                list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}")
                list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index})
            endif()
        endforeach()

        # Whatever was not consumed above is an unknown component.
        if(ecm_fppc_requestedComps)
            if(${module_name}_FIND_REQUIRED)
                set(ecm_fppc_msgType FATAL_ERROR)
            else()
                set(ecm_fppc_msgType STATUS)
            endif()
            if(NOT ${module_name}_FIND_QUIETLY)
                message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}")
            endif()
            return()
        endif()
    endif()
endmacro()
|
||||
|
||||
# ecm_find_package_handle_library_components(<name>
#     COMPONENTS <component> [<component> [...]]
#     [SKIP_DEPENDENCY_HANDLING]
#     [SKIP_PKG_CONFIG])
#
# For every listed component: optionally queries pkg-config (module named by
# <name>_<component>_pkg_config), searches for the header
# <name>_<component>_header and the library <name>_<component>_lib, sets
# <name>_<component>_FOUND through find_package_handle_standard_args() and,
# when found, creates the imported target <name>::<component>.
# Accumulates <name>_LIBRARIES, <name>_INCLUDE_DIRS, <name>_DEFINITIONS and
# <name>_TARGETS (duplicates removed); <name>_VERSION is only set if it does
# not have a value yet.
#
# Unless SKIP_DEPENDENCY_HANDLING is given, the components listed in
# <name>_<component>_component_deps must have been found for <component> to
# count as found, and their targets are added to INTERFACE_LINK_LIBRARIES of
# the imported target.
macro(ecm_find_package_handle_library_components module_name)
    set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING)
    set(ecm_fpwc_oneValueArgs)
    set(ecm_fpwc_multiValueArgs COMPONENTS)
    cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN})

    if(ECM_FPWC_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_library_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}")
    endif()
    if(NOT ECM_FPWC_COMPONENTS)
        message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_library_components")
    endif()

    include(FindPackageHandleStandardArgs)
    find_package(PkgConfig QUIET)
    foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS})
        set(ecm_fpwc_dep_vars)
        set(ecm_fpwc_dep_targets)
        # cmake_parse_arguments() stores the option with the ECM_FPWC_ prefix;
        # testing the bare keyword would silently ignore the option.
        if(NOT ECM_FPWC_SKIP_DEPENDENCY_HANDLING)
            foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps})
                list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND")
                list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}")
            endforeach()
        endif()

        if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config)
            pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET
                              ${${module_name}_${ecm_fpwc_comp}_pkg_config})
        endif()

        # The pkg-config results (if any) are only used as search hints.
        find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
            NAMES ${${module_name}_${ecm_fpwc_comp}_header}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS}
            PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir}
        )
        find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY
            NAMES ${${module_name}_${ecm_fpwc_comp}_lib}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS}
        )

        set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}")
        if(NOT ${module_name}_VERSION)
            set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION})
        endif()

        # NAME_MISMATCHED is only passed to find_package_handle_standard_args()
        # on CMake >= 3.17; the name given below is <name>_<component>.
        set(_name_mismatched_arg)
        if(NOT CMAKE_VERSION VERSION_LESS 3.17)
            set(_name_mismatched_arg NAME_MISMATCHED)
        endif()
        find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp}
            FOUND_VAR
                ${module_name}_${ecm_fpwc_comp}_FOUND
            REQUIRED_VARS
                ${module_name}_${ecm_fpwc_comp}_LIBRARY
                ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
                ${ecm_fpwc_dep_vars}
            VERSION_VAR
                ${module_name}_${ecm_fpwc_comp}_VERSION
            ${_name_mismatched_arg}
            )

        mark_as_advanced(
            ${module_name}_${ecm_fpwc_comp}_LIBRARY
            ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
        )

        if(${module_name}_${ecm_fpwc_comp}_FOUND)
            list(APPEND ${module_name}_LIBRARIES
                        "${${module_name}_${ecm_fpwc_comp}_LIBRARY}")
            list(APPEND ${module_name}_INCLUDE_DIRS
                        "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}")
            set(${module_name}_DEFINITIONS
                    ${${module_name}_DEFINITIONS}
                    ${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS})
            if(NOT TARGET ${module_name}::${ecm_fpwc_comp})
                add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED)
                set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES
                    IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}"
                    INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}"
                    INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}"
                    INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}"
                )
            endif()
            list(APPEND ${module_name}_TARGETS
                        "${module_name}::${ecm_fpwc_comp}")
        endif()
    endforeach()

    # The result variables are never cleared, so repeated calls accumulate;
    # keep them free of duplicates.
    if(${module_name}_LIBRARIES)
        list(REMOVE_DUPLICATES ${module_name}_LIBRARIES)
    endif()
    if(${module_name}_INCLUDE_DIRS)
        list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS)
    endif()
    if(${module_name}_DEFINITIONS)
        list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS)
    endif()
    if(${module_name}_TARGETS)
        list(REMOVE_DUPLICATES ${module_name}_TARGETS)
    endif()
endmacro()
|
||||
170
libs/tracy/cmake/FindWaylandScanner.cmake
Normal file
@ -0,0 +1,170 @@
|
||||
#.rst:
|
||||
# FindWaylandScanner
|
||||
# ------------------
|
||||
#
|
||||
# Try to find wayland-scanner.
|
||||
#
|
||||
# If the wayland-scanner executable is not in your PATH, you can provide
|
||||
# an alternative name or full path location with the ``WaylandScanner_EXECUTABLE``
|
||||
# variable.
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# ``WaylandScanner_FOUND``
|
||||
# True if wayland-scanner is available.
|
||||
#
|
||||
# ``WaylandScanner_EXECUTABLE``
|
||||
# The wayland-scanner executable.
|
||||
#
|
||||
# If ``WaylandScanner_FOUND`` is TRUE, it will also define the following imported
|
||||
# target:
|
||||
#
|
||||
# ``Wayland::Scanner``
|
||||
# The wayland-scanner executable.
|
||||
#
|
||||
# This module provides the following functions to generate C protocol
|
||||
# implementations:
|
||||
#
|
||||
# - ``ecm_add_wayland_client_protocol``
|
||||
# - ``ecm_add_wayland_server_protocol``
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_add_wayland_client_protocol(<source_files_var>
|
||||
# PROTOCOL <xmlfile>
|
||||
# BASENAME <basename>)
|
||||
#
|
||||
# Generate Wayland client protocol files from ``<xmlfile>`` XML
|
||||
# definition for the ``<basename>`` interface and append those files
|
||||
# to ``<source_files_var>``.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_add_wayland_server_protocol(<source_files_var>
|
||||
# PROTOCOL <xmlfile>
|
||||
# BASENAME <basename>)
|
||||
#
|
||||
# Generate Wayland server protocol files from ``<xmlfile>`` XML
|
||||
# definition for the ``<basename>`` interface and append those files
|
||||
# to ``<source_files_var>``.
|
||||
#
|
||||
# Since 1.4.0.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2012-2014 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#=============================================================================
|
||||
|
||||
# The ECM helper macros are shipped next to this find module.
include("${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake")

ecm_find_package_version_check(WaylandScanner)

# Find wayland-scanner. A value already present in the cache (e.g. given with
# -DWaylandScanner_EXECUTABLE=...) is kept by find_program().
find_program(WaylandScanner_EXECUTABLE NAMES wayland-scanner)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WaylandScanner
    FOUND_VAR WaylandScanner_FOUND
    REQUIRED_VARS WaylandScanner_EXECUTABLE
)

mark_as_advanced(WaylandScanner_EXECUTABLE)

# Expose the tool as the imported executable Wayland::Scanner (only once).
if(WaylandScanner_FOUND AND NOT TARGET Wayland::Scanner)
    add_executable(Wayland::Scanner IMPORTED)
    set_property(TARGET Wayland::Scanner
        PROPERTY IMPORTED_LOCATION "${WaylandScanner_EXECUTABLE}")
endif()

# Package description shown by FeatureSummary.
include(FeatureSummary)
set_package_properties(WaylandScanner PROPERTIES
    DESCRIPTION "Executable that converts XML protocol files to C code"
    URL "https://wayland.freedesktop.org/"
)
|
||||
|
||||
# ecm_add_wayland_client_protocol(<source_files_var>
#                                 PROTOCOL <xmlfile>
#                                 BASENAME <basename>)
#
# Adds build rules that run wayland-scanner on <xmlfile> to generate
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-client-protocol.h
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-protocol.c
# and appends both paths to <source_files_var> in the caller's scope.
# PROTOCOL and BASENAME are both required.
function(ecm_add_wayland_client_protocol out_var)
    # Parse arguments
    set(oneValueArgs PROTOCOL BASENAME)
    cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})

    if(ARGS_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_client_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
    endif()
    # Fail early with a clear message instead of an obscure error from
    # get_filename_component() or a silently misnamed output file.
    if("${ARGS_PROTOCOL}" STREQUAL "")
        message(FATAL_ERROR "ecm_add_wayland_client_protocol(): the PROTOCOL argument is required")
    endif()
    if("${ARGS_BASENAME}" STREQUAL "")
        message(FATAL_ERROR "ecm_add_wayland_client_protocol(): the BASENAME argument is required")
    endif()

    get_filename_component(_infile "${ARGS_PROTOCOL}" ABSOLUTE)
    set(_client_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-client-protocol.h")
    set(_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c")

    set_source_files_properties("${_client_header}" "${_code}" PROPERTIES GENERATED TRUE)
    set_property(SOURCE "${_client_header}" PROPERTY SKIP_AUTOMOC ON)

    add_custom_command(OUTPUT "${_client_header}"
        COMMAND ${WaylandScanner_EXECUTABLE} client-header "${_infile}" "${_client_header}"
        DEPENDS ${WaylandScanner_EXECUTABLE} "${_infile}"
        VERBATIM
    )

    add_custom_command(OUTPUT "${_code}"
        COMMAND ${WaylandScanner_EXECUTABLE} private-code "${_infile}" "${_code}"
        DEPENDS ${WaylandScanner_EXECUTABLE} "${_infile}" "${_client_header}"
        VERBATIM
    )

    list(APPEND ${out_var} "${_client_header}" "${_code}")
    set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()
|
||||
|
||||
|
||||
# ecm_add_wayland_server_protocol(<source_files_var>
#                                 PROTOCOL <xmlfile>
#                                 BASENAME <basename>)
#
# Generates everything ecm_add_wayland_client_protocol() generates for the
# same arguments and additionally
#   ${CMAKE_CURRENT_BINARY_DIR}/wayland-<basename>-server-protocol.h
# All generated paths are appended to <source_files_var> in the caller's
# scope. PROTOCOL and BASENAME are both required.
function(ecm_add_wayland_server_protocol out_var)
    # Parse arguments
    set(oneValueArgs PROTOCOL BASENAME)
    cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})

    if(ARGS_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_server_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
    endif()
    # Fail early with a message that names this function rather than letting
    # the nested calls fail in a less obvious way.
    if("${ARGS_PROTOCOL}" STREQUAL "")
        message(FATAL_ERROR "ecm_add_wayland_server_protocol(): the PROTOCOL argument is required")
    endif()
    if("${ARGS_BASENAME}" STREQUAL "")
        message(FATAL_ERROR "ecm_add_wayland_server_protocol(): the BASENAME argument is required")
    endif()

    # The client call updates ${out_var} in this function's scope.
    ecm_add_wayland_client_protocol(${out_var}
                                    PROTOCOL "${ARGS_PROTOCOL}"
                                    BASENAME "${ARGS_BASENAME}")

    get_filename_component(_infile "${ARGS_PROTOCOL}" ABSOLUTE)
    set(_server_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-server-protocol.h")
    set_property(SOURCE "${_server_header}" PROPERTY SKIP_AUTOMOC ON)
    set_source_files_properties("${_server_header}" PROPERTIES GENERATED TRUE)

    add_custom_command(OUTPUT "${_server_header}"
        COMMAND ${WaylandScanner_EXECUTABLE} server-header "${_infile}" "${_server_header}"
        DEPENDS ${WaylandScanner_EXECUTABLE} "${_infile}"
        VERBATIM
    )

    list(APPEND ${out_var} "${_server_header}")
    set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()
|
||||
56
libs/tracy/cmake/config.cmake
Normal file
@ -0,0 +1,56 @@
|
||||
# Unless NO_ISA_EXTENSIONS is set, tune the build for the host CPU.
# The flag is only added when the C++ compiler reports that it accepts it.
if(NOT NO_ISA_EXTENSIONS)
    include(CheckCXXCompilerFlag)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
        check_cxx_compiler_flag("-mcpu=native" COMPILER_SUPPORTS_MCPU_NATIVE)
        # Test the result of the -mcpu probe above. The previous code tested
        # COMPILER_SUPPORTS_MARCH_NATIVE, which is never set on this branch,
        # so -mcpu=native was silently never applied.
        if(COMPILER_SUPPORTS_MCPU_NATIVE)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=native")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=native")
        endif()
    else()
        check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
        if(COMPILER_SUPPORTS_MARCH_NATIVE)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
        endif()
    endif()
    if(WIN32)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
    endif()
endif()
|
||||
|
||||
# USE_WAYLAND is ON only for Linux builds where LEGACY is not set.
set(USE_WAYLAND OFF)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LEGACY)
    set(USE_WAYLAND ON)
endif()

# Platform-wide compile settings.
if(NOT WIN32)
    string(APPEND CMAKE_CXX_FLAGS " -fdiagnostics-color=always")
    string(APPEND CMAKE_C_FLAGS " -fdiagnostics-color=always")
else()
    add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
    add_compile_options(/MP)
endif()

# Interprocedural optimization for everything except Debug and Emscripten builds.
if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR EMSCRIPTEN))
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()

# Emscripten builds compile and link with -pthread.
if(EMSCRIPTEN)
    add_compile_options(-pthread)
    add_link_options(-pthread)
endif()

# Clang (non-Emscripten): select the mold linker when it is found on the
# system, and keep unused debug types in Debug builds.
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT EMSCRIPTEN)
    find_program(MOLD_LINKER mold)
    if(MOLD_LINKER)
        set(CMAKE_LINKER_TYPE "MOLD")
    endif()
    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
        string(APPEND CMAKE_CXX_FLAGS " -fno-eliminate-unused-debug-types")
        string(APPEND CMAKE_C_FLAGS " -fno-eliminate-unused-debug-types")
    endif()
endif()

# Write a .gitignore containing "*" at generate time.
file(GENERATE OUTPUT .gitignore CONTENT "*")
|
||||
39
libs/tracy/cmake/server.cmake
Normal file
@ -0,0 +1,39 @@
|
||||
# Sources taken from public/common that are compiled into the server library.
set(TRACY_COMMON_DIR ${CMAKE_CURRENT_LIST_DIR}/../public/common)
set(TRACY_COMMON_SOURCES
    "${TRACY_COMMON_DIR}/tracy_lz4.cpp"
    "${TRACY_COMMON_DIR}/tracy_lz4hc.cpp"
    "${TRACY_COMMON_DIR}/TracySocket.cpp"
    "${TRACY_COMMON_DIR}/TracyStackFrames.cpp"
    "${TRACY_COMMON_DIR}/TracySystem.cpp"
)

# Sources taken from the server directory.
set(TRACY_SERVER_DIR ${CMAKE_CURRENT_LIST_DIR}/../server)
set(TRACY_SERVER_SOURCES
    "${TRACY_SERVER_DIR}/TracyMemory.cpp"
    "${TRACY_SERVER_DIR}/TracyMmap.cpp"
    "${TRACY_SERVER_DIR}/TracyPrint.cpp"
    "${TRACY_SERVER_DIR}/TracySysUtil.cpp"
    "${TRACY_SERVER_DIR}/TracyTaskDispatch.cpp"
    "${TRACY_SERVER_DIR}/TracyTextureCompression.cpp"
    "${TRACY_SERVER_DIR}/TracyThreadCompress.cpp"
    "${TRACY_SERVER_DIR}/TracyWorker.cpp"
)

# Static library combining both source sets; consumers inherit the include
# directories and the capstone/zstd link dependencies.
add_library(TracyServer STATIC ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd)

# TracyTbb is linked only on non-Apple, non-Emscripten UNIX builds with
# parallel STL enabled.
if(UNIX AND NOT (NO_PARALLEL_STL OR APPLE OR EMSCRIPTEN))
    target_link_libraries(TracyServer PRIVATE TracyTbb)
endif()

# NO_STATISTICS is forwarded to the library and its consumers as a define.
if(NO_STATISTICS)
    target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
endif()
|
||||
238
libs/tracy/cmake/vendor.cmake
Normal file
@ -0,0 +1,238 @@
|
||||
# Vendor Specific CMake
|
||||
# The Tracy project keeps most vendor source locally
|
||||
|
||||
set (ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/../")
|
||||
|
||||
# Dependencies are taken from the system first and if not found, they are pulled with CPM and built from source
|
||||
|
||||
include(FindPkgConfig)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake)
|
||||
|
||||
option(DOWNLOAD_CAPSTONE "Force download capstone" ON)
|
||||
option(DOWNLOAD_GLFW "Force download glfw" OFF)
|
||||
option(DOWNLOAD_FREETYPE "Force download freetype" OFF)
|
||||
|
||||
# capstone
|
||||
|
||||
# Provide the TracyCapstone interface target.
# A pkg-config capstone is used only when it is found and DOWNLOAD_CAPSTONE is
# OFF; otherwise capstone 5.0.3 is fetched through CPM and built from source.
pkg_check_modules(CAPSTONE capstone)
if(CAPSTONE_FOUND AND NOT DOWNLOAD_CAPSTONE)
    # pkg_check_modules() sets <PREFIX>_VERSION; there is no plain ${CAPSTONE}
    # variable, so the previous message always printed an empty value.
    message(STATUS "Capstone found: ${CAPSTONE_VERSION}")
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${CAPSTONE_INCLUDE_DIRS})
    target_link_libraries(TracyCapstone INTERFACE ${CAPSTONE_LINK_LIBRARIES})
else()
    CPMAddPackage(
        NAME capstone
        GITHUB_REPOSITORY capstone-engine/capstone
        GIT_TAG 5.0.3
    )
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${capstone_SOURCE_DIR}/include/capstone)
    target_link_libraries(TracyCapstone INTERFACE capstone)
endif()
|
||||
|
||||
# GLFW
|
||||
|
||||
if(NOT USE_WAYLAND AND NOT EMSCRIPTEN)
|
||||
pkg_check_modules(GLFW glfw3)
|
||||
if (GLFW_FOUND AND NOT DOWNLOAD_GLFW)
|
||||
add_library(TracyGlfw3 INTERFACE)
|
||||
target_include_directories(TracyGlfw3 INTERFACE ${GLFW_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyGlfw3 INTERFACE ${GLFW_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME glfw
|
||||
GITHUB_REPOSITORY glfw/glfw
|
||||
GIT_TAG 3.4
|
||||
OPTIONS
|
||||
"GLFW_BUILD_EXAMPLES OFF"
|
||||
"GLFW_BUILD_TESTS OFF"
|
||||
"GLFW_BUILD_DOCS OFF"
|
||||
"GLFW_INSTALL OFF"
|
||||
)
|
||||
add_library(TracyGlfw3 INTERFACE)
|
||||
target_link_libraries(TracyGlfw3 INTERFACE glfw)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# freetype
|
||||
|
||||
pkg_check_modules(FREETYPE freetype2)
|
||||
if (FREETYPE_FOUND AND NOT DOWNLOAD_FREETYPE)
|
||||
add_library(TracyFreetype INTERFACE)
|
||||
target_include_directories(TracyFreetype INTERFACE ${FREETYPE_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyFreetype INTERFACE ${FREETYPE_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME freetype
|
||||
GITHUB_REPOSITORY freetype/freetype
|
||||
GIT_TAG VER-2-13-2
|
||||
OPTIONS
|
||||
"FT_DISABLE_HARFBUZZ ON"
|
||||
"FT_WITH_HARFBUZZ OFF"
|
||||
)
|
||||
add_library(TracyFreetype INTERFACE)
|
||||
target_link_libraries(TracyFreetype INTERFACE freetype)
|
||||
endif()
|
||||
|
||||
# zstd
|
||||
|
||||
set(ZSTD_DIR "${ROOT_DIR}/zstd")
|
||||
|
||||
set(ZSTD_SOURCES
|
||||
decompress/zstd_ddict.c
|
||||
decompress/zstd_decompress_block.c
|
||||
decompress/huf_decompress.c
|
||||
decompress/zstd_decompress.c
|
||||
common/zstd_common.c
|
||||
common/error_private.c
|
||||
common/xxhash.c
|
||||
common/entropy_common.c
|
||||
common/debug.c
|
||||
common/threading.c
|
||||
common/pool.c
|
||||
common/fse_decompress.c
|
||||
compress/zstd_ldm.c
|
||||
compress/zstd_compress_superblock.c
|
||||
compress/zstd_opt.c
|
||||
compress/zstd_compress_sequences.c
|
||||
compress/fse_compress.c
|
||||
compress/zstd_double_fast.c
|
||||
compress/zstd_compress.c
|
||||
compress/zstd_compress_literals.c
|
||||
compress/hist.c
|
||||
compress/zstdmt_compress.c
|
||||
compress/zstd_lazy.c
|
||||
compress/huf_compress.c
|
||||
compress/zstd_fast.c
|
||||
dictBuilder/zdict.c
|
||||
dictBuilder/cover.c
|
||||
dictBuilder/divsufsort.c
|
||||
dictBuilder/fastcover.c
|
||||
)
|
||||
|
||||
list(TRANSFORM ZSTD_SOURCES PREPEND "${ZSTD_DIR}/")
|
||||
|
||||
set_property(SOURCE ${ZSTD_DIR}/decompress/huf_decompress_amd64.S APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp")
|
||||
|
||||
add_library(TracyZstd STATIC ${ZSTD_SOURCES})
|
||||
target_include_directories(TracyZstd PUBLIC ${ZSTD_DIR})
|
||||
target_compile_definitions(TracyZstd PRIVATE ZSTD_DISABLE_ASM)
|
||||
|
||||
|
||||
# Diff Template Library
|
||||
|
||||
# Header-only interface target for the bundled Diff Template Library.
set(DTL_DIR "${ROOT_DIR}/dtl")
# Glob inside DTL_DIR itself. A bare "*.hpp" expression is resolved against the
# current source directory (RELATIVE only rewrites the returned paths), so the
# previous form collected whatever headers the including project contained
# instead of the DTL headers.
file(GLOB_RECURSE DTL_HEADERS CONFIGURE_DEPENDS "${DTL_DIR}/*.hpp")
add_library(TracyDtl INTERFACE)
target_sources(TracyDtl INTERFACE ${DTL_HEADERS})
target_include_directories(TracyDtl INTERFACE ${DTL_DIR})
|
||||
|
||||
|
||||
# Get Opt
|
||||
|
||||
set(GETOPT_DIR "${ROOT_DIR}/getopt")
|
||||
set(GETOPT_SOURCES ${GETOPT_DIR}/getopt.c)
|
||||
set(GETOPT_HEADERS ${GETOPT_DIR}/getopt.h)
|
||||
add_library(TracyGetOpt STATIC ${GETOPT_SOURCES} ${GETOPT_HEADERS})
|
||||
target_include_directories(TracyGetOpt PUBLIC ${GETOPT_DIR})
|
||||
|
||||
|
||||
# ImGui
|
||||
|
||||
set(IMGUI_DIR "${ROOT_DIR}/imgui")
|
||||
|
||||
set(IMGUI_SOURCES
|
||||
imgui_widgets.cpp
|
||||
imgui_draw.cpp
|
||||
imgui_demo.cpp
|
||||
imgui.cpp
|
||||
imgui_tables.cpp
|
||||
misc/freetype/imgui_freetype.cpp
|
||||
)
|
||||
|
||||
list(TRANSFORM IMGUI_SOURCES PREPEND "${IMGUI_DIR}/")
|
||||
|
||||
add_definitions(-DIMGUI_ENABLE_FREETYPE)
|
||||
|
||||
add_library(TracyImGui STATIC ${IMGUI_SOURCES})
|
||||
target_include_directories(TracyImGui PUBLIC ${IMGUI_DIR})
|
||||
target_link_libraries(TracyImGui PUBLIC TracyFreetype)
|
||||
|
||||
# NFD
|
||||
|
||||
# Native file dialog library (TracyNfd).
# Skipped when NO_FILESELECTOR is set or when building for Emscripten.
if(NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
    set(NFD_DIR "${ROOT_DIR}/nfd")

    # One backend source per platform; on other systems GTK_FILESELECTOR
    # chooses between the GTK and the portal implementation.
    if(WIN32)
        set(NFD_SOURCES "${NFD_DIR}/nfd_win.cpp")
    elseif(APPLE)
        set(NFD_SOURCES "${NFD_DIR}/nfd_cocoa.m")
    else()
        if(GTK_FILESELECTOR)
            set(NFD_SOURCES "${NFD_DIR}/nfd_gtk.cpp")
        else()
            set(NFD_SOURCES "${NFD_DIR}/nfd_portal.cpp")
        endif()
    endif()

    # Glob inside NFD_DIR. A bare "*.h" expression is resolved against the
    # current source directory (RELATIVE only rewrites the returned paths), so
    # the previous form picked up unrelated headers of the including project.
    file(GLOB_RECURSE NFD_HEADERS CONFIGURE_DEPENDS "${NFD_DIR}/*.h")
    add_library(TracyNfd STATIC ${NFD_SOURCES} ${NFD_HEADERS})
    target_include_directories(TracyNfd PUBLIC ${NFD_DIR})

    if(APPLE)
        find_library(APPKIT_LIBRARY AppKit)
        find_library(UNIFORMTYPEIDENTIFIERS_LIBRARY UniformTypeIdentifiers)
        target_link_libraries(TracyNfd PUBLIC ${APPKIT_LIBRARY} ${UNIFORMTYPEIDENTIFIERS_LIBRARY})
    elseif(UNIX)
        if(GTK_FILESELECTOR)
            pkg_check_modules(GTK3 gtk+-3.0)
            if(NOT GTK3_FOUND)
                # The variable tested in this file is GTK_FILESELECTOR.
                message(FATAL_ERROR "GTK3 not found. Please install it or set GTK_FILESELECTOR to OFF.")
            endif()
            add_library(TracyGtk3 INTERFACE)
            target_include_directories(TracyGtk3 INTERFACE ${GTK3_INCLUDE_DIRS})
            target_link_libraries(TracyGtk3 INTERFACE ${GTK3_LINK_LIBRARIES})
            target_link_libraries(TracyNfd PUBLIC TracyGtk3)
        else()
            pkg_check_modules(DBUS dbus-1)
            if(NOT DBUS_FOUND)
                message(FATAL_ERROR "D-Bus not found. Please install it or set GTK_FILESELECTOR to ON.")
            endif()
            add_library(TracyDbus INTERFACE)
            target_include_directories(TracyDbus INTERFACE ${DBUS_INCLUDE_DIRS})
            target_link_libraries(TracyDbus INTERFACE ${DBUS_LINK_LIBRARIES})
            target_link_libraries(TracyNfd PUBLIC TracyDbus)
        endif()
    endif()
endif()
|
||||
|
||||
# TBB
|
||||
if (NO_PARALLEL_STL)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT")
|
||||
else()
|
||||
if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
|
||||
# Tracy does not use TBB directly, but the implementation of parallel algorithms
|
||||
# in some versions of libstdc++ depends on TBB. When it does, you must
|
||||
# explicitly link against -ltbb.
|
||||
#
|
||||
# Some distributions have pkg-config files for TBB, others don't.
|
||||
|
||||
pkg_check_modules(TBB tbb)
|
||||
if (TBB_FOUND)
|
||||
add_library(TracyTbb INTERFACE)
|
||||
target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME tbb
|
||||
GITHUB_REPOSITORY oneapi-src/oneTBB
|
||||
GIT_TAG v2021.12.0-rc2
|
||||
OPTIONS "TBB_TEST OFF"
|
||||
)
|
||||
add_library(TracyTbb INTERFACE)
|
||||
target_link_libraries(TracyTbb INTERFACE tbb)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
24
libs/tracy/cmake/version.cmake
Normal file
@ -0,0 +1,24 @@
|
||||
# Reads public/common/TracyVersion.hpp and defines TRACY_VERSION_MAJOR,
# TRACY_VERSION_MINOR, TRACY_VERSION_PATCH and TRACY_VERSION_STRING
# ("<major>.<minor>.<patch>") in the including scope.
cmake_minimum_required(VERSION 3.10)

message("Parsing public/common/TracyVersion.hpp file")

file(READ "${CMAKE_CURRENT_LIST_DIR}/../public/common/TracyVersion.hpp" version)

# Note: This looks for a specific pattern in TracyVersion.hpp, if it changes
# this needs updating.
# The file content is passed quoted so that it reaches string() as a single
# argument even though the header contains ';' characters.
string(REGEX MATCH "Major = ([0-9]+)" _ "${version}")
if("${_}" STREQUAL "")
    message(FATAL_ERROR "Could not find 'Major = <number>' in TracyVersion.hpp")
endif()

# This works due to the above () subexpression selection. See
# https://cmake.org/cmake/help/latest/command/string.html#regex-match for more
# details
set(TRACY_VERSION_MAJOR ${CMAKE_MATCH_1})

string(REGEX MATCH "Minor = ([0-9]+)" _ "${version}")
if("${_}" STREQUAL "")
    message(FATAL_ERROR "Could not find 'Minor = <number>' in TracyVersion.hpp")
endif()
set(TRACY_VERSION_MINOR ${CMAKE_MATCH_1})

string(REGEX MATCH "Patch = ([0-9]+)" _ "${version}")
if("${_}" STREQUAL "")
    message(FATAL_ERROR "Could not find 'Patch = <number>' in TracyVersion.hpp")
endif()
set(TRACY_VERSION_PATCH ${CMAKE_MATCH_1})

set(TRACY_VERSION_STRING "${TRACY_VERSION_MAJOR}.${TRACY_VERSION_MINOR}.${TRACY_VERSION_PATCH}")

message("VERSION ${TRACY_VERSION_STRING}")
|
||||
28
libs/tracy/csvexport/CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
|
||||
|
||||
set(NO_STATISTICS OFF)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
project(
|
||||
tracy-csvexport
|
||||
LANGUAGES C CXX
|
||||
VERSION ${TRACY_VERSION_STRING}
|
||||
)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
|
||||
|
||||
set(PROGRAM_FILES
|
||||
src/csvexport.cpp
|
||||
)
|
||||
|
||||
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
|
||||
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
|
||||
353
libs/tracy/csvexport/src/csvexport.cpp
Normal file
@ -0,0 +1,353 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../server/TracyFileRead.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
// Prints the command-line help to stderr and terminates the process with
// exit status `e`. This function does not return.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The quotes must be escaped: an unescaped "" ends the literal and starts
    // a new one, so the help used to read "(default: )".
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each zone event\n");
    fprintf(stderr, " -m, --messages Report only messages\n");

    exit(e);
}
|
||||
|
||||
struct Args {
|
||||
const char* filter;
|
||||
const char* separator;
|
||||
const char* trace_file;
|
||||
bool case_sensitive;
|
||||
bool self_time;
|
||||
bool unwrap;
|
||||
bool unwrapMessages;
|
||||
};
|
||||
|
||||
Args parse_args(int argc, char** argv)
|
||||
{
|
||||
if (argc == 1)
|
||||
{
|
||||
print_usage_exit(1);
|
||||
}
|
||||
|
||||
Args args = { "", ",", "", false, false, false, false };
|
||||
|
||||
struct option long_opts[] = {
|
||||
{ "help", no_argument, NULL, 'h' },
|
||||
{ "filter", optional_argument, NULL, 'f' },
|
||||
{ "sep", optional_argument, NULL, 's' },
|
||||
{ "case", no_argument, NULL, 'c' },
|
||||
{ "self", no_argument, NULL, 'e' },
|
||||
{ "unwrap", no_argument, NULL, 'u' },
|
||||
{ "messages", no_argument, NULL, 'm' },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
int c;
|
||||
while ((c = getopt_long(argc, argv, "hf:s:ceum", long_opts, NULL)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case 'h':
|
||||
print_usage_exit(0);
|
||||
break;
|
||||
case 'f':
|
||||
args.filter = optarg;
|
||||
break;
|
||||
case 's':
|
||||
args.separator = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
args.case_sensitive = true;
|
||||
break;
|
||||
case 'e':
|
||||
args.self_time = true;
|
||||
break;
|
||||
case 'u':
|
||||
args.unwrap = true;
|
||||
break;
|
||||
case 'm':
|
||||
args.unwrapMessages = true;
|
||||
break;
|
||||
default:
|
||||
print_usage_exit(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc != optind + 1)
|
||||
{
|
||||
print_usage_exit(1);
|
||||
}
|
||||
|
||||
args.trace_file = argv[optind];
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
// Returns true when `term` occurs anywhere inside `s`.
//
// term            text to look for (an empty term always matches)
// s               text to search in
// case_sensitive  when false (the default) both strings are lower-cased with
//                 std::tolower before comparing
bool is_substring(
    const char* term,
    const char* s,
    bool case_sensitive = false
){
    std::string needle(term);
    std::string haystack(s);

    if (!case_sensitive) {
        // Lower-case a string in place, going through unsigned char as
        // std::tolower requires.
        const auto lower_in_place = [](std::string& text) {
            for (char& ch : text) {
                ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
            }
        };
        lower_in_place(needle);
        lower_in_place(haystack);
    }

    const bool found = haystack.find(needle) != std::string::npos;
    return found;
}
|
||||
|
||||
// Returns the display string for source location `id`: the location's `name`
// when that field is marked active, otherwise its `function`.
const char* get_name(int32_t id, const tracy::Worker& worker)
{
    auto& location = worker.GetSourceLocation(id);
    const auto& chosen = location.name.active ? location.name : location.function;
    return worker.GetString(chosen);
}
|
||||
|
||||
// Streams every element of `v` into one string, writing `sep` between
// consecutive elements (nothing before the first or after the last).
// An empty container yields an empty string.
template <typename T>
std::string join(const T& v, const char* sep) {
    std::ostringstream out;
    bool is_first = true;
    for (const auto& item : v) {
        if (is_first) {
            is_first = false;
        } else {
            out << sep;
        }
        out << item;
    }
    return out.str();
}
|
||||
|
||||
// From TracyView.cpp
|
||||
int64_t GetZoneChildTimeFast(
|
||||
const tracy::Worker& worker,
|
||||
const tracy::ZoneEvent& zone
|
||||
){
|
||||
int64_t time = 0;
|
||||
if( zone.HasChildren() )
|
||||
{
|
||||
auto& children = worker.GetZoneChildren( zone.Child() );
|
||||
if( children.is_magic() )
|
||||
{
|
||||
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
|
||||
for( auto& v : vec )
|
||||
{
|
||||
assert( v.IsEndValid() );
|
||||
time += v.End() - v.Start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( auto& v : children )
|
||||
{
|
||||
assert( v->IsEndValid() );
|
||||
time += v->End() - v->Start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!AttachConsole(ATTACH_PARENT_PROCESS))
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
|
||||
}
|
||||
#endif
|
||||
|
||||
Args args = parse_args(argc, argv);
|
||||
|
||||
auto f = std::unique_ptr<tracy::FileRead>(
|
||||
tracy::FileRead::Open(args.trace_file)
|
||||
);
|
||||
if (!f)
|
||||
{
|
||||
fprintf(stderr, "Could not open file %s\n", args.trace_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto worker = tracy::Worker(*f);
|
||||
|
||||
if (args.unwrapMessages)
|
||||
{
|
||||
const auto& msgs = worker.GetMessages();
|
||||
|
||||
if (msgs.size() > 0)
|
||||
{
|
||||
std::vector<const char*> columnsForMessages;
|
||||
columnsForMessages = {
|
||||
"MessageName", "total_ns"
|
||||
};
|
||||
std::string headerForMessages = join(columnsForMessages, args.separator);
|
||||
printf("%s\n", headerForMessages.data());
|
||||
|
||||
for(auto& it : msgs)
|
||||
{
|
||||
std::vector<std::string> values(columnsForMessages.size());
|
||||
|
||||
values[0] = worker.GetString(it->ref);
|
||||
values[1] = std::to_string(it->time);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("There are currently no messages!\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (!worker.AreSourceLocationZonesReady())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
auto& slz = worker.GetSourceLocationZones();
|
||||
tracy::Vector<decltype(slz.begin())> slz_selected;
|
||||
slz_selected.reserve(slz.size());
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for(auto it = slz.begin(); it != slz.end(); ++it)
|
||||
{
|
||||
if(it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if(args.filter[0] == '\0')
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name(it->first, worker);
|
||||
if(is_substring(args.filter, name, args.case_sensitive))
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
if (args.unwrap)
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns", "thread"
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "total_ns", "total_perc",
|
||||
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
|
||||
};
|
||||
}
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for(auto& it : slz_selected)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
|
||||
values[0] = get_name(it->first, worker);
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation(it->first);
|
||||
values[1] = worker.GetString(srcloc.file);
|
||||
values[2] = std::to_string(srcloc.line);
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
|
||||
if (args.unwrap)
|
||||
{
|
||||
int i = 0;
|
||||
for (const auto& zone_thread_data : zone_data.zones) {
|
||||
const auto zone_event = zone_thread_data.Zone();
|
||||
const auto tId = zone_thread_data.Thread();
|
||||
const auto start = zone_event->Start();
|
||||
const auto end = zone_event->End();
|
||||
|
||||
values[3] = std::to_string(start);
|
||||
|
||||
auto timespan = end - start;
|
||||
if (args.self_time) {
|
||||
timespan -= GetZoneChildTimeFast(worker, *zone_event);
|
||||
}
|
||||
values[4] = std::to_string(timespan);
|
||||
values[5] = std::to_string(tId);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
|
||||
values[3] = std::to_string(time);
|
||||
values[4] = std::to_string(100. * time / last_time);
|
||||
|
||||
values[5] = std::to_string(zone_data.zones.size());
|
||||
|
||||
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
|
||||
/ zone_data.zones.size();
|
||||
values[6] = std::to_string(avg);
|
||||
|
||||
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
|
||||
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
|
||||
values[7] = std::to_string(tmin);
|
||||
values[8] = std::to_string(tmax);
|
||||
|
||||
const auto sz = zone_data.zones.size();
|
||||
const auto ss = zone_data.sumSq
|
||||
- 2. * zone_data.total * avg
|
||||
+ avg * avg * sz;
|
||||
double std = 0;
|
||||
if( sz > 1 )
|
||||
std = sqrt(ss / (sz - 1));
|
||||
values[9] = std::to_string(std);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
BIN
libs/tracy/doc/profiler.png
Normal file
|
After Width: | Height: | Size: 213 KiB |
BIN
libs/tracy/doc/profiler2.png
Normal file
|
After Width: | Height: | Size: 250 KiB |
BIN
libs/tracy/doc/profiler3.png
Normal file
|
After Width: | Height: | Size: 234 KiB |
706
libs/tracy/dtl/Diff.hpp
Normal file
@ -0,0 +1,706 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_DIFF_H
|
||||
#define DTL_DIFF_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* diff class template
|
||||
* sequence must support random_access_iterator.
|
||||
*/
|
||||
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
|
||||
class Diff
|
||||
{
|
||||
private :
|
||||
dtl_typedefs(elem, sequence)
|
||||
sequence A;
|
||||
sequence B;
|
||||
size_t M;
|
||||
size_t N;
|
||||
size_t delta;
|
||||
size_t offset;
|
||||
long long *fp;
|
||||
long long editDistance;
|
||||
Lcs< elem > lcs;
|
||||
Ses< elem > ses;
|
||||
editPath path;
|
||||
editPathCordinates pathCordinates;
|
||||
bool swapped;
|
||||
bool huge;
|
||||
bool trivial;
|
||||
bool editDistanceOnly;
|
||||
uniHunkVec uniHunks;
|
||||
comparator cmp;
|
||||
long long ox;
|
||||
long long oy;
|
||||
public :
|
||||
Diff () {}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b) : A(a), B(b), ses(false) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
bool deleteFirst,
|
||||
const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
|
||||
init();
|
||||
}
|
||||
|
||||
~Diff() {}
|
||||
|
||||
long long getEditDistance () const {
|
||||
return editDistance;
|
||||
}
|
||||
|
||||
Lcs< elem > getLcs () const {
|
||||
return lcs;
|
||||
}
|
||||
|
||||
elemVec getLcsVec () const {
|
||||
return lcs.getSequence();
|
||||
}
|
||||
|
||||
Ses< elem > getSes () const {
|
||||
return ses;
|
||||
}
|
||||
|
||||
uniHunkVec getUniHunks () const {
|
||||
return uniHunks;
|
||||
}
|
||||
|
||||
/* These should be deprecated */
|
||||
bool isHuge () const {
|
||||
return huge;
|
||||
}
|
||||
|
||||
void onHuge () {
|
||||
this->huge = true;
|
||||
}
|
||||
|
||||
void offHuge () {
|
||||
this->huge = false;
|
||||
}
|
||||
|
||||
bool isUnserious () const {
|
||||
return trivial;
|
||||
}
|
||||
|
||||
void onUnserious () {
|
||||
this->trivial = true;
|
||||
}
|
||||
|
||||
void offUnserious () {
|
||||
this->trivial = false;
|
||||
}
|
||||
|
||||
void onOnlyEditDistance () {
|
||||
this->editDistanceOnly = true;
|
||||
}
|
||||
|
||||
/* These are the replacements for the above */
|
||||
bool hugeEnabled () const {
|
||||
return huge;
|
||||
}
|
||||
|
||||
void enableHuge () {
|
||||
this->huge = true;
|
||||
}
|
||||
|
||||
void disableHuge () {
|
||||
this->huge = false;
|
||||
}
|
||||
|
||||
bool trivialEnabled () const {
|
||||
return trivial;
|
||||
}
|
||||
|
||||
void enableTrivial () {
|
||||
this->trivial = true;
|
||||
}
|
||||
|
||||
void disableTrivial () {
|
||||
this->trivial = false;
|
||||
}
|
||||
|
||||
void editDistanceOnlyEnabled () {
|
||||
this->editDistanceOnly = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* patching with Unified Format Hunks
|
||||
*/
|
||||
sequence uniPatch (const sequence& seq) {
|
||||
elemList seqLst(seq.begin(), seq.end());
|
||||
sesElemVec shunk;
|
||||
sesElemVec_iter vsesIt;
|
||||
elemList_iter lstIt = seqLst.begin();
|
||||
long long inc_dec_total = 0;
|
||||
long long gap = 1;
|
||||
for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
|
||||
joinSesVec(shunk, it->common[0]);
|
||||
joinSesVec(shunk, it->change);
|
||||
joinSesVec(shunk, it->common[1]);
|
||||
it->a += inc_dec_total;
|
||||
inc_dec_total += it->inc_dec_count;
|
||||
for (long long i=0;i<it->a - gap;++i) {
|
||||
++lstIt;
|
||||
}
|
||||
gap = it->a + it->b + it->inc_dec_count;
|
||||
vsesIt = shunk.begin();
|
||||
while (vsesIt!=shunk.end()) {
|
||||
switch (vsesIt->second.type) {
|
||||
case SES_ADD :
|
||||
seqLst.insert(lstIt, vsesIt->first);
|
||||
break;
|
||||
case SES_DELETE :
|
||||
if (lstIt != seqLst.end()) {
|
||||
lstIt = seqLst.erase(lstIt);
|
||||
}
|
||||
break;
|
||||
case SES_COMMON :
|
||||
if (lstIt != seqLst.end()) {
|
||||
++lstIt;
|
||||
}
|
||||
break;
|
||||
default :
|
||||
// no fall-through
|
||||
break;
|
||||
}
|
||||
++vsesIt;
|
||||
}
|
||||
shunk.clear();
|
||||
}
|
||||
|
||||
sequence patchedSeq(seqLst.begin(), seqLst.end());
|
||||
return patchedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* patching with Shortest Edit Script (SES)
|
||||
*/
|
||||
sequence patch (const sequence& seq) const {
|
||||
sesElemVec sesSeq = ses.getSequence();
|
||||
elemList seqLst(seq.begin(), seq.end());
|
||||
elemList_iter lstIt = seqLst.begin();
|
||||
for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
|
||||
switch (sesIt->second.type) {
|
||||
case SES_ADD :
|
||||
seqLst.insert(lstIt, sesIt->first);
|
||||
break;
|
||||
case SES_DELETE :
|
||||
lstIt = seqLst.erase(lstIt);
|
||||
break;
|
||||
case SES_COMMON :
|
||||
++lstIt;
|
||||
break;
|
||||
default :
|
||||
// no fall-through
|
||||
break;
|
||||
}
|
||||
}
|
||||
sequence patchedSeq(seqLst.begin(), seqLst.end());
|
||||
return patchedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* compose Longest Common Subsequence and Shortest Edit Script.
|
||||
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
|
||||
* described by Sun Wu, Udi Manber and Gene Myers
|
||||
*/
|
||||
void compose() {
|
||||
|
||||
if (isHuge()) {
|
||||
pathCordinates.reserve(MAX_CORDINATES_SIZE);
|
||||
}
|
||||
ox = 0;
|
||||
oy = 0;
|
||||
long long p = -1;
|
||||
fp = new long long[M + N + 3];
|
||||
fill(&fp[0], &fp[M + N + 3], -1);
|
||||
path = editPath(M + N + 3);
|
||||
fill(path.begin(), path.end(), -1);
|
||||
ONP:
|
||||
do {
|
||||
++p;
|
||||
for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
|
||||
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
|
||||
}
|
||||
for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
|
||||
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
|
||||
}
|
||||
fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
|
||||
} while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
|
||||
|
||||
editDistance += static_cast<long long>(delta) + 2 * p;
|
||||
long long r = path[delta+offset];
|
||||
P cordinate;
|
||||
editPathCordinates epc(0);
|
||||
|
||||
// recording edit distance only
|
||||
if (editDistanceOnly) {
|
||||
delete[] this->fp;
|
||||
return;
|
||||
}
|
||||
|
||||
while(r != -1) {
|
||||
cordinate.x = pathCordinates[(size_t)r].x;
|
||||
cordinate.y = pathCordinates[(size_t)r].y;
|
||||
epc.push_back(cordinate);
|
||||
r = pathCordinates[(size_t)r].k;
|
||||
}
|
||||
|
||||
// record Longest Common Subsequence & Shortest Edit Script
|
||||
if (!recordSequence(epc)) {
|
||||
pathCordinates.resize(0);
|
||||
epc.resize(0);
|
||||
p = -1;
|
||||
goto ONP;
|
||||
}
|
||||
delete[] this->fp;
|
||||
}
|
||||
|
||||
/**
|
||||
* print difference between A and B as an SES
|
||||
*/
|
||||
template < typename stream >
|
||||
void printSES (stream& out) const {
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
// ostream convenience overload (defaults to cout); forwards to the
// stream-templated printSES above.
void printSES (ostream& out = cout) const {
    printSES< ostream >(out);
}
|
||||
|
||||
/**
|
||||
* print differences given an SES
|
||||
*/
|
||||
template < typename stream >
|
||||
static void printSES (const Ses< elem >& s, stream& out) {
|
||||
sesElemVec ses_v = s.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
// ostream convenience overload (defaults to cout) for printing a given
// SES; forwards to the stream-templated static printSES.
static void printSES (const Ses< elem >& s, ostream& out = cout) {
    printSES< ostream >(s, out);
}
|
||||
|
||||
/**
|
||||
* print difference between A and B as an SES with custom printer
|
||||
*/
|
||||
template < typename stream, template < typename SEET, typename STRT > class PT >
|
||||
void printSES (stream& out) const {
|
||||
sesElemVec ses_v = ses.getSequence ();
|
||||
for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
|
||||
}
|
||||
|
||||
/**
|
||||
* store difference between A and B as an SES with custom storage
|
||||
*/
|
||||
template < typename storedData, template < typename SEET, typename STRT > class ST >
|
||||
void storeSES(storedData& sd) const {
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ST < sesElem, storedData >(sd));
|
||||
}
|
||||
|
||||
/**
|
||||
* print difference between A and B in the Unified Format
|
||||
*/
|
||||
template < typename stream >
|
||||
void printUnifiedFormat (stream& out) const {
|
||||
for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
// ostream convenience overload (defaults to cout); forwards to the
// stream-templated printUnifiedFormat above.
void printUnifiedFormat (ostream& out = cout) const {
    printUnifiedFormat< ostream >(out);
}
|
||||
|
||||
/**
|
||||
* print unified format difference with given unified format hunks
|
||||
*/
|
||||
template < typename stream >
|
||||
static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
|
||||
for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
|
||||
}
|
||||
|
||||
// ostream convenience overload (defaults to cout) for printing given
// hunks; forwards to the stream-templated static printUnifiedFormat.
static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
    printUnifiedFormat< ostream >(hunks, out);
}
|
||||
|
||||
/**
|
||||
* compose Unified Format Hunks from Shortest Edit Script
|
||||
*/
|
||||
void composeUnifiedHunks () {
|
||||
sesElemVec common[2];
|
||||
sesElemVec change;
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
long long l_cnt = 1;
|
||||
long long length = distance(ses_v.begin(), ses_v.end());
|
||||
long long middle = 0;
|
||||
bool isMiddle, isAfter;
|
||||
elemInfo einfo;
|
||||
long long a, b, c, d; // @@ -a,b +c,d @@
|
||||
long long inc_dec_count = 0;
|
||||
uniHunk< sesElem > hunk;
|
||||
sesElemVec adds;
|
||||
sesElemVec deletes;
|
||||
|
||||
isMiddle = isAfter = false;
|
||||
a = b = c = d = 0;
|
||||
|
||||
for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
|
||||
einfo = it->second;
|
||||
switch (einfo.type) {
|
||||
case SES_ADD :
|
||||
middle = 0;
|
||||
++inc_dec_count;
|
||||
adds.push_back(*it);
|
||||
if (!isMiddle) isMiddle = true;
|
||||
if (isMiddle) ++d;
|
||||
if (l_cnt >= length) {
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
isAfter = true;
|
||||
}
|
||||
break;
|
||||
case SES_DELETE :
|
||||
middle = 0;
|
||||
--inc_dec_count;
|
||||
deletes.push_back(*it);
|
||||
if (!isMiddle) isMiddle = true;
|
||||
if (isMiddle) ++b;
|
||||
if (l_cnt >= length) {
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
isAfter = true;
|
||||
}
|
||||
break;
|
||||
case SES_COMMON :
|
||||
++b;++d;
|
||||
if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
|
||||
if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
|
||||
if (a == 0 && c == 0) {
|
||||
if (!wasSwapped()) {
|
||||
a = einfo.beforeIdx;
|
||||
c = einfo.afterIdx;
|
||||
} else {
|
||||
a = einfo.afterIdx;
|
||||
c = einfo.beforeIdx;
|
||||
}
|
||||
}
|
||||
common[0].push_back(*it);
|
||||
} else {
|
||||
rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
|
||||
common[0].pop_back();
|
||||
common[0].push_back(*it);
|
||||
++a;++c;
|
||||
--b;--d;
|
||||
}
|
||||
}
|
||||
if (isMiddle && !isAfter) {
|
||||
++middle;
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
change.push_back(*it);
|
||||
if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
|
||||
isAfter = true;
|
||||
}
|
||||
adds.clear();
|
||||
deletes.clear();
|
||||
}
|
||||
break;
|
||||
default :
|
||||
// no through
|
||||
break;
|
||||
}
|
||||
// compose unified format hunk
|
||||
if (isAfter && !change.empty()) {
|
||||
sesElemVec_iter cit = it;
|
||||
long long cnt = 0;
|
||||
for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
|
||||
if (cit->second.type == SES_COMMON) {
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
|
||||
middle = 0;
|
||||
isAfter = false;
|
||||
continue;
|
||||
}
|
||||
if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
|
||||
long long c0size = static_cast<long long>(common[0].size());
|
||||
rotate(common[0].begin(),
|
||||
common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
|
||||
common[0].end());
|
||||
for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
|
||||
common[0].pop_back();
|
||||
}
|
||||
a += c0size - DTL_SEPARATE_SIZE;
|
||||
c += c0size - DTL_SEPARATE_SIZE;
|
||||
}
|
||||
if (a == 0) ++a;
|
||||
if (c == 0) ++c;
|
||||
if (wasSwapped()) swap(a, c);
|
||||
hunk.a = a;
|
||||
hunk.b = b;
|
||||
hunk.c = c;
|
||||
hunk.d = d;
|
||||
hunk.common[0] = common[0];
|
||||
hunk.change = change;
|
||||
hunk.common[1] = common[1];
|
||||
hunk.inc_dec_count = inc_dec_count;
|
||||
uniHunks.push_back(hunk);
|
||||
isMiddle = false;
|
||||
isAfter = false;
|
||||
common[0].clear();
|
||||
common[1].clear();
|
||||
adds.clear();
|
||||
deletes.clear();
|
||||
change.clear();
|
||||
a = b = c = d = middle = inc_dec_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* compose ses from stream
|
||||
*/
|
||||
template <typename stream>
|
||||
static Ses< elem > composeSesFromStream (stream& st)
|
||||
{
|
||||
elem line;
|
||||
Ses< elem > ret;
|
||||
long long x_idx, y_idx;
|
||||
x_idx = y_idx = 1;
|
||||
while (getline(st, line)) {
|
||||
elem mark(line.begin(), line.begin() + 1);
|
||||
elem e(line.begin() + 1, line.end());
|
||||
if (mark == SES_MARK_DELETE) {
|
||||
ret.addSequence(e, x_idx, 0, SES_DELETE);
|
||||
++x_idx;
|
||||
} else if (mark == SES_MARK_ADD) {
|
||||
ret.addSequence(e, y_idx, 0, SES_ADD);
|
||||
++y_idx;
|
||||
} else if (mark == SES_MARK_COMMON) {
|
||||
ret.addSequence(e, x_idx, y_idx, SES_COMMON);
|
||||
++x_idx;
|
||||
++y_idx;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private :
|
||||
/**
|
||||
* initialize
|
||||
*/
|
||||
void init () {
|
||||
M = distance(A.begin(), A.end());
|
||||
N = distance(B.begin(), B.end());
|
||||
if (M < N) {
|
||||
swapped = false;
|
||||
} else {
|
||||
swap(A, B);
|
||||
swap(M, N);
|
||||
swapped = true;
|
||||
}
|
||||
editDistance = 0;
|
||||
delta = N - M;
|
||||
offset = M + 1;
|
||||
huge = false;
|
||||
trivial = false;
|
||||
editDistanceOnly = false;
|
||||
fp = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* search shortest path and record the path
|
||||
*/
|
||||
long long snake(const long long& k, const long long& above, const long long& below) {
|
||||
long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
|
||||
long long y = max(above, below);
|
||||
long long x = y - k;
|
||||
while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
|
||||
++x;++y;
|
||||
}
|
||||
|
||||
path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
|
||||
if (!editDistanceOnly) {
|
||||
P p;
|
||||
p.x = x;p.y = y;p.k = r;
|
||||
pathCordinates.push_back(p);
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
/**
|
||||
* record SES and LCS
|
||||
*/
|
||||
bool recordSequence (const editPathCordinates& v) {
|
||||
sequence_const_iter x(A.begin());
|
||||
sequence_const_iter y(B.begin());
|
||||
long long x_idx, y_idx; // line number for Unified Format
|
||||
long long px_idx, py_idx; // cordinates
|
||||
bool complete = false;
|
||||
x_idx = y_idx = 1;
|
||||
px_idx = py_idx = 0;
|
||||
for (size_t i=v.size()-1;!complete;--i) {
|
||||
while(px_idx < v[i].x || py_idx < v[i].y) {
|
||||
if (v[i].y - v[i].x > py_idx - px_idx) {
|
||||
if (!wasSwapped()) {
|
||||
ses.addSequence(*y, 0, y_idx + oy, SES_ADD);
|
||||
} else {
|
||||
ses.addSequence(*y, y_idx + oy, 0, SES_DELETE);
|
||||
}
|
||||
++y;
|
||||
++y_idx;
|
||||
++py_idx;
|
||||
} else if (v[i].y - v[i].x < py_idx - px_idx) {
|
||||
if (!wasSwapped()) {
|
||||
ses.addSequence(*x, x_idx + ox, 0, SES_DELETE);
|
||||
} else {
|
||||
ses.addSequence(*x, 0, x_idx + ox, SES_ADD);
|
||||
}
|
||||
++x;
|
||||
++x_idx;
|
||||
++px_idx;
|
||||
} else {
|
||||
if (!wasSwapped()) {
|
||||
lcs.addSequence(*x);
|
||||
ses.addSequence(*x, x_idx + ox, y_idx + oy, SES_COMMON);
|
||||
} else {
|
||||
lcs.addSequence(*y);
|
||||
ses.addSequence(*y, y_idx + oy, x_idx + ox, SES_COMMON);
|
||||
}
|
||||
++x;
|
||||
++y;
|
||||
++x_idx;
|
||||
++y_idx;
|
||||
++px_idx;
|
||||
++py_idx;
|
||||
}
|
||||
}
|
||||
if (i == 0) complete = true;
|
||||
}
|
||||
|
||||
if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
|
||||
// all recording succeeded
|
||||
} else {
|
||||
// trivial difference
|
||||
if (trivialEnabled()) {
|
||||
if (!wasSwapped()) {
|
||||
recordOddSequence(x_idx, M, x, SES_DELETE);
|
||||
recordOddSequence(y_idx, N, y, SES_ADD);
|
||||
} else {
|
||||
recordOddSequence(x_idx, M, x, SES_ADD);
|
||||
recordOddSequence(y_idx, N, y, SES_DELETE);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// nontrivial difference
|
||||
sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
|
||||
sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
|
||||
A = A_;
|
||||
B = B_;
|
||||
M = distance(A.begin(), A.end());
|
||||
N = distance(B.begin(), B.end());
|
||||
delta = N - M;
|
||||
offset = M + 1;
|
||||
delete[] fp;
|
||||
fp = new long long[M + N + 3];
|
||||
fill(&fp[0], &fp[M + N + 3], -1);
|
||||
fill(path.begin(), path.end(), -1);
|
||||
ox = x_idx - 1;
|
||||
oy = y_idx - 1;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* record odd sequence in SES
|
||||
*/
|
||||
void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
|
||||
while(idx < length){
|
||||
ses.addSequence(*it, idx, 0, et);
|
||||
++it;
|
||||
++idx;
|
||||
++editDistance;
|
||||
}
|
||||
ses.addSequence(*it, idx, 0, et);
|
||||
++editDistance;
|
||||
}
|
||||
|
||||
/**
|
||||
* join SES vectors
|
||||
*/
|
||||
void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
|
||||
if (!s2.empty()) {
|
||||
for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
|
||||
s1.push_back(*vit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check if the sequences have been swapped
|
||||
*/
|
||||
bool inline wasSwapped () const {
|
||||
return swapped;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_DIFF_H
|
||||
245
libs/tracy/dtl/Diff3.hpp
Normal file
@ -0,0 +1,245 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_DIFF3_H
|
||||
#define DTL_DIFF3_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* diff3 class template
|
||||
* sequence must support random_access_iterator.
|
||||
*/
|
||||
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
|
||||
class Diff3
|
||||
{
|
||||
private:
|
||||
dtl_typedefs(elem, sequence)
|
||||
sequence A;
|
||||
sequence B;
|
||||
sequence C;
|
||||
sequence S;
|
||||
Diff< elem, sequence, comparator > diff_ba;
|
||||
Diff< elem, sequence, comparator > diff_bc;
|
||||
bool conflict;
|
||||
elem csepabegin;
|
||||
elem csepa;
|
||||
elem csepaend;
|
||||
public :
|
||||
Diff3 () {}
|
||||
Diff3 (const sequence& a,
|
||||
const sequence& b,
|
||||
const sequence& c) : A(a), B(b), C(c),
|
||||
diff_ba(b, a), diff_bc(b, c),
|
||||
conflict(false) {}
|
||||
|
||||
~Diff3 () {}
|
||||
|
||||
bool isConflict () const {
|
||||
return conflict;
|
||||
}
|
||||
|
||||
sequence getMergedSequence () const {
|
||||
return S;
|
||||
}
|
||||
|
||||
/**
|
||||
* merge changes B and C into A
|
||||
*/
|
||||
bool merge () {
|
||||
if (diff_ba.getEditDistance() == 0) { // A == B
|
||||
if (diff_bc.getEditDistance() == 0) { // A == B == C
|
||||
S = B;
|
||||
return true;
|
||||
}
|
||||
S = C;
|
||||
return true;
|
||||
} else { // A != B
|
||||
if (diff_bc.getEditDistance() == 0) { // A != B == C
|
||||
S = A;
|
||||
return true;
|
||||
} else { // A != B != C
|
||||
S = merge_();
|
||||
if (isConflict()) { // conflict occured
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* compose differences
|
||||
*/
|
||||
void compose () {
|
||||
diff_ba.compose();
|
||||
diff_bc.compose();
|
||||
}
|
||||
|
||||
private :
|
||||
/**
|
||||
* merge implementation
|
||||
*/
|
||||
sequence merge_ () {
|
||||
elemVec seq;
|
||||
Ses< elem > ses_ba = diff_ba.getSes();
|
||||
Ses< elem > ses_bc = diff_bc.getSes();
|
||||
sesElemVec ses_ba_v = ses_ba.getSequence();
|
||||
sesElemVec ses_bc_v = ses_bc.getSequence();
|
||||
sesElemVec_iter ba_it = ses_ba_v.begin();
|
||||
sesElemVec_iter bc_it = ses_bc_v.begin();
|
||||
sesElemVec_iter ba_end = ses_ba_v.end();
|
||||
sesElemVec_iter bc_end = ses_bc_v.end();
|
||||
|
||||
while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
|
||||
while (true) {
|
||||
if (!isEnd(ba_end, ba_it) &&
|
||||
!isEnd(bc_end, bc_it) &&
|
||||
ba_it->first == bc_it->first &&
|
||||
ba_it->second.type == SES_COMMON &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
// do nothing
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
|
||||
else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
}
|
||||
if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
|
||||
if ( ba_it->second.type == SES_COMMON
|
||||
&& bc_it->second.type == SES_DELETE) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_COMMON &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
seq.push_back(bc_it->first);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_DELETE) {
|
||||
if (ba_it->first == bc_it->first) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
}
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
seq.push_back(ba_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_DELETE) {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
if (ba_it->first == bc_it->first) {
|
||||
seq.push_back(ba_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isEnd(ba_end, ba_it)) {
|
||||
addDecentSequence(bc_end, bc_it, seq);
|
||||
} else if (isEnd(bc_end, bc_it)) {
|
||||
addDecentSequence(ba_end, ba_it, seq);
|
||||
}
|
||||
|
||||
sequence mergedSeq(seq.begin(), seq.end());
|
||||
return mergedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* join elem vectors
|
||||
*/
|
||||
void inline joinElemVec (elemVec& s1, elemVec& s2) const {
|
||||
if (!s2.empty()) {
|
||||
for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
|
||||
s1.push_back(*vit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check if sequence is at end
|
||||
*/
|
||||
template <typename T_iter>
|
||||
bool inline isEnd (const T_iter& end, const T_iter& it) const {
|
||||
return it == end ? true : false;
|
||||
}
|
||||
|
||||
/**
|
||||
* increment iterator until iterator is at end
|
||||
*/
|
||||
template <typename T_iter>
|
||||
void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
|
||||
if (!isEnd(end, it)) ++it;
|
||||
}
|
||||
|
||||
/**
|
||||
* add elements whose SES's type is ADD
|
||||
*/
|
||||
void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
|
||||
while (!isEnd(end, it)) {
|
||||
if (it->second.type == SES_ADD) seq.push_back(it->first);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_DIFF3_H
|
||||
55
libs/tracy/dtl/Lcs.hpp
Normal file
@ -0,0 +1,55 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_LCS_H
|
||||
#define DTL_LCS_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* Longest Common Subsequence template class
|
||||
*/
|
||||
template <typename elem>
|
||||
class Lcs : public Sequence< elem >
|
||||
{
|
||||
public :
|
||||
Lcs () {}
|
||||
~Lcs () {}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_LCS_H
|
||||
65
libs/tracy/dtl/Sequence.hpp
Normal file
@ -0,0 +1,65 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_SEQUENCE_H
|
||||
#define DTL_SEQUENCE_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
 * sequence class template: a growable list of elements kept in a vector
 */
template <typename elem>
class Sequence
{
public :
    typedef vector< elem > elemVec;

    Sequence () {}
    virtual ~Sequence () {}

    /** returns a copy of the stored elements */
    elemVec getSequence () const {
        return this->sequence;
    }

    /** appends `e` to the stored elements */
    void addSequence (elem e) {
        this->sequence.insert(this->sequence.end(), e);
    }

protected :
    elemVec sequence;
};
|
||||
}
|
||||
|
||||
#endif // DTL_SEQUENCE_H
|
||||
132
libs/tracy/dtl/Ses.hpp
Normal file
@ -0,0 +1,132 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_SES_H
|
||||
#define DTL_SES_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* Shortest Edit Script template class
|
||||
*/
|
||||
template <typename elem>
|
||||
class Ses : public Sequence< elem >
|
||||
{
|
||||
private :
|
||||
typedef pair< elem, elemInfo > sesElem;
|
||||
typedef vector< sesElem > sesElemVec;
|
||||
public :
|
||||
|
||||
Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) {
|
||||
nextDeleteIdx = 0;
|
||||
}
|
||||
Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) {
|
||||
nextDeleteIdx = 0;
|
||||
}
|
||||
~Ses () {}
|
||||
|
||||
bool isOnlyAdd () const {
|
||||
return onlyAdd;
|
||||
}
|
||||
|
||||
bool isOnlyDelete () const {
|
||||
return onlyDelete;
|
||||
}
|
||||
|
||||
bool isOnlyCopy () const {
|
||||
return onlyCopy;
|
||||
}
|
||||
|
||||
bool isOnlyOneOperation () const {
|
||||
return isOnlyAdd() || isOnlyDelete() || isOnlyCopy();
|
||||
}
|
||||
|
||||
bool isChange () const {
|
||||
return !onlyCopy;
|
||||
}
|
||||
|
||||
using Sequence< elem >::addSequence;
|
||||
void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
|
||||
elemInfo info;
|
||||
info.beforeIdx = beforeIdx;
|
||||
info.afterIdx = afterIdx;
|
||||
info.type = type;
|
||||
sesElem pe(e, info);
|
||||
if (!deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
}
|
||||
switch (type) {
|
||||
case SES_DELETE:
|
||||
onlyCopy = false;
|
||||
onlyAdd = false;
|
||||
if (deletesFirst) {
|
||||
sequence.insert(sequence.begin() + nextDeleteIdx, pe);
|
||||
nextDeleteIdx++;
|
||||
}
|
||||
break;
|
||||
case SES_COMMON:
|
||||
onlyAdd = false;
|
||||
onlyDelete = false;
|
||||
if (deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
nextDeleteIdx = sequence.size();
|
||||
}
|
||||
break;
|
||||
case SES_ADD:
|
||||
onlyDelete = false;
|
||||
onlyCopy = false;
|
||||
if (deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sesElemVec getSequence () const {
|
||||
return sequence;
|
||||
}
|
||||
private :
|
||||
sesElemVec sequence;
|
||||
bool onlyAdd;
|
||||
bool onlyDelete;
|
||||
bool onlyCopy;
|
||||
bool deletesFirst;
|
||||
size_t nextDeleteIdx;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_SES_H
|
||||
47
libs/tracy/dtl/dtl.hpp
Normal file
@ -0,0 +1,47 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DTL_H
|
||||
#define DTL_H
|
||||
|
||||
#include "variables.hpp"
|
||||
#include "functors.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Lcs.hpp"
|
||||
#include "Ses.hpp"
|
||||
#include "Diff.hpp"
|
||||
#include "Diff3.hpp"
|
||||
|
||||
#endif // DTL_H
|
||||
151
libs/tracy/dtl/functors.hpp
Normal file
@ -0,0 +1,151 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_FUNCTORS_H
|
||||
#define DTL_FUNCTORS_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
 * Abstract base for functors that print one SES element to a stream.
 *
 * The default constructor binds the output to cout; the one-argument
 * constructor binds it to the stream passed in. The stream is held by
 * reference and is not owned by the printer.
 */
template <typename sesElem, typename stream = ostream>
class Printer
{
public:
    Printer() : out_(cout) { }
    Printer(stream& out) : out_(out) { }
    virtual ~Printer() { }

    // Print a single element; every concrete printer implements this.
    virtual void operator()(const sesElem& se) const = 0;

protected:
    stream& out_; // destination stream used by derived printers
};
|
||||
|
||||
/**
|
||||
* common element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class CommonPrinter : public Printer < sesElem, stream >
|
||||
{
|
||||
public :
|
||||
CommonPrinter () : Printer < sesElem, stream > () {}
|
||||
CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
|
||||
~CommonPrinter () {}
|
||||
void operator() (const sesElem& se) const {
|
||||
this->out_ << SES_MARK_COMMON << se.first << endl;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* ses element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class ChangePrinter : public Printer < sesElem, stream >
|
||||
{
|
||||
public :
|
||||
ChangePrinter () : Printer < sesElem, stream > () {}
|
||||
ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
|
||||
~ChangePrinter () {}
|
||||
void operator() (const sesElem& se) const {
|
||||
switch (se.second.type) {
|
||||
case SES_ADD:
|
||||
this->out_ << SES_MARK_ADD << se.first << endl;
|
||||
break;
|
||||
case SES_DELETE:
|
||||
this->out_ << SES_MARK_DELETE << se.first << endl;
|
||||
break;
|
||||
case SES_COMMON:
|
||||
this->out_ << SES_MARK_COMMON << se.first << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* unified format element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class UniHunkPrinter
|
||||
{
|
||||
public :
|
||||
UniHunkPrinter () : out_(cout) {}
|
||||
UniHunkPrinter (stream& out) : out_(out) {}
|
||||
~UniHunkPrinter () {}
|
||||
void operator() (const uniHunk< sesElem >& hunk) const {
|
||||
out_ << "@@"
|
||||
<< " -" << hunk.a << "," << hunk.b
|
||||
<< " +" << hunk.c << "," << hunk.d
|
||||
<< " @@" << endl;
|
||||
|
||||
for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
|
||||
for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
|
||||
for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
|
||||
}
|
||||
private :
|
||||
stream& out_;
|
||||
};
|
||||
|
||||
/**
 * Abstract base for functors that store SES elements somewhere.
 *
 * Holds a reference to the caller's storage object; derived classes decide
 * what storing an element means by implementing operator().
 */
template <typename sesElem, typename storedData>
class Storage
{
public:
    Storage(storedData& sd) : storedData_(sd) { }
    virtual ~Storage() { }

    // Handle a single element; implemented by derived classes.
    virtual void operator()(const sesElem& se) const = 0;

protected:
    storedData& storedData_; // caller-provided storage, not owned
};
|
||||
|
||||
/**
 * Element comparison policy.
 *
 * The default implementation reports two elements as equal when
 * operator== says so; impl() is virtual so a subclass can replace it.
 */
template <typename elem>
class Compare
{
public:
    Compare() { }
    virtual ~Compare() { }

    virtual inline bool impl(const elem& e1, const elem& e2) const
    {
        const bool same = (e1 == e2);
        return same;
    }
};
|
||||
}
|
||||
|
||||
#endif // DTL_FUNCTORS_H
|
||||
142
libs/tracy/dtl/variables.hpp
Normal file
@ -0,0 +1,142 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_VARIABLES_H
|
||||
#define DTL_VARIABLES_H
|
||||
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
|
||||
namespace dtl {
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::ostream;
|
||||
using std::list;
|
||||
using std::for_each;
|
||||
using std::distance;
|
||||
using std::fill;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::rotate;
|
||||
using std::swap;
|
||||
using std::max;
|
||||
|
||||
/**
|
||||
* version string
|
||||
*/
|
||||
const string version = "1.20";
|
||||
|
||||
/**
|
||||
* type of edit for SES
|
||||
*/
|
||||
typedef int edit_t;
|
||||
const edit_t SES_DELETE = -1;
|
||||
const edit_t SES_COMMON = 0;
|
||||
const edit_t SES_ADD = 1;
|
||||
|
||||
/**
|
||||
* mark of SES
|
||||
*/
|
||||
#define SES_MARK_DELETE "-"
|
||||
#define SES_MARK_COMMON " "
|
||||
#define SES_MARK_ADD "+"
|
||||
|
||||
/**
|
||||
* info for Unified Format
|
||||
*/
|
||||
typedef struct eleminfo {
|
||||
long long beforeIdx; // index of prev sequence
|
||||
long long afterIdx; // index of after sequence
|
||||
edit_t type; // type of edit(Add, Delete, Common)
|
||||
bool operator==(const eleminfo& other) const{
|
||||
return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
|
||||
}
|
||||
} elemInfo;
|
||||
|
||||
const long long DTL_SEPARATE_SIZE = 3;
|
||||
const long long DTL_CONTEXT_SIZE = 3;
|
||||
|
||||
/**
 * Coordinate record used for registering the route.
 */
typedef struct Point {
    long long x;   // x coordinate
    long long y;   // y coordinate
    long long k;   // vertex
} P;
|
||||
|
||||
/**
|
||||
 * limit of coordinate size
|
||||
*/
|
||||
const unsigned long long MAX_CORDINATES_SIZE = 2000000;
|
||||
|
||||
typedef vector< long long > editPath;
|
||||
typedef vector< P > editPathCordinates;
|
||||
|
||||
/**
 * One hunk of a unified-format diff.
 */
template <typename sesElem>
struct uniHunk {
    // Header numbers, printed as "@@ -a,b +c,d @@".
    long long a;
    long long b;
    long long c;
    long long d;
    vector<sesElem> common[2];   // common elements before [0] and after [1] the changes
    vector<sesElem> change;      // the changed elements
    long long inc_dec_count;     // count of increase and decrease
};
|
||||
|
||||
/*
 * Declares the container and iterator typedefs for a given element type and
 * sequence type. The iterator typedefs use `typename`, so the macro is meant
 * to be expanded where elem/sequence are template-dependent.
 */
#define dtl_typedefs(elem, sequence)                                    \
    typedef pair< elem, elemInfo >              sesElem;                \
    typedef vector< sesElem >                   sesElemVec;             \
    typedef typename sesElemVec::iterator       sesElemVec_iter;        \
    typedef vector< uniHunk< sesElem > >        uniHunkVec;             \
    typedef typename uniHunkVec::iterator       uniHunkVec_iter;        \
    typedef list< elem >                        elemList;               \
    typedef typename elemList::iterator         elemList_iter;          \
    typedef vector< elem >                      elemVec;                \
    typedef typename elemVec::iterator          elemVec_iter;           \
    typedef typename sequence::iterator         sequence_iter;          \
    typedef typename sequence::const_iterator   sequence_const_iter;
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // DTL_VARIABLES_H
|
||||
17
libs/tracy/examples/OpenCLVectorAdd/CMakeLists.txt
Normal file
@ -0,0 +1,17 @@
|
||||
cmake_minimum_required(VERSION 3.10)

project(OpenCLVectorAdd)

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

find_package(OpenCL REQUIRED)

# Tracy client library, built from the sources two directories up.
# The thread and dl link requirements are attached to the library itself
# (PUBLIC) so that every consumer inherits them.
add_library(TracyClient STATIC
    "${CMAKE_CURRENT_SOURCE_DIR}/../../public/TracyClient.cpp"
    "${CMAKE_CURRENT_SOURCE_DIR}/../../public/tracy/TracyOpenCL.hpp"
)
target_include_directories(TracyClient
    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../public/tracy"
)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)
target_link_libraries(TracyClient
    PUBLIC
        Threads::Threads
        ${CMAKE_DL_LIBS}
)

# The example executable. Nothing links against an executable, so its
# dependencies are PRIVATE.
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)
target_link_libraries(OpenCLVectorAdd
    PRIVATE
        OpenCL::OpenCL
        TracyClient
)
|
||||
220
libs/tracy/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp
Normal file
@ -0,0 +1,220 @@
|
||||
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <Tracy.hpp>
|
||||
#include <TracyOpenCL.hpp>
|
||||
|
||||
// Checks an OpenCL status value. The argument is evaluated exactly once
// (it is usually a call such as clFinish(queue), which must not be issued a
// second time just to print its result). On failure the status code is
// written to stderr and assert(false) is hit.
// Wrapped in do { } while(0) so the macro behaves as a single statement,
// including inside an unbraced if/else.
#define CL_ASSERT(err)                                                          \
    do                                                                          \
    {                                                                           \
        const auto clAssertStatus_ = (err);                                     \
        if(clAssertStatus_ != CL_SUCCESS)                                       \
        {                                                                       \
            std::cerr << "OpenCL Call Returned " << clAssertStatus_ << std::endl; \
            assert(false);                                                      \
        }                                                                       \
    } while(0)
|
||||
|
||||
const char kernelSource[] =
|
||||
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
|
||||
" { "
|
||||
" int i = get_global_id(0); "
|
||||
" if (i < N) { "
|
||||
" C[i] = A[i] + B[i]; "
|
||||
" } "
|
||||
" } ";
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue commandQueue;
|
||||
cl_kernel vectorAddKernel;
|
||||
cl_program program;
|
||||
cl_int err;
|
||||
cl_mem bufferA, bufferB, bufferC;
|
||||
|
||||
TracyCLCtx tracyCLCtx;
|
||||
|
||||
{
|
||||
ZoneScopedN("OpenCL Init");
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
|
||||
|
||||
if (numPlatforms == 0)
|
||||
{
|
||||
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
|
||||
|
||||
size_t platformNameBufferSize = 0;
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
|
||||
std::string platformName(platformNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Platform: " << platformName << std::endl;
|
||||
|
||||
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
|
||||
size_t deviceNameBufferSize = 0;
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
|
||||
std::string deviceName(deviceNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Device: " << deviceName << std::endl;
|
||||
|
||||
err = CL_SUCCESS;
|
||||
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
size_t kernelSourceLength = sizeof(kernelSource);
|
||||
const char* kernelSourceArray = { kernelSource };
|
||||
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
|
||||
{
|
||||
size_t programBuildLogBufferSize = 0;
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
|
||||
std::string programBuildLog(programBuildLogBufferSize, '\0');
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
|
||||
std::clog << programBuildLog << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
CL_ASSERT(err);
|
||||
}
|
||||
|
||||
tracyCLCtx = TracyCLContext(context, device);
|
||||
|
||||
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
|
||||
std::vector<float> hostA, hostB, hostC;
|
||||
|
||||
{
|
||||
ZoneScopedN("Host Data Init");
|
||||
hostA.resize(N);
|
||||
hostB.resize(N);
|
||||
hostC.resize(N);
|
||||
|
||||
std::iota(std::begin(hostA), std::end(hostA), 0.0f);
|
||||
std::iota(std::begin(hostB), std::end(hostB), 0.0f);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Host to Device Memory Copy");
|
||||
|
||||
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
cl_event writeBufferAEvent, writeBufferBEvent;
|
||||
{
|
||||
ZoneScopedN("Write Buffer A");
|
||||
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferAEvent);
|
||||
}
|
||||
{
|
||||
ZoneScopedN("Write Buffer B");
|
||||
TracyCLZone(tracyCLCtx, "Write BufferB");
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferBEvent);
|
||||
}
|
||||
}
|
||||
|
||||
cl_int clN = static_cast<cl_int>(N);
|
||||
const int numFrames = 10;
|
||||
const int launchsPerFrame = 10;
|
||||
constexpr int numLaunchs = numFrames * launchsPerFrame;
|
||||
std::vector<cl_event> kernelLaunchEvts;
|
||||
kernelLaunchEvts.reserve(numLaunchs);
|
||||
for (int i = 0; i < numFrames; ++i)
|
||||
{
|
||||
FrameMark;
|
||||
for (int j = 0; j < launchsPerFrame; ++j) {
|
||||
ZoneScopedN("VectorAdd Kernel Launch");
|
||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
|
||||
|
||||
cl_event vectorAddKernelEvent;
|
||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||
TracyCLZoneSetEvent(vectorAddKernelEvent);
|
||||
CL_ASSERT(clRetainEvent(vectorAddKernelEvent));
|
||||
kernelLaunchEvts.push_back(vectorAddKernelEvent);
|
||||
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
|
||||
}
|
||||
{
|
||||
// Wait frame events to be finished
|
||||
ZoneScopedN("clFinish");
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
}
|
||||
// You should collect on each 'frame' ends, so that streaming can be achieved.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Device to Host Memory Copy");
|
||||
TracyCLZone(tracyCLCtx, "Read Buffer C");
|
||||
|
||||
cl_event readbufferCEvent;
|
||||
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
|
||||
TracyCLZoneSetEvent(readbufferCEvent);
|
||||
}
|
||||
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
std::vector<float> durations(kernelLaunchEvts.size());
|
||||
for (int i=0; i<kernelLaunchEvts.size(); i++) {
|
||||
cl_event evt = kernelLaunchEvts[i];
|
||||
cl_ulong start;
|
||||
cl_ulong end;
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
|
||||
CL_ASSERT(clReleaseEvent(evt));
|
||||
durations[i] = (end - start) * 0.001f;
|
||||
std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast<int>(durations[i]) << "us" << std::endl;
|
||||
};
|
||||
float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size();
|
||||
float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) {
|
||||
auto d = v - avg;
|
||||
return acc + d*d;
|
||||
}) / (durations.size() - 1.0f);
|
||||
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
|
||||
|
||||
// User should ensure all events are finished, in this case, collect after the clFinish will do the trick.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
|
||||
{
|
||||
ZoneScopedN("Checking results");
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
assert(hostC[i] == hostA[i] + hostB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Results are correct!" << std::endl;
|
||||
|
||||
TracyCLDestroy(tracyCLCtx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
1
libs/tracy/examples/ToyPathTracer/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
Windows/Compiled*Shader.h
|
||||
4
libs/tracy/examples/ToyPathTracer/README
Normal file
@ -0,0 +1,4 @@
|
||||
https://github.com/aras-p/ToyPathTracer
|
||||
|
||||
Modified to render only 10 frames. Client part requires 12 GB, server part
|
||||
requires 6.4 GB.
|
||||
33
libs/tracy/examples/ToyPathTracer/Source/Config.h
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
#if defined(__APPLE__) && !defined(__METAL_VERSION__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#define kBackbufferWidth 1280
|
||||
#define kBackbufferHeight 720
|
||||
|
||||
#if defined(__EMSCRIPTEN__)
|
||||
#define CPU_CAN_DO_SIMD 0
|
||||
#define CPU_CAN_DO_THREADS 0
|
||||
#else
|
||||
#define CPU_CAN_DO_SIMD 1
|
||||
#define CPU_CAN_DO_THREADS 1
|
||||
#endif
|
||||
|
||||
|
||||
#define DO_SAMPLES_PER_PIXEL 4
|
||||
#define DO_ANIMATE_SMOOTHING 0.9f
|
||||
#define DO_LIGHT_SAMPLING 1
|
||||
#define DO_MITSUBA_COMPARE 0
|
||||
|
||||
// Should path tracing be done on the GPU with a compute shader?
|
||||
#define DO_COMPUTE_GPU 0
|
||||
#define kCSGroupSizeX 8
|
||||
#define kCSGroupSizeY 8
|
||||
#define kCSMaxObjects 64
|
||||
|
||||
// Should float3 struct use SSE/NEON?
|
||||
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
|
||||
|
||||
// Should HitSpheres function use SSE/NEON?
|
||||
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)
|
||||
192
libs/tracy/examples/ToyPathTracer/Source/MathSimd.h
Normal file
@ -0,0 +1,192 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define VM_INLINE __forceinline
|
||||
#else
|
||||
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
|
||||
#endif
|
||||
|
||||
#define kSimdWidth 4
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = _mm_loadu_ps(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { m = _mm_set_ps(w, z, y, x); }
|
||||
VM_INLINE explicit float4(float v) { m = _mm_set_ps1(v); }
|
||||
VM_INLINE explicit float4(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
VM_INLINE float getW() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))); }
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { return float4(_mm_add_ps(a.m, b.m)); }
VM_INLINE float4 operator- (float4 a, float4 b) { return float4(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float4 operator* (float4 a, float4 b) { return float4(_mm_mul_ps(a.m, b.m)); }

// Lane-wise comparisons; the result is returned as a bool4 mask.
VM_INLINE bool4 operator==(float4 a, float4 b) { return bool4(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool4 operator!=(float4 a, float4 b) { return bool4(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool4 operator< (float4 a, float4 b) { return bool4(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool4 operator> (float4 a, float4 b) { return bool4(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool4 operator<=(float4 a, float4 b) { return bool4(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool4 operator>=(float4 a, float4 b) { return bool4(_mm_cmpge_ps(a.m, b.m)); }

// Bitwise combination of masks.
VM_INLINE bool4 operator&(bool4 a, bool4 b) { return bool4(_mm_and_ps(a.m, b.m)); }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { return bool4(_mm_or_ps(a.m, b.m)); }

// Negation: XOR with -0.0f in every lane.
VM_INLINE float4 operator- (float4 a)
{
    const __m128 signBits = _mm_set1_ps(-0.0f);
    return float4(_mm_xor_ps(a.m, signBits));
}

// Lane-wise minimum / maximum.
VM_INLINE float4 min(float4 a, float4 b) { return float4(_mm_min_ps(a.m, b.m)); }
VM_INLINE float4 max(float4 a, float4 b) { return float4(_mm_max_ps(a.m, b.m)); }
|
||||
|
||||
// Horizontal minimum: returns the smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    // Lane 0 = min(x, z), lane 1 = min(y, w).
    const float4 halves = min(v, SHUFFLE4(v, 2, 3, 0, 0));
    // Lane 0 = min of the two partial results.
    const float4 lowest = min(halves, SHUFFLE4(halves, 1, 0, 0, 0));
    return lowest.getX();
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v) { return _mm_movemask_ps(v.m); }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
a.m = _mm_blendv_ps(a.m, b.m, cond.m);
|
||||
#else
|
||||
__m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
|
||||
a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m));
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
return _mm_blendv_epi8(a, b, _mm_castps_si128(cond.m));
|
||||
#else
|
||||
__m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31);
|
||||
return _mm_or_si128(_mm_and_si128(d, b), _mm_andnot_si128(d, a));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Lane-wise square root.
VM_INLINE float4 sqrtf(float4 v)
{
    const __m128 roots = _mm_sqrt_ps(v.m);
    return float4(roots);
}
|
||||
|
||||
#elif !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#define USE_NEON 1
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float4(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; }

// Lane-wise comparisons. The NEON compare intrinsics return uint32x4_t,
// while bool4 stores a float32x4_t, so the bits are carried over with an
// explicit vreinterpretq instead of relying on implicit vector conversions.
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }

// Bitwise combination of masks, done on the integer view of the lanes.
VM_INLINE bool4 operator&(bool4 a, bool4 b)
{
    a.m = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m)));
    return a;
}
VM_INLINE bool4 operator|(bool4 a, bool4 b)
{
    a.m = vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m)));
    return a;
}

// Negation and lane-wise minimum / maximum.
VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; }
VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
|
||||
|
||||
// Horizontal minimum: returns the smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    const float32x2_t lowHalf = vget_low_f32(v.m);
    const float32x2_t highHalf = vget_high_f32(v.m);
    // First pairwise min reduces four lanes to two, the second reduces two to one.
    const float32x2_t pairMins = vpmin_f32(lowHalf, highHalf);
    const float32x2_t overall = vpmin_f32(pairMins, pairMins);
    return vget_lane_f32(overall, 0);
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
a.m = vbslq_f32(cond.m, b.m, a.m);
|
||||
return a;
|
||||
}
|
||||
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
|
||||
{
|
||||
return vbslq_f32(cond.m, b, a);
|
||||
}
|
||||
|
||||
// Lane-wise square root, computed as v * rsqrt(v): starts from the hardware
// reciprocal-square-root estimate and applies three vrsqrtsq_f32 refinement
// steps before the final multiply.
VM_INLINE float4 sqrtf(float4 v)
{
    const float32x4_t value = v.m;
    float32x4_t invRoot = vrsqrteq_f32(value);
    for (int step = 0; step < 3; ++step)
    {
        const float32x4_t scaled = vmulq_f32(value, invRoot);
        const float32x4_t correction = vrsqrtsq_f32(scaled, invRoot);
        invRoot = vmulq_f32(invRoot, correction);
    }
    return float4(vmulq_f32(value, invRoot));
}
|
||||
|
||||
// Broadcast one lane of v to all four lanes. X and Y live in the low half
// of the register, Z and W in the high half.
VM_INLINE float4 splatX(float32x4_t v)
{
    const float32x2_t low = vget_low_f32(v);
    return float4(vdupq_lane_f32(low, 0));
}
VM_INLINE float4 splatY(float32x4_t v)
{
    const float32x2_t low = vget_low_f32(v);
    return float4(vdupq_lane_f32(low, 1));
}
VM_INLINE float4 splatZ(float32x4_t v)
{
    const float32x2_t high = vget_high_f32(v);
    return float4(vdupq_lane_f32(high, 0));
}
VM_INLINE float4 splatW(float32x4_t v)
{
    const float32x2_t high = vget_high_f32(v);
    return float4(vdupq_lane_f32(high, 1));
}
|
||||
|
||||
#endif
|
||||
203
libs/tracy/examples/ToyPathTracer/Source/Maths.cpp
Normal file
@ -0,0 +1,203 @@
|
||||
#include "Maths.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Xorshift-style 32-bit generator: advances `state` in place with the shift
// sequence <<13, >>17, <<15 and returns the new state.
static uint32_t XorShift32(uint32_t& state)
{
    uint32_t next = state;
    next ^= next << 13;
    next ^= next >> 17;
    next ^= next << 15;
    state = next;
    return next;
}
|
||||
|
||||
float RandomFloat01(uint32_t& state)
|
||||
{
|
||||
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
|
||||
}
|
||||
|
||||
// Rejection sampling: draws points with x and y in [-1, 1) and z = 0 until
// one satisfies dot(p, p) < 1, and returns it. Advances `state`.
float3 RandomInUnitDisk(uint32_t& state)
{
    float3 p;
    for (;;)
    {
        p = 2.0 * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
        if (!(dot(p,p) >= 1.0))
            break;
    }
    return p;
}
|
||||
|
||||
// Rejection sampling: draws points with each component in [-1, 1) until one
// satisfies sqLength(p) < 1, and returns it. Advances `state`.
float3 RandomInUnitSphere(uint32_t& state)
{
    float3 p;
    for (;;)
    {
        p = 2.0*float3(RandomFloat01(state),RandomFloat01(state),RandomFloat01(state)) - float3(1,1,1);
        if (!(sqLength(p) >= 1.0))
            break;
    }
    return p;
}
|
||||
|
||||
// Builds a unit-length vector from two random numbers: a height z in
// [-1, 1) and an angle in [0, 2*kPI) around the z axis. Advances `state`.
float3 RandomUnitVector(uint32_t& state)
{
    const float height = RandomFloat01(state) * 2.0f - 1.0f;
    const float angle = RandomFloat01(state) * 2.0f * kPI;
    // Radius of the circle at this height, so that x^2 + y^2 + z^2 = 1.
    const float radius = sqrtf(1.0f - height * height);
    const float px = radius * cosf(angle);
    const float py = radius * sinf(angle);
    return float3(px, py, height);
}
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
|
||||
{
|
||||
#if DO_HIT_SPHERES_SIMD
|
||||
float4 hitT = float4(tMax);
|
||||
#if USE_NEON
|
||||
int32x4_t id = vdupq_n_s32(-1);
|
||||
#else
|
||||
__m128i id = _mm_set1_epi32(-1);
|
||||
#endif
|
||||
|
||||
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
|
||||
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
|
||||
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
|
||||
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
|
||||
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
|
||||
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
|
||||
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
|
||||
#elif DO_FLOAT3_WITH_SIMD
|
||||
float4 rOrigX = splatX(r.orig.m);
|
||||
float4 rOrigY = splatY(r.orig.m);
|
||||
float4 rOrigZ = splatZ(r.orig.m);
|
||||
float4 rDirX = splatX(r.dir.m);
|
||||
float4 rDirY = splatY(r.dir.m);
|
||||
float4 rDirZ = splatZ(r.dir.m);
|
||||
#else
|
||||
float4 rOrigX = float4(r.orig.x);
|
||||
float4 rOrigY = float4(r.orig.y);
|
||||
float4 rOrigZ = float4(r.orig.z);
|
||||
float4 rDirX = float4(r.dir.x);
|
||||
float4 rDirY = float4(r.dir.y);
|
||||
float4 rDirZ = float4(r.dir.z);
|
||||
#endif
|
||||
float4 tMin4 = float4(tMin);
|
||||
#if USE_NEON
|
||||
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
|
||||
#else
|
||||
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
|
||||
#endif
|
||||
// process 4 spheres at once
|
||||
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
|
||||
{
|
||||
// load data for 4 spheres
|
||||
float4 sCenterX = float4(spheres.centerX + i);
|
||||
float4 sCenterY = float4(spheres.centerY + i);
|
||||
float4 sCenterZ = float4(spheres.centerZ + i);
|
||||
float4 sSqRadius = float4(spheres.sqRadius + i);
|
||||
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
|
||||
float4 coX = sCenterX - rOrigX;
|
||||
float4 coY = sCenterY - rOrigY;
|
||||
float4 coZ = sCenterZ - rOrigZ;
|
||||
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
|
||||
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
|
||||
float4 discr = nb * nb - c;
|
||||
bool4 discrPos = discr > float4(0.0f);
|
||||
// if ray hits any of the 4 spheres
|
||||
if (any(discrPos))
|
||||
{
|
||||
float4 discrSq = sqrtf(discr);
|
||||
|
||||
// ray could hit spheres at t0 & t1
|
||||
float4 t0 = nb - discrSq;
|
||||
float4 t1 = nb + discrSq;
|
||||
|
||||
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
|
||||
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
|
||||
// if hit, take it
|
||||
id = select(id, curId, msk);
|
||||
hitT = select(hitT, t, msk);
|
||||
}
|
||||
#if USE_NEON
|
||||
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
|
||||
#else
|
||||
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
|
||||
#endif
|
||||
}
|
||||
// now we have up to 4 hits, find and return closest one
|
||||
float minT = hmin(hitT);
|
||||
if (minT < tMax) // any actual hits?
|
||||
{
|
||||
int minMask = mask(hitT == float4(minT));
|
||||
if (minMask != 0)
|
||||
{
|
||||
int id_scalar[4];
|
||||
float hitT_scalar[4];
|
||||
#if USE_NEON
|
||||
vst1q_s32(id_scalar, id);
|
||||
vst1q_f32(hitT_scalar, hitT.m);
|
||||
#else
|
||||
_mm_storeu_si128((__m128i *)id_scalar, id);
|
||||
_mm_storeu_ps(hitT_scalar, hitT.m);
|
||||
#endif
|
||||
|
||||
// In general, you would do this with a bit scan (first set/trailing zero count).
|
||||
// But who cares, it's only 16 options.
|
||||
static const int laneId[16] =
|
||||
{
|
||||
0, 0, 1, 0, // 00xx
|
||||
2, 0, 1, 0, // 01xx
|
||||
3, 0, 1, 0, // 10xx
|
||||
2, 0, 1, 0, // 11xx
|
||||
};
|
||||
|
||||
int lane = laneId[minMask];
|
||||
int hitId = id_scalar[lane];
|
||||
float finalHitT = hitT_scalar[lane];
|
||||
|
||||
outHit.pos = r.pointAt(finalHitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
|
||||
outHit.t = finalHitT;
|
||||
return hitId;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
||||
#else // #if DO_HIT_SPHERES_SIMD
|
||||
|
||||
float hitT = tMax;
|
||||
int id = -1;
|
||||
for (int i = 0; i < spheres.count; ++i)
|
||||
{
|
||||
float coX = spheres.centerX[i] - r.orig.getX();
|
||||
float coY = spheres.centerY[i] - r.orig.getY();
|
||||
float coZ = spheres.centerZ[i] - r.orig.getZ();
|
||||
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
|
||||
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
|
||||
float discr = nb * nb - c;
|
||||
if (discr > 0)
|
||||
{
|
||||
float discrSq = sqrtf(discr);
|
||||
|
||||
// Try earlier t
|
||||
float t = nb - discrSq;
|
||||
if (t <= tMin) // before min, try later t!
|
||||
t = nb + discrSq;
|
||||
|
||||
if (t > tMin && t < hitT)
|
||||
{
|
||||
id = i;
|
||||
hitT = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (id != -1)
|
||||
{
|
||||
outHit.pos = r.pointAt(hitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
|
||||
outHit.t = hitT;
|
||||
return id;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
#endif // #else of #if DO_HIT_SPHERES_SIMD
|
||||
}
|
||||
436
libs/tracy/examples/ToyPathTracer/Source/Maths.h
Normal file
@ -0,0 +1,436 @@
|
||||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include "Config.h"
|
||||
#include "MathSimd.h"
|
||||
|
||||
#define kPI 3.1415926f
|
||||
|
||||
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
|
||||
#if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
|
||||
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
|
||||
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
|
||||
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
|
||||
VM_INLINE explicit float3(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
|
||||
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
|
||||
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = _mm_move_ss(m, _mm_set_ss(x));
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// ---- Component-wise arithmetic (SSE). All four lanes are processed; only
// lanes 0..2 are meaningful for float3.
VM_INLINE float3 operator+ (float3 a, float3 b) { return float3(_mm_add_ps(a.m, b.m)); }
VM_INLINE float3 operator- (float3 a, float3 b) { return float3(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float3 operator* (float3 a, float3 b) { return float3(_mm_mul_ps(a.m, b.m)); }
VM_INLINE float3 operator/ (float3 a, float3 b) { return float3(_mm_div_ps(a.m, b.m)); }
// Vector-scalar forms broadcast the scalar first.
VM_INLINE float3 operator* (float3 a, float b) { return float3(_mm_mul_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator/ (float3 a, float b) { return float3(_mm_div_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator* (float a, float3 b) { return float3(_mm_mul_ps(_mm_set1_ps(a), b.m)); }
VM_INLINE float3 operator/ (float a, float3 b) { return float3(_mm_div_ps(_mm_set1_ps(a), b.m)); }
// Compound assignment, expressed directly on the register.
VM_INLINE float3& operator+= (float3 &a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; }
// Comparisons yield a per-lane bit mask (all ones where true, all zeros where false).
VM_INLINE bool3 operator==(float3 a, float3 b) { return bool3(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool3 operator!=(float3 a, float3 b) { return bool3(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool3 operator< (float3 a, float3 b) { return bool3(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool3 operator> (float3 a, float3 b) { return bool3(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool3 operator<=(float3 a, float3 b) { return bool3(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool3 operator>=(float3 a, float3 b) { return bool3(_mm_cmpge_ps(a.m, b.m)); }
// Component-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { return float3(_mm_min_ps(a.m, b.m)); }
VM_INLINE float3 max(float3 a, float3 b) { return float3(_mm_max_ps(a.m, b.m)); }

// Negation, computed as 0 - a.
VM_INLINE float3 operator- (float3 a) { return float3(_mm_sub_ps(_mm_setzero_ps(), a.m)); }
|
||||
|
||||
// Horizontal minimum: the smallest of v.x, v.y, v.z.
VM_INLINE float hmin(float3 v)
{
    // Lane 0 of xyMin holds min(x, y); lane 2 still holds z.
    const float3 xyMin = min(v, SHUFFLE3(v, 1, 0, 2));
    // Bring z into lane 0 and fold it in.
    const float3 zFirst = SHUFFLE3(xyMin, 2, 0, 1);
    return min(xyMin, zFirst).getX();
}
|
||||
// Horizontal maximum: the largest of v.x, v.y, v.z.
VM_INLINE float hmax(float3 v)
{
    // Lane 0 of xyMax holds max(x, y); lane 2 still holds z.
    const float3 xyMax = max(v, SHUFFLE3(v, 1, 0, 2));
    // Bring z into lane 0 and fold it in.
    const float3 zFirst = SHUFFLE3(xyMax, 2, 0, 1);
    return max(xyMax, zFirst).getX();
}
|
||||
|
||||
// Cross product a x b.
//   x <- a.y*b.z - a.z*b.y
//   y <- a.z*b.x - a.x*b.z
//   z <- a.x*b.y - a.y*b.x
// Computing the terms in zxy order and rotating once at the end saves a shuffle.
VM_INLINE float3 cross(float3 a, float3 b)
{
    const float3 lhs = a.zxy() * b;
    const float3 rhs = a * b.zxy();
    return (lhs - rhs).zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
// Component-wise clamp of t: raised to at least a, then lowered to at most b.
VM_INLINE float3 clamp(float3 t, float3 a, float3 b)
{
    const float3 raised = max(t, a);
    return min(raised, b);
}
// Sum of the three components.
VM_INLINE float sum(float3 v)
{
    return v.getX() + v.getY() + v.getZ();
}
// Dot product: sum of the component-wise product.
VM_INLINE float dot(float3 a, float3 b)
{
    return sum(a * b);
}
|
||||
|
||||
#else // #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
// Three-component float vector stored in one NEON register.
// Lanes 0..2 hold x, y, z; lane 3 is unused (the component constructors
// set it to 0).
struct float3
{
    // Leaves the register uninitialised.
    VM_INLINE float3() {}
    // Loads x, y, z from p[0..2].
    VM_INLINE explicit float3(const float *p)
    {
        const float lanes[4] = {p[0], p[1], p[2], 0};
        m = vld1q_f32(lanes);
    }
    VM_INLINE explicit float3(float x, float y, float z)
    {
        const float lanes[4] = {x, y, z, 0};
        m = vld1q_f32(lanes);
    }
    // Broadcasts v to every lane.
    VM_INLINE explicit float3(float v) : m(vdupq_n_f32(v)) {}
    VM_INLINE explicit float3(float32x4_t v) : m(v) {}

    VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
    VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
    VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }

    // Returns (y, z, x) in lanes 0..2.
    VM_INLINE float3 yzx() const
    {
        const float32x2_t xy = vget_low_f32(m);
        const float32x2_t yz = vext_f32(xy, vget_high_f32(m), 1);
        return float3(vcombine_f32(yz, xy));
    }
    // Returns (z, x, y) in lanes 0..2.
    VM_INLINE float3 zxy() const
    {
        const int32x4_t bits = vreinterpretq_s32_f32(m);
        const float32x4_t rotated = vreinterpretq_f32_s32(vextq_s32(bits, bits, 1));
        return float3(vuzpq_f32(rotated, m).val[1]);
    }

    // Writes x, y, z to p[0..2].
    VM_INLINE void store(float *p) const
    {
        p[0] = getX();
        p[1] = getY();
        p[2] = getZ();
    }

    void setX(float x) { m = vsetq_lane_f32(x, m, 0); }
    void setY(float y) { m = vsetq_lane_f32(y, m, 1); }
    void setZ(float z) { m = vsetq_lane_f32(z, m, 2); }

    float32x4_t m;
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// Approximate per-lane reciprocal of v: the hardware estimate refined by
// two reciprocal-step iterations.
VM_INLINE float32x4_t rcp_2(float32x4_t v)
{
    float32x4_t estimate = vrecpeq_f32(v);
    for (int step = 0; step < 2; ++step)
        estimate = vmulq_f32(vrecpsq_f32(estimate, v), estimate);
    return estimate;
}
|
||||
|
||||
// ---- Component-wise arithmetic (NEON). All four lanes are processed; only
// lanes 0..2 are meaningful for float3.
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; }
// Division multiplies by the refined reciprocal estimate from rcp_2, so the
// result is approximate.
VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; }
VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; }
VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; }
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
// Comparisons yield a per-lane bit mask (all ones where true, all zeros where
// false). The NEON compare intrinsics return uint32x4_t, so the bits are
// reinterpreted explicitly instead of relying on lax vector conversions.
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m))); return a; }
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcltq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcleq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m)); return a; }
// Component-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; }

// Negation.
VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; }
|
||||
|
||||
// Horizontal minimum: the smallest of v.x, v.y, v.z.
// The fourth lane is not part of a float3 (the component constructors set it
// to 0), so it must not take part in the reduction; z is duplicated in its place.
VM_INLINE float hmin(float3 v)
{
    float32x2_t xy = vget_low_f32(v.m);
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    float32x2_t minOfHalfs = vpmin_f32(xy, zz);               // (min(x,y), z)
    float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs);
    return vget_lane_f32(minOfMinOfHalfs, 0);
}
|
||||
// Horizontal maximum: the largest of v.x, v.y, v.z.
// The fourth lane is not part of a float3 (the component constructors set it
// to 0), so it must not take part in the reduction; z is duplicated in its place.
VM_INLINE float hmax(float3 v)
{
    float32x2_t xy = vget_low_f32(v.m);
    float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    float32x2_t maxOfHalfs = vpmax_f32(xy, zz);               // (max(x,y), z)
    float32x2_t maxOfMaxOfHalfs = vpmax_f32(maxOfHalfs, maxOfHalfs);
    return vget_lane_f32(maxOfMaxOfHalfs, 0);
}
|
||||
|
||||
// Cross product a x b.
//   x <- a.y*b.z - a.z*b.y
//   y <- a.z*b.x - a.x*b.z
//   z <- a.x*b.y - a.y*b.x
// Computing the terms in zxy order and rotating once at the end saves a shuffle.
VM_INLINE float3 cross(float3 a, float3 b)
{
    const float3 lhs = a.zxy() * b;
    const float3 rhs = a * b.zxy();
    return (lhs - rhs).zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z (set when that lane's sign
// bit is set). The fourth lane is ignored: its weight is 0 so that any()/all(),
// which compare against 0 and 7, are not affected by whatever it holds.
VM_INLINE unsigned mask(float3 v)
{
    static const uint32x4_t movemask = { 1, 2, 4, 0 };
    static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
    uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
    // All-ones in every lane whose sign bit is set.
    uint32x4_t t1 = vtstq_u32(t0, highbit);
    // Keep only each lane's own bit weight.
    uint32x4_t t2 = vandq_u32(t1, movemask);
    // OR the four lanes together.
    uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
    return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
}
|
||||
// Branch helpers for comparison results:
// any() is true when mask(v) is non-zero, all() when it equals 7.
VM_INLINE bool any(bool3 v)
{
    return mask(v) != 0;
}
VM_INLINE bool all(bool3 v)
{
    return mask(v) == 7;
}

// Component-wise clamp of t: raised to at least a, then lowered to at most b.
VM_INLINE float3 clamp(float3 t, float3 a, float3 b)
{
    const float3 raised = max(t, a);
    return min(raised, b);
}
// Sum of the three components.
VM_INLINE float sum(float3 v)
{
    return v.getX() + v.getY() + v.getZ();
}
// Dot product: sum of the component-wise product.
VM_INLINE float dot(float3 a, float3 b)
{
    return sum(a * b);
}
|
||||
|
||||
|
||||
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
#else // #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// ---- Simple scalar C implementation
|
||||
|
||||
|
||||
struct float3
|
||||
{
|
||||
float3() : x(0), y(0), z(0) {}
|
||||
float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
|
||||
|
||||
float3 operator-() const { return float3(-x, -y, -z); }
|
||||
float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
|
||||
float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
|
||||
float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
|
||||
float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }
|
||||
|
||||
VM_INLINE float getX() const { return x; }
|
||||
VM_INLINE float getY() const { return y; }
|
||||
VM_INLINE float getZ() const { return z; }
|
||||
VM_INLINE void setX(float x_) { x = x_; }
|
||||
VM_INLINE void setY(float y_) { y = y_; }
|
||||
VM_INLINE void setZ(float z_) { z = z_; }
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
// Component-wise binary operators for the scalar float3, built on the
// compound-assignment members.
VM_INLINE float3 operator+(const float3& a, const float3& b)
{
    float3 result = a;
    result += b;
    return result;
}
VM_INLINE float3 operator-(const float3& a, const float3& b)
{
    float3 result = a;
    result -= b;
    return result;
}
VM_INLINE float3 operator*(const float3& a, const float3& b)
{
    float3 result = a;
    result *= b;
    return result;
}
// Uniform scale, with the scalar on either side.
VM_INLINE float3 operator*(const float3& a, float b)
{
    float3 result = a;
    result *= b;
    return result;
}
VM_INLINE float3 operator*(float a, const float3& b)
{
    float3 result = b;
    result *= a;
    return result;
}
// Dot product.
VM_INLINE float dot(const float3& a, const float3& b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z;
}
|
||||
// Cross product a x b. The y term is written as the negated (x,z) minor.
VM_INLINE float3 cross(const float3& a, const float3& b)
{
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = -(a.x*b.z - a.z*b.x);
    const float cz = a.x*b.y - a.y*b.x;
    return float3(cx, cy, cz);
}
|
||||
#endif // #else of #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// Euclidean length of v.
VM_INLINE float length(float3 v)
{
    return sqrtf(dot(v, v));
}
// Squared length of v (no square root).
VM_INLINE float sqLength(float3 v)
{
    return dot(v, v);
}
// v scaled by the reciprocal of its length. There is no guard for zero length.
VM_INLINE float3 normalize(float3 v)
{
    const float invLen = 1.0f / length(v);
    return v * invLen;
}
// Linear interpolation: a at t = 0, b at t = 1.
VM_INLINE float3 lerp(float3 a, float3 b, float t)
{
    const float3 delta = b - a;
    return a + delta * t;
}
|
||||
|
||||
|
||||
// Debug check that v is approximately unit length: its squared length must be
// within 0.01 of 1. Compiles to nothing when assert() is disabled.
inline void AssertUnit(float3 v)
{
    assert(0.01f > fabsf(sqLength(v) - 1.0f));
}
|
||||
|
||||
// Mirrors v about the plane with normal n: v - 2 * dot(v, n) * n.
inline float3 reflect(float3 v, float3 n)
{
    const float twiceProjection = 2 * dot(v, n);
    return v - twiceProjection * n;
}
|
||||
|
||||
// Refracts direction v (asserted to be unit length) through a surface with
// normal n, where nint is the ratio of refraction indices.
// Returns true and writes outRefracted when the discriminant is positive;
// otherwise returns false and leaves outRefracted untouched.
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
{
    AssertUnit(v);
    const float cosIncident = dot(v, n);
    const float discr = 1.0f - nint*nint*(1-cosIncident*cosIncident);
    if (!(discr > 0))
        return false;

    outRefracted = nint * (v - n*cosIncident) - n*sqrtf(discr);
    return true;
}
|
||||
// Schlick's polynomial approximation of reflectance.
//   cosine: cosine term supplied by the caller
//   ri:     refraction index
// Returns r0 + (1 - r0) * (1 - cosine)^5 with r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    const float ratio = (1 - ri) / (1 + ri);
    const float r0 = ratio * ratio;
    const float falloff = powf(1 - cosine, 5);
    return r0 + (1 - r0) * falloff;
}
|
||||
|
||||
struct Ray
|
||||
{
|
||||
Ray() {}
|
||||
Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
|
||||
|
||||
float3 pointAt(float t) const { return orig + dir * t; }
|
||||
|
||||
float3 orig;
|
||||
float3 dir;
|
||||
};
|
||||
|
||||
|
||||
// Result of a ray/sphere intersection, as filled in by HitSpheres.
struct Hit
|
||||
{
|
||||
float3 pos; // intersection point (ray.pointAt(t))
|
||||
float3 normal; // (pos - sphere center) scaled by the sphere's inverse radius
|
||||
float t; // ray parameter at the intersection
|
||||
};
|
||||
|
||||
|
||||
struct Sphere
|
||||
{
|
||||
Sphere() : radius(1.0f), invRadius(0.0f) {}
|
||||
Sphere(float3 center_, float radius_) : center(center_), radius(radius_), invRadius(0.0f) {}
|
||||
|
||||
void UpdateDerivedData() { invRadius = 1.0f/radius; }
|
||||
|
||||
float3 center;
|
||||
float radius;
|
||||
float invRadius;
|
||||
};
|
||||
|
||||
|
||||
// data for all spheres in a "structure of arrays" layout
|
||||
struct SpheresSoA
|
||||
{
|
||||
SpheresSoA(int c)
|
||||
{
|
||||
count = c;
|
||||
// we'll be processing spheres in kSimdWidth chunks, so make sure to allocate
|
||||
// enough space
|
||||
simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
|
||||
centerX = new float[simdCount];
|
||||
centerY = new float[simdCount];
|
||||
centerZ = new float[simdCount];
|
||||
sqRadius = new float[simdCount];
|
||||
invRadius = new float[simdCount];
|
||||
// set all data to "impossible sphere" state
|
||||
for (int i = count; i < simdCount; ++i)
|
||||
{
|
||||
centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
|
||||
sqRadius[i] = 0.0f;
|
||||
invRadius[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
~SpheresSoA()
|
||||
{
|
||||
delete[] centerX;
|
||||
delete[] centerY;
|
||||
delete[] centerZ;
|
||||
delete[] sqRadius;
|
||||
delete[] invRadius;
|
||||
}
|
||||
float* centerX;
|
||||
float* centerY;
|
||||
float* centerZ;
|
||||
float* sqRadius;
|
||||
float* invRadius;
|
||||
int simdCount;
|
||||
int count;
|
||||
};
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
|
||||
|
||||
float RandomFloat01(uint32_t& state);
|
||||
float3 RandomInUnitDisk(uint32_t& state);
|
||||
float3 RandomInUnitSphere(uint32_t& state);
|
||||
float3 RandomUnitVector(uint32_t& state);
|
||||
|
||||
struct Camera
|
||||
{
|
||||
Camera() {}
|
||||
// vfov is top to bottom in degrees
|
||||
Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
|
||||
{
|
||||
lensRadius = aperture / 2;
|
||||
float theta = vfov*kPI/180;
|
||||
float halfHeight = tanf(theta/2);
|
||||
float halfWidth = aspect * halfHeight;
|
||||
origin = lookFrom;
|
||||
w = normalize(lookFrom - lookAt);
|
||||
u = normalize(cross(vup, w));
|
||||
v = cross(w, u);
|
||||
lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
|
||||
horizontal = 2*halfWidth*focusDist*u;
|
||||
vertical = 2*halfHeight*focusDist*v;
|
||||
}
|
||||
|
||||
Ray GetRay(float s, float t, uint32_t& state) const
|
||||
{
|
||||
float3 rd = lensRadius * RandomInUnitDisk(state);
|
||||
float3 offset = u * rd.getX() + v * rd.getY();
|
||||
return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
|
||||
}
|
||||
|
||||
float3 origin;
|
||||
float3 lowerLeftCorner;
|
||||
float3 horizontal;
|
||||
float3 vertical;
|
||||
float3 u, v, w;
|
||||
float lensRadius;
|
||||
};
|
||||
|
||||
392
libs/tracy/examples/ToyPathTracer/Source/Test.cpp
Normal file
@ -0,0 +1,392 @@
|
||||
#include "Config.h"
|
||||
#include "Test.h"
|
||||
#include "Maths.h"
|
||||
#include <algorithm>
|
||||
#if CPU_CAN_DO_THREADS
|
||||
#include "enkiTS/TaskScheduler_c.h"
|
||||
#include <thread>
|
||||
#endif
|
||||
#include <atomic>
|
||||
|
||||
#include "../../../public/tracy/Tracy.hpp"
|
||||
|
||||
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
|
||||
#define DO_BIG_SCENE 1
|
||||
|
||||
static Sphere s_Spheres[] =
|
||||
{
|
||||
{float3(0,-100.5,-1), 100},
|
||||
{float3(2,0,-1), 0.5f},
|
||||
{float3(0,0,-1), 0.5f},
|
||||
{float3(-2,0,-1), 0.5f},
|
||||
{float3(2,0,1), 0.5f},
|
||||
{float3(0,0,1), 0.5f},
|
||||
{float3(-2,0,1), 0.5f},
|
||||
{float3(0.5f,1,0.5f), 0.5f},
|
||||
{float3(-1.5f,1.5f,0.f), 0.3f},
|
||||
#if DO_BIG_SCENE
|
||||
{float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
|
||||
{float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
|
||||
{float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
|
||||
{float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
|
||||
{float3(1.5f,1.5f,-2), 0.3f},
|
||||
#endif // #if DO_BIG_SCENE
|
||||
};
|
||||
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
|
||||
|
||||
static SpheresSoA s_SpheresSoA(kSphereCount);
|
||||
|
||||
// Surface description for one sphere; entries of s_SphereMats are indexed by sphere id.
struct Material
|
||||
{
|
||||
enum Type { Lambert, Metal, Dielectric }; // selects the branch taken in Scatter
|
||||
Type type;
|
||||
float3 albedo; // returned as the attenuation for Lambert and Metal
|
||||
float3 emissive; // emitted light; spheres with this set act as light sources
|
||||
float roughness; // Metal only: scales the random perturbation of the reflected ray
|
||||
float ri; // Dielectric only: refraction index
|
||||
};
|
||||
|
||||
static Material s_SphereMats[kSphereCount] =
|
||||
{
|
||||
{ Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.4f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.4f, 0.4f, 0.8f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.2f, 0 },
|
||||
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.6f, 0 },
|
||||
{ Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 1.5f },
|
||||
{ Material::Lambert, float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0, 0 },
|
||||
#if DO_BIG_SCENE
|
||||
{ Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
|
||||
{ Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
|
||||
#endif
|
||||
};
|
||||
|
||||
static int s_EmissiveSpheres[kSphereCount];
|
||||
static int s_EmissiveSphereCount;
|
||||
|
||||
static Camera s_Cam;
|
||||
|
||||
const float kMinT = 0.001f;
|
||||
const float kMaxT = 1.0e7f;
|
||||
const int kMaxDepth = 10;
|
||||
|
||||
|
||||
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
|
||||
{
|
||||
outID = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
|
||||
return outID != -1;
|
||||
}
|
||||
|
||||
|
||||
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
|
||||
{
|
||||
ZoneScoped;
|
||||
outLightE = float3(0,0,0);
|
||||
if (mat.type == Material::Lambert)
|
||||
{
|
||||
// random point on unit sphere that is tangent to the hit point
|
||||
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
|
||||
scattered = Ray(rec.pos, normalize(target - rec.pos));
|
||||
attenuation = mat.albedo;
|
||||
|
||||
// sample lights
|
||||
#if DO_LIGHT_SAMPLING
|
||||
for (int j = 0; j < s_EmissiveSphereCount; ++j)
|
||||
{
|
||||
int i = s_EmissiveSpheres[j];
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (&mat == &smat)
|
||||
continue; // skip self
|
||||
const Sphere& s = s_Spheres[i];
|
||||
|
||||
// create a random direction towards sphere
|
||||
// coord system for sampling: sw, su, sv
|
||||
float3 sw = normalize(s.center - rec.pos);
|
||||
float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
|
||||
float3 sv = cross(sw, su);
|
||||
// sample sphere by solid angle
|
||||
float cosAMax = sqrtf(1.0f - s.radius*s.radius / sqLength(rec.pos-s.center));
|
||||
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
|
||||
float cosA = 1.0f - eps1 + eps1 * cosAMax;
|
||||
float sinA = sqrtf(1.0f - cosA*cosA);
|
||||
float phi = 2 * kPI * eps2;
|
||||
float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
|
||||
//l = normalize(l); // NOTE(fg): This is already normalized, by construction.
|
||||
|
||||
// shoot shadow ray
|
||||
Hit lightHit;
|
||||
int hitID;
|
||||
++inoutRayCount;
|
||||
if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
|
||||
{
|
||||
float omega = 2 * kPI * (1-cosAMax);
|
||||
|
||||
float3 rdir = r_in.dir;
|
||||
AssertUnit(rdir);
|
||||
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
|
||||
outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else if (mat.type == Material::Metal)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 refl = reflect(r_in.dir, rec.normal);
|
||||
// reflected ray, and random inside of sphere based on roughness
|
||||
float roughness = mat.roughness;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
roughness = 0; // until we get better BRDF for metals
|
||||
#endif
|
||||
scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
|
||||
attenuation = mat.albedo;
|
||||
return dot(scattered.dir, rec.normal) > 0;
|
||||
}
|
||||
else if (mat.type == Material::Dielectric)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 outwardN;
|
||||
float3 rdir = r_in.dir;
|
||||
float3 refl = reflect(rdir, rec.normal);
|
||||
float nint;
|
||||
attenuation = float3(1,1,1);
|
||||
float3 refr;
|
||||
float reflProb;
|
||||
float cosine;
|
||||
if (dot(rdir, rec.normal) > 0)
|
||||
{
|
||||
outwardN = -rec.normal;
|
||||
nint = mat.ri;
|
||||
cosine = mat.ri * dot(rdir, rec.normal);
|
||||
}
|
||||
else
|
||||
{
|
||||
outwardN = rec.normal;
|
||||
nint = 1.0f / mat.ri;
|
||||
cosine = -dot(rdir, rec.normal);
|
||||
}
|
||||
if (refract(rdir, outwardN, nint, refr))
|
||||
{
|
||||
reflProb = schlick(cosine, mat.ri);
|
||||
}
|
||||
else
|
||||
{
|
||||
reflProb = 1;
|
||||
}
|
||||
if (RandomFloat01(state) < reflProb)
|
||||
scattered = Ray(rec.pos, normalize(refl));
|
||||
else
|
||||
scattered = Ray(rec.pos, normalize(refr));
|
||||
}
|
||||
else
|
||||
{
|
||||
attenuation = float3(1,0,1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Traces ray `r` through the scene and returns the colour it gathers.
//   r              - ray to trace
//   depth          - current bounce depth; no further scattering once it reaches kMaxDepth
//   inoutRayCount  - incremented for this ray (and passed on to Scatter / recursive calls)
//   state          - random number generator state, passed on to Scatter
//   doMaterialE    - with DO_LIGHT_SAMPLING: when false, the emission of the surface hit by
//                    this ray is dropped from the scattered result
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
{
    ZoneScoped;
    Hit rec;
    int id = 0;
    ++inoutRayCount;

    if (!HitWorld(r, kMinT, kMaxT, rec, id))
    {
        // Nothing was hit: return the sky colour.
#if DO_MITSUBA_COMPARE
        return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
#else
        float3 skyDir = r.dir;
        float t = 0.5f*(skyDir.getY() + 1.0f);
        return ((1.0f-t)*float3(1.0f, 1.0f, 1.0f) + t*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
#endif
    }

    Ray scattered;
    float3 attenuation;
    float3 lightE;
    const Material& mat = s_SphereMats[id];
    float3 matE = mat.emissive;

    // Stop at the depth limit (Scatter is then not called at all) or when the material
    // does not scatter; in both cases only the surface emission is returned.
    const bool didScatter = depth < kMaxDepth
        && Scatter(mat, r, rec, attenuation, scattered, lightE, inoutRayCount, state);
    if (!didScatter)
        return matE;

#if DO_LIGHT_SAMPLING
    // Don't add material emission if told so.
    if (!doMaterialE)
        matE = float3(0,0,0);
    // For Lambert materials we just did explicit light (emissive) sampling and already
    // accounted for their contribution, so if the next ray bounce hits the light again,
    // don't add its emission.
    doMaterialE = (mat.type != Material::Lambert);
#endif
    return matE + lightE + attenuation * Trace(scattered, depth+1, inoutRayCount, state, doMaterialE);
}
|
||||
|
||||
#if CPU_CAN_DO_THREADS
// enkiTS scheduler handle: created in InitializeTest, used by DrawTest and
// destroyed in ShutdownTest.
static enkiTaskScheduler* g_TS;
#endif
|
||||
|
||||
void InitializeTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
g_TS = enkiNewTaskScheduler();
|
||||
enkiInitTaskSchedulerNumThreads(g_TS, std::max<int>( 2, std::thread::hardware_concurrency() - 2));
|
||||
#endif
|
||||
}
|
||||
|
||||
void ShutdownTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiDeleteTaskScheduler(g_TS);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct JobData
|
||||
{
|
||||
float time;
|
||||
int frameCount;
|
||||
int screenWidth, screenHeight;
|
||||
float* backbuffer;
|
||||
Camera* cam;
|
||||
std::atomic<int> rayCount;
|
||||
unsigned testFlags;
|
||||
};
|
||||
|
||||
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData& data = *(JobData*)data_;
|
||||
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
|
||||
float invWidth = 1.0f / data.screenWidth;
|
||||
float invHeight = 1.0f / data.screenHeight;
|
||||
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
|
||||
if (data.testFlags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(data.testFlags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
int rayCount = 0;
|
||||
for (uint32_t y = start; y < end; ++y)
|
||||
{
|
||||
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
|
||||
for (int x = 0; x < data.screenWidth; ++x)
|
||||
{
|
||||
float3 col(0, 0, 0);
|
||||
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
|
||||
{
|
||||
float u = float(x + RandomFloat01(state)) * invWidth;
|
||||
float v = float(y + RandomFloat01(state)) * invHeight;
|
||||
Ray r = data.cam->GetRay(u, v, state);
|
||||
col += Trace(r, 0, rayCount, state);
|
||||
}
|
||||
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
|
||||
|
||||
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
|
||||
col = prev * lerpFac + col * (1-lerpFac);
|
||||
col.store(backbuffer);
|
||||
backbuffer += 4;
|
||||
}
|
||||
}
|
||||
data.rayCount += rayCount;
|
||||
}
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
if (testFlags & kFlagAnimate)
|
||||
{
|
||||
s_Spheres[1].center.setY(cosf(time) + 1.0f);
|
||||
s_Spheres[8].center.setZ(sinf(time)*0.3f);
|
||||
}
|
||||
float3 lookfrom(0, 2, 3);
|
||||
float3 lookat(0, 0, 0);
|
||||
float distToFocus = 3;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
float aperture = 0.0f;
|
||||
#else
|
||||
float aperture = 0.1f;
|
||||
#endif
|
||||
#if DO_BIG_SCENE
|
||||
aperture *= 0.2f;
|
||||
#endif
|
||||
|
||||
s_EmissiveSphereCount = 0;
|
||||
for (int i = 0; i < kSphereCount; ++i)
|
||||
{
|
||||
Sphere& s = s_Spheres[i];
|
||||
s.UpdateDerivedData();
|
||||
s_SpheresSoA.centerX[i] = s.center.getX();
|
||||
s_SpheresSoA.centerY[i] = s.center.getY();
|
||||
s_SpheresSoA.centerZ[i] = s.center.getZ();
|
||||
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
|
||||
s_SpheresSoA.invRadius[i] = s.invRadius;
|
||||
|
||||
// Remember IDs of emissive spheres (light sources)
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
|
||||
{
|
||||
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
|
||||
s_EmissiveSphereCount++;
|
||||
}
|
||||
}
|
||||
|
||||
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
|
||||
}
|
||||
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData args;
|
||||
args.time = time;
|
||||
args.frameCount = frameCount;
|
||||
args.screenWidth = screenWidth;
|
||||
args.screenHeight = screenHeight;
|
||||
args.backbuffer = backbuffer;
|
||||
args.cam = &s_Cam;
|
||||
args.testFlags = testFlags;
|
||||
args.rayCount = 0;
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
|
||||
bool threaded = true;
|
||||
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
|
||||
enkiWaitForTaskSet(g_TS, task);
|
||||
enkiDeleteTaskSet(task);
|
||||
#else
|
||||
TraceRowJob(0, screenHeight, 0, &args);
|
||||
#endif
|
||||
|
||||
outRayCount = args.rayCount;
|
||||
}
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
|
||||
{
|
||||
ZoneScoped;
|
||||
outCount = kSphereCount;
|
||||
outObjectSize = sizeof(Sphere);
|
||||
outMaterialSize = sizeof(Material);
|
||||
outCamSize = sizeof(Camera);
|
||||
}
|
||||
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
|
||||
{
|
||||
ZoneScoped;
|
||||
memcpy(outObjects, s_Spheres, kSphereCount * sizeof(s_Spheres[0]));
|
||||
memcpy(outMaterials, s_SphereMats, kSphereCount * sizeof(s_SphereMats[0]));
|
||||
memcpy(outCam, &s_Cam, sizeof(s_Cam));
|
||||
memcpy(outEmissives, s_EmissiveSpheres, s_EmissiveSphereCount * sizeof(s_EmissiveSpheres[0]));
|
||||
*outEmissiveCount = s_EmissiveSphereCount;
|
||||
}
|
||||
17
libs/tracy/examples/ToyPathTracer/Source/Test.h
Normal file
@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
// Bit flags passed as `testFlags` to UpdateTest / DrawTest.
enum TestFlags
{
    kFlagAnimate     = (1 << 0),   // animate the scene
    kFlagProgressive = (1 << 1),   // blend each frame with the previous back buffer contents
};
|
||||
|
||||
// One-time setup / teardown of the test.
void InitializeTest();
void ShutdownTest();

// Per-frame scene update; `testFlags` is a bitmask of TestFlags.
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);

// Renders one frame into `backbuffer`; `outRayCount` receives the number of rays traced.
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);

// Reports the object count and the byte sizes of the object, material and camera structures.
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);

// Copies objects, materials, camera and the emissive object list into caller-provided buffers.
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);
|
||||
79
libs/tracy/examples/ToyPathTracer/Source/enkiTS/Atomics.h
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Platform glue: barrier and alignment macros used by the lock-less pipe.
// NOTE(review): both expansions of the barrier macros look like compiler-level
// barriers only (an intrinsic / an empty asm statement with a "memory" clobber);
// confirm they are sufficient on the CPUs this code is expected to run on.
#ifdef _WIN32
    #define WIN32_LEAN_AND_MEAN
    #include <Windows.h>
    #undef GetObject
    #include <intrin.h>

    extern "C" void _ReadWriteBarrier();
    #pragma intrinsic(_ReadWriteBarrier)
    #pragma intrinsic(_InterlockedCompareExchange)
    #pragma intrinsic(_InterlockedExchangeAdd)

    // Memory barriers to prevent re-ordering
    #define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
    #define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
    // Requests x-byte alignment for the declaration it is attached to
    #define BASE_ALIGN(x) __declspec( align( x ) )

#else
    // Memory barriers to prevent re-ordering
    #define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
    #define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
    // Requests x-byte alignment for the declaration it is attached to
    #define BASE_ALIGN(x) __attribute__ ((aligned( x )))
#endif
|
||||
|
||||
namespace enki
{
    // Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
    // Returns the old value of *pDest (so if successful, returns compareWith).
    inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
    {
#ifdef _WIN32
        // assumes two's complement - unsigned / signed conversion leads to same bit pattern
        volatile long* pTarget = (volatile long*)pDest;
        return _InterlockedCompareExchange( pTarget, swapTo, compareWith );
#else
        const uint32_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
        return previous;
#endif
    }

    // 64-bit overload of the compare-and-swap above; same contract.
    inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
    {
#ifdef _WIN32
        // assumes two's complement - unsigned / signed conversion leads to same bit pattern
        __int64 volatile* pTarget = (__int64 volatile*)pDest;
        return _InterlockedCompareExchange64( pTarget, swapTo, compareWith );
#else
        const uint64_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
        return previous;
#endif
    }

    // Atomically performs: tmp = *pDest; *pDest += value; return tmp;
    inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
    {
#ifdef _WIN32
        long* pTarget = (long*)pDest;
        return _InterlockedExchangeAdd( pTarget, value );
#else
        const int32_t previous = __sync_fetch_and_add( pDest, value );
        return previous;
#endif
    }

}
|
||||
@ -0,0 +1,240 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "Atomics.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
|
||||
// Readers can only read from the back of the pipe
|
||||
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
|
||||
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
|
||||
// Note: using log2 sizes so we do not need to clamp (multi-operation)
|
||||
// T is the contained type
|
||||
// Note this is not true lockless as the use of flags as a form of lock state.
|
||||
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
|
||||
{
|
||||
public:
|
||||
LockLessMultiReadPipe();
|
||||
~LockLessMultiReadPipe() {}
|
||||
|
||||
// ReaderTryReadBack returns false if we were unable to read
|
||||
// This is thread safe for both multiple readers and the writer
|
||||
bool ReaderTryReadBack( T* pOut );
|
||||
|
||||
// WriterTryReadFront returns false if we were unable to read
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryReadFront( T* pOut );
|
||||
|
||||
// WriterTryWriteFront returns false if we were unable to write
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryWriteFront( const T& in );
|
||||
|
||||
// IsPipeEmpty() is a utility function, not intended for general use
|
||||
// Should only be used very prudently.
|
||||
bool IsPipeEmpty() const
|
||||
{
|
||||
return 0 == m_WriteIndex - m_ReadCount;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_WriteIndex = 0;
|
||||
m_ReadIndex = 0;
|
||||
m_ReadCount = 0;
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
private:
|
||||
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
|
||||
const static uint32_t ms_cIndexMask = ms_cSize - 1;
|
||||
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
|
||||
|
||||
T m_Buffer[ ms_cSize ];
|
||||
|
||||
// read and write indexes allow fast access to the pipe, but actual access
|
||||
// controlled by the access flags.
|
||||
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
|
||||
volatile uint32_t m_Flags[ ms_cSize ];
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
|
||||
};
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
|
||||
: m_WriteIndex(0)
|
||||
, m_ReadIndex(0)
|
||||
, m_ReadCount(0)
|
||||
{
|
||||
assert( cSizeLog2 < 32 );
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
|
||||
{
|
||||
|
||||
uint32_t actualReadIndex;
|
||||
|
||||
uint32_t readCount = m_ReadCount;
|
||||
|
||||
// We get hold of read index for consistency,
|
||||
// and do first pass starting at read count
|
||||
uint32_t readIndexToUse = readCount;
|
||||
|
||||
|
||||
while(true)
|
||||
{
|
||||
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if( readIndexToUse >= writeIndex )
|
||||
{
|
||||
// move back to start
|
||||
readIndexToUse = m_ReadIndex;
|
||||
}
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
actualReadIndex = readIndexToUse & ms_cIndexMask;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange
|
||||
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
++readIndexToUse;
|
||||
|
||||
//update known readcount
|
||||
readCount = m_ReadCount;
|
||||
}
|
||||
|
||||
// we update the read index using an atomic add, as we've only read one piece of data.
|
||||
// this ensure consistency of the read index, and the above loop ensures readers
|
||||
// only read from unread data
|
||||
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
|
||||
|
||||
BASE_MEMORYBARRIER_ACQUIRE();
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
|
||||
{
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
uint32_t frontReadIndex = writeIndex;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
|
||||
uint32_t previous = FLAG_INVALID;
|
||||
uint32_t actualReadIndex = 0;
|
||||
while( true )
|
||||
{
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t readCount = m_ReadCount;
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe || 0 == frontReadIndex )
|
||||
{
|
||||
// frontReadIndex can get to 0 here if that item was just being read by another thread.
|
||||
m_ReadIndex = readCount;
|
||||
return false;
|
||||
}
|
||||
--frontReadIndex;
|
||||
actualReadIndex = frontReadIndex & ms_cIndexMask;
|
||||
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if( m_ReadIndex >= frontReadIndex )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and writer owns the write index
|
||||
// we only move one back as this is as many as we have read, not where we have read from.
|
||||
--m_WriteIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
|
||||
{
|
||||
// The writer 'owns' the write index, and readers can only reduce
|
||||
// the amount of data in the pipe.
|
||||
// We get hold of both values for consistency and to reduce false sharing
|
||||
// impacting more than one access
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
|
||||
|
||||
// a reader may still be reading this item, as there are multiple readers
|
||||
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
|
||||
{
|
||||
return false; // still being read, so have caught up with tail.
|
||||
}
|
||||
|
||||
|
||||
// as we are the only writer we can update the data without atomics
|
||||
// whilst the write index has not been updated
|
||||
m_Buffer[ actualWriteIndex ] = in;
|
||||
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
|
||||
|
||||
// We need to ensure the above writes occur prior to updating the write index,
|
||||
// otherwise another thread might read before it's finished
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and the writer controls the write index
|
||||
++writeIndex;
|
||||
m_WriteIndex = writeIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,437 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "TaskScheduler.h"
|
||||
#include "LockLessMultiReadPipe.h"
|
||||
|
||||
|
||||
|
||||
using namespace enki;
|
||||
|
||||
|
||||
static const uint32_t PIPESIZE_LOG2 = 8;
|
||||
static const uint32_t SPIN_COUNT = 100;
|
||||
static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10;
|
||||
static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8;
|
||||
|
||||
// each software thread gets it's own copy of gtl_threadNum, so this is safe to use as a static variable
|
||||
static THREAD_LOCAL uint32_t gtl_threadNum = 0;
|
||||
|
||||
namespace enki
|
||||
{
|
||||
struct SubTaskSet
|
||||
{
|
||||
ITaskSet* pTask;
|
||||
TaskSetPartition partition;
|
||||
};
|
||||
|
||||
// we derive class TaskPipe rather than typedef to get forward declaration working easily
|
||||
class TaskPipe : public LockLessMultiReadPipe<PIPESIZE_LOG2,enki::SubTaskSet> {};
|
||||
|
||||
struct ThreadArgs
|
||||
{
|
||||
uint32_t threadNum;
|
||||
TaskScheduler* pTaskScheduler;
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
|
||||
{
|
||||
SubTaskSet splitTask = subTask_;
|
||||
uint32_t rangeLeft = subTask_.partition.end - subTask_.partition.start;
|
||||
|
||||
if( rangeToSplit_ > rangeLeft )
|
||||
{
|
||||
rangeToSplit_ = rangeLeft;
|
||||
}
|
||||
splitTask.partition.end = subTask_.partition.start + rangeToSplit_;
|
||||
subTask_.partition.start = splitTask.partition.end;
|
||||
return splitTask;
|
||||
}
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _M_IX86 || defined _M_X64
|
||||
#pragma intrinsic(_mm_pause)
|
||||
inline void Pause() { _mm_pause(); }
|
||||
#endif
|
||||
#elif defined __i386__ || defined __x86_64__
|
||||
inline void Pause() { __asm__ __volatile__("pause;"); }
|
||||
#else
|
||||
inline void Pause() { ;} // may have NOP or yield equiv
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Invokes the profiler callback `func_` with `threadnum_`; does nothing when `func_` is null.
static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
{
    if( !func_ )
    {
        return;
    }
    func_(threadnum_);
}
|
||||
|
||||
// Returns a pointer to the scheduler's own profiler callback table.
ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
{
    ProfilerCallbacks* pCallbacks = &m_ProfilerCallbacks;
    return pCallbacks;
}
|
||||
|
||||
// Entry point of a worker thread.
//   pArgs - pointer to the ThreadArgs (thread number + owning scheduler) for this thread
// Runs tasks while the scheduler's m_bRunning flag is set. When no task is found the
// thread spins with a growing Pause() backoff, and after more than SPIN_COUNT
// consecutive misses it calls WaitForTasks. On exit it decrements m_NumThreadsRunning
// and fires the threadStop profiler callback.
THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
{
    ThreadArgs args          = *(ThreadArgs*)pArgs;
    uint32_t threadNum       = args.threadNum;
    TaskScheduler* pTS       = args.pTaskScheduler;
    gtl_threadNum            = threadNum;

    SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum );

    uint32_t spinCount = 0;
    uint32_t hintPipeToCheck_io = threadNum + 1;    // does not need to be clamped.
    while( pTS->m_bRunning )
    {
        if( pTS->TryRunTask( threadNum, hintPipeToCheck_io ) )
        {
            // ran a task: reset the miss counter
            spinCount = 0;
            continue;
        }

        // no tasks, will spin then wait
        ++spinCount;
        if( spinCount > SPIN_COUNT )
        {
            pTS->WaitForTasks( threadNum );
            spinCount = 0;
            continue;
        }

        for( uint32_t backoff = spinCount * SPIN_BACKOFF_MULTIPLIER; backoff != 0; --backoff )
        {
            Pause();
        }
    }

    AtomicAdd( &pTS->m_NumThreadsRunning, -1 );
    SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum );

    return 0;
}
|
||||
|
||||
|
||||
void TaskScheduler::StartThreads()
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_bRunning = true;
|
||||
|
||||
SemaphoreCreate( m_NewTaskSemaphore );
|
||||
|
||||
// we create one less thread than m_NumThreads as the main thread counts as one
|
||||
m_pThreadNumStore = new ThreadArgs[m_NumThreads];
|
||||
m_pThreadIDs = new threadid_t[m_NumThreads];
|
||||
m_pThreadNumStore[0].threadNum = 0;
|
||||
m_pThreadNumStore[0].pTaskScheduler = this;
|
||||
m_pThreadIDs[0] = 0;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 1;// acount for main thread
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
m_pThreadNumStore[thread].threadNum = thread;
|
||||
m_pThreadNumStore[thread].pTaskScheduler = this;
|
||||
ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
|
||||
++m_NumThreadsRunning;
|
||||
}
|
||||
|
||||
// ensure we have sufficient tasks to equally fill either all threads including main
|
||||
// or just the threads we've launched, this is outside the firstinit as we want to be able
|
||||
// to runtime change it
|
||||
if( 1 == m_NumThreads )
|
||||
{
|
||||
m_NumPartitions = 1;
|
||||
m_NumInitialPartitions = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
|
||||
m_NumInitialPartitions = m_NumThreads - 1;
|
||||
if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
|
||||
{
|
||||
m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
|
||||
}
|
||||
}
|
||||
|
||||
m_bHaveThreads = true;
|
||||
}
|
||||
|
||||
void TaskScheduler::StopThreads( bool bWait_ )
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
// wait for them threads quit before deleting data
|
||||
m_bRunning = false;
|
||||
while( bWait_ && m_NumThreadsRunning > 1 )
|
||||
{
|
||||
// keep firing event to ensure all threads pick up state of m_bRunning
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
|
||||
}
|
||||
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
ThreadTerminate( m_pThreadIDs[thread] );
|
||||
}
|
||||
|
||||
m_NumThreads = 0;
|
||||
delete[] m_pThreadNumStore;
|
||||
delete[] m_pThreadIDs;
|
||||
m_pThreadNumStore = 0;
|
||||
m_pThreadIDs = 0;
|
||||
SemaphoreClose( m_NewTaskSemaphore );
|
||||
|
||||
m_bHaveThreads = false;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
|
||||
{
|
||||
// check for tasks
|
||||
SubTaskSet subTask;
|
||||
bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
|
||||
|
||||
uint32_t threadToCheck = hintPipeToCheck_io_;
|
||||
uint32_t checkCount = 0;
|
||||
while( !bHaveTask && checkCount < m_NumThreads )
|
||||
{
|
||||
threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads;
|
||||
if( threadToCheck != threadNum )
|
||||
{
|
||||
bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
|
||||
}
|
||||
++checkCount;
|
||||
}
|
||||
|
||||
if( bHaveTask )
|
||||
{
|
||||
// update hint, will preserve value unless actually got task from another thread.
|
||||
hintPipeToCheck_io_ = threadToCheck;
|
||||
|
||||
uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
|
||||
if( subTask.pTask->m_RangeToRun < partitionSize )
|
||||
{
|
||||
SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun );
|
||||
SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 );
|
||||
taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
|
||||
AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// the task has already been divided up by AddTaskSetToPipe, so just run it
|
||||
subTask.pTask->ExecuteRange( subTask.partition, threadNum );
|
||||
AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
}
|
||||
|
||||
return bHaveTask;
|
||||
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitForTasks( uint32_t threadNum )
|
||||
{
|
||||
// We incrememt the number of threads waiting here in order
|
||||
// to ensure that the check for tasks occurs after the increment
|
||||
// to prevent a task being added after a check, then the thread waiting.
|
||||
// This will occasionally result in threads being mistakenly awoken,
|
||||
// but they will then go back to sleep.
|
||||
AtomicAdd( &m_NumThreadsWaiting, 1 );
|
||||
|
||||
bool bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !bHaveTasks )
|
||||
{
|
||||
SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
|
||||
SemaphoreWait( m_NewTaskSemaphore );
|
||||
SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
|
||||
}
|
||||
|
||||
int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
|
||||
assert( prev != 0 );
|
||||
}
|
||||
|
||||
void TaskScheduler::WakeThreads()
|
||||
{
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
|
||||
}
|
||||
|
||||
void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ )
|
||||
{
|
||||
int32_t numAdded = 0;
|
||||
while( subTask_.partition.start != subTask_.partition.end )
|
||||
{
|
||||
SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );
|
||||
|
||||
// add the partition to the pipe
|
||||
++numAdded;
|
||||
if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) )
|
||||
{
|
||||
if( numAdded > 1 )
|
||||
{
|
||||
WakeThreads();
|
||||
}
|
||||
// alter range to run the appropriate fraction
|
||||
if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ )
|
||||
{
|
||||
taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun;
|
||||
subTask_.partition.start = taskToAdd.partition.end;
|
||||
}
|
||||
taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
|
||||
--numAdded;
|
||||
}
|
||||
}
|
||||
|
||||
// increment running count by number added
|
||||
AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );
|
||||
|
||||
WakeThreads();
|
||||
}
|
||||
|
||||
// Queues a complete task set for execution.
//   pTaskSet - task set to run; its m_RunningCount and m_RangeToRun are overwritten here
// m_RangeToRun becomes m_SetSize / m_NumPartitions and the initial chunk size
// m_SetSize / m_NumInitialPartitions, both raised to at least m_MinRange.
void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
{
    // set running count to -1 to guarantee it won't be found complete until all subtasks added
    pTaskSet->m_RunningCount = -1;

    // divide task up and add to pipe
    pTaskSet->m_RangeToRun = pTaskSet->m_SetSize / m_NumPartitions;
    if( pTaskSet->m_RangeToRun < pTaskSet->m_MinRange )
    {
        pTaskSet->m_RangeToRun = pTaskSet->m_MinRange;
    }

    uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions;
    if( rangeToSplit < pTaskSet->m_MinRange )
    {
        rangeToSplit = pTaskSet->m_MinRange;
    }

    // the whole set as one partition: [0, m_SetSize)
    SubTaskSet wholeSet;
    wholeSet.pTask           = pTaskSet;
    wholeSet.partition.start = 0;
    wholeSet.partition.end   = pTaskSet->m_SetSize;

    // offset of 1 compensates for the -1 set above
    SplitAndAddTask( gtl_threadNum, wholeSet, rangeToSplit, 1 );
}
|
||||
|
||||
void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
|
||||
{
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
if( pTaskSet )
|
||||
{
|
||||
while( pTaskSet->m_RunningCount )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
// should add a spin then wait for task completion event.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAll()
|
||||
{
|
||||
bool bHaveTasks = true;
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
int32_t threadsRunning = m_NumThreadsRunning - 1;
|
||||
while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAllAndShutdown()
|
||||
{
|
||||
WaitforAll();
|
||||
StopThreads(true);
|
||||
delete[] m_pPipesPerThread;
|
||||
m_pPipesPerThread = 0;
|
||||
}
|
||||
|
||||
uint32_t TaskScheduler::GetNumTaskThreads() const
|
||||
{
|
||||
return m_NumThreads;
|
||||
}
|
||||
|
||||
// Constructs an idle scheduler: no pipes, no threads, all counters zero and every
// profiler callback cleared. Initialize() must be called before adding tasks.
TaskScheduler::TaskScheduler()
    : m_pPipesPerThread(NULL)
    , m_NumThreads(0)
    , m_pThreadNumStore(NULL)
    , m_pThreadIDs(NULL)
    , m_bRunning(false)
    , m_NumThreadsRunning(0)
    , m_NumThreadsWaiting(0)
    , m_NumPartitions(0)
    , m_NumInitialPartitions(0)   // previously left uninitialized; AddTaskSetToPipe divides by it
    , m_bHaveThreads(false)
{
    // Null every callback pointer so unset profiler hooks can be detected.
    memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks));
}
|
||||
|
||||
// Stops the threads (blocking until they have finished) and releases the pipes.
TaskScheduler::~TaskScheduler()
{
    StopThreads( true );

    delete[] m_pPipesPerThread;
    m_pPipesPerThread = NULL;
}
|
||||
|
||||
void TaskScheduler::Initialize( uint32_t numThreads_ )
|
||||
{
|
||||
assert( numThreads_ );
|
||||
StopThreads( true ); // Stops threads, waiting for them.
|
||||
delete[] m_pPipesPerThread;
|
||||
|
||||
m_NumThreads = numThreads_;
|
||||
|
||||
m_pPipesPerThread = new TaskPipe[ m_NumThreads ];
|
||||
|
||||
StartThreads();
|
||||
}
|
||||
|
||||
void TaskScheduler::Initialize()
|
||||
{
|
||||
Initialize( GetNumHardwareThreads() );
|
||||
}
|
||||
177
libs/tracy/examples/ToyPathTracer/Source/enkiTS/TaskScheduler.h
Normal file
@ -0,0 +1,177 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "Threads.h"
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
// A contiguous sub-range of a task set. The scheduler treats it as half-open:
// [start, end).
struct TaskSetPartition
{
    uint32_t start;
    uint32_t end;
};

class  TaskScheduler;
class  TaskPipe;
struct ThreadArgs;
struct SubTaskSet;

// Subclass ITaskSet to create tasks.
// TaskSets can be re-used, but check GetIsComplete() before adding one again.
class ITaskSet
{
public:
    ITaskSet()
        : m_SetSize(1)
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}

    ITaskSet( uint32_t setSize_ )
        : m_SetSize( setSize_ )
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}

    ITaskSet( uint32_t setSize_, uint32_t minRange_ )
        : m_SetSize( setSize_ )
        , m_MinRange( minRange_ )
        , m_RunningCount(0)
        , m_RangeToRun(minRange_)
    {}

    // Polymorphic base: make destruction through an ITaskSet* well defined.
    virtual ~ITaskSet() {}

    // ExecuteRange should be overridden to process tasks. It will be called with a
    // range where range.start >= 0; range.start < range.end; and range.end <= m_SetSize.
    // The range values should be mapped so that linearly processing them in order is
    // cache friendly, i.e. neighbouring values should be close together.
    // threadnum should not be used for changing processing of data; its intended
    // purpose is to allow per-thread data buckets for output.
    virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;

    // Size of set - usually the number of data items to be processed, see ExecuteRange.
    // Defaults to 1.
    uint32_t m_SetSize;

    // Minimum size of a TaskSetPartition range when splitting a task set into partitions.
    // This should be set to a value which results in computation effort of at least 10k
    // clock cycles to minimize task scheduler overhead.
    // NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a
    // multiple of m_MinRange.
    // Also known as grain size in literature.
    uint32_t m_MinRange;

    // True when no subtasks of this set are queued or running (running count is zero).
    bool GetIsComplete() const
    {
        return 0 == m_RunningCount;
    }

private:
    friend class TaskScheduler;
    volatile int32_t m_RunningCount;    // outstanding subtasks; maintained by TaskScheduler
    uint32_t         m_RangeToRun;      // per-subtask range chosen by TaskScheduler
};
|
||||
|
||||
// Hooks the TaskScheduler exposes for profilers. Each callback receives a thread number.
typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );

// Field order is significant: the C wrapper casts this struct to enkiProfilerCallbacks.
struct ProfilerCallbacks
{
    ProfilerCallbackFunc threadStart;
    ProfilerCallbackFunc threadStop;
    ProfilerCallbackFunc waitStart;    // invoked just before a thread blocks on the task semaphore
    ProfilerCallbackFunc waitStop;     // invoked right after that wait returns
};
|
||||
|
||||
class TaskScheduler
|
||||
{
|
||||
public:
|
||||
TaskScheduler();
|
||||
~TaskScheduler();
|
||||
|
||||
// Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
|
||||
|
||||
// Initialize() will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void Initialize();
|
||||
|
||||
// Initialize( numThreads_ ) - numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void Initialize( uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Adds the TaskSet to pipe and returns if the pipe is not full.
|
||||
// If the pipe is full, pTaskSet is run.
|
||||
// should only be called from main thread, or within a task
|
||||
void AddTaskSetToPipe( ITaskSet* pTaskSet );
|
||||
|
||||
// Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void WaitforTaskSet( const ITaskSet* pTaskSet );
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAll();
|
||||
|
||||
// Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAllAndShutdown();
|
||||
|
||||
// Returns the number of threads created for running tasks + 1
|
||||
// to account for the main thread.
|
||||
uint32_t GetNumTaskThreads() const;
|
||||
|
||||
// Returns the ProfilerCallbacks structure so that it can be modified to
|
||||
// set the callbacks.
|
||||
ProfilerCallbacks* GetProfilerCallbacks();
|
||||
|
||||
private:
|
||||
static THREADFUNC_DECL TaskingThreadFunction( void* pArgs );
|
||||
void WaitForTasks( uint32_t threadNum );
|
||||
bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ );
|
||||
void StartThreads();
|
||||
void StopThreads( bool bWait_ );
|
||||
void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ );
|
||||
void WakeThreads();
|
||||
|
||||
TaskPipe* m_pPipesPerThread;
|
||||
|
||||
uint32_t m_NumThreads;
|
||||
ThreadArgs* m_pThreadNumStore;
|
||||
threadid_t* m_pThreadIDs;
|
||||
volatile bool m_bRunning;
|
||||
volatile int32_t m_NumThreadsRunning;
|
||||
volatile int32_t m_NumThreadsWaiting;
|
||||
uint32_t m_NumPartitions;
|
||||
uint32_t m_NumInitialPartitions;
|
||||
semaphoreid_t m_NewTaskSemaphore;
|
||||
bool m_bHaveThreads;
|
||||
ProfilerCallbacks m_ProfilerCallbacks;
|
||||
|
||||
TaskScheduler( const TaskScheduler& nocopy );
|
||||
TaskScheduler& operator=( const TaskScheduler& nocopy );
|
||||
};
|
||||
|
||||
}
|
||||
@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include "TaskScheduler_c.h"
|
||||
#include "TaskScheduler.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
using namespace enki;
|
||||
|
||||
struct enkiTaskScheduler : TaskScheduler
|
||||
{
|
||||
};
|
||||
|
||||
struct enkiTaskSet : ITaskSet
|
||||
{
|
||||
enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
|
||||
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
|
||||
{
|
||||
taskFun( range.start, range.end, threadnum, pArgs );
|
||||
}
|
||||
|
||||
enkiTaskExecuteRange taskFun;
|
||||
void* pArgs;
|
||||
};
|
||||
|
||||
// Allocates a new scheduler with `new`; release it with enkiDeleteTaskScheduler.
enkiTaskScheduler* enkiNewTaskScheduler()
{
    return new enkiTaskScheduler();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize().
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.Initialize();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize( numThreads_ ).
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.Initialize( numThreads_ );
}
|
||||
|
||||
// Destroys a scheduler created by enkiNewTaskScheduler.
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler* pScheduler = pETS_;
    delete pScheduler;
}
|
||||
|
||||
// Allocates a task set bound to taskFunc_. The scheduler argument is accepted for
// API symmetry only and is not used.
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
{
    (void)pETS_;
    enkiTaskSet* pNewSet = new enkiTaskSet( taskFunc_ );
    return pNewSet;
}
|
||||
|
||||
// Destroys a task set created by enkiCreateTaskSet.
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
{
    enkiTaskSet* pSet = pTaskSet_;
    delete pSet;
}
|
||||
|
||||
// Stores the set size and user argument on the task set, then schedules it.
// The task set and its callback must be non-null (checked with assert).
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize = setSize_;
    taskSet.pArgs     = pArgs_;
    pETS_->AddTaskSetToPipe( &taskSet );
}
|
||||
|
||||
// Same as enkiAddTaskSetToPipe, but also sets the minimum partition range
// (m_MinRange) before scheduling.
void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
                                   void* pArgs_, uint32_t setSize_, uint32_t minRange_ )
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize  = setSize_;
    taskSet.m_MinRange = minRange_;
    taskSet.pArgs      = pArgs_;
    pETS_->AddTaskSetToPipe( &taskSet );
}
|
||||
|
||||
// Returns 1 when the task set reports complete, 0 otherwise. Does not wait.
// The scheduler argument is not used.
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    (void)pETS_;
    assert( pTaskSet_ );
    if( pTaskSet_->GetIsComplete() )
    {
        return 1;
    }
    return 0;
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforTaskSet.
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.WaitforTaskSet( pTaskSet_ );
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforAll.
void enkiWaitForAll( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.WaitforAll();
}
|
||||
|
||||
|
||||
// C wrapper for TaskScheduler::GetNumTaskThreads.
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
{
    const uint32_t numThreads = pETS_->GetNumTaskThreads();
    return numThreads;
}
|
||||
|
||||
// Exposes the scheduler's profiler callback table through the C struct type.
// The cast relies on enkiProfilerCallbacks and enki::ProfilerCallbacks having the
// same size, which is checked with assert.
enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
{
    assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );
    enki::ProfilerCallbacks* pCallbacks = pETS_->GetProfilerCallbacks();
    return (enkiProfilerCallbacks*)pCallbacks;
}
|
||||
|
||||
@ -0,0 +1,104 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct enkiTaskScheduler enkiTaskScheduler;
|
||||
typedef struct enkiTaskSet enkiTaskSet;
|
||||
|
||||
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );
|
||||
|
||||
|
||||
// Create a new task scheduler
|
||||
enkiTaskScheduler* enkiNewTaskScheduler();
|
||||
|
||||
// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Initialize a task scheduler with numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Delete a task scheduler
|
||||
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Create a task set.
|
||||
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );
|
||||
|
||||
// Delete a task set.
|
||||
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );
|
||||
|
||||
// Schedule the task
|
||||
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_ );
|
||||
|
||||
// Schedule the task with a minimum range.
|
||||
// This should be set to a value which results in computation effort of at least 10k
|
||||
// clock cycles to minimize task scheduler overhead.
|
||||
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
|
||||
// of m_MinRange.
|
||||
// Also known as grain size in literature.
|
||||
void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_, uint32_t minRange_ );
|
||||
|
||||
|
||||
// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
|
||||
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Wait for a given task.
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuously added.
|
||||
void enkiWaitForAll( enkiTaskScheduler* pETS_ );
|
||||
|
||||
|
||||
// get number of threads
|
||||
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Hooks the task scheduler exposes for profilers. Each callback receives a thread number.
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );

// Keep the field order: the C++ side casts its own callback struct to this one.
struct enkiProfilerCallbacks
{
    enkiProfilerCallbackFunc threadStart;
    enkiProfilerCallbackFunc threadStop;
    enkiProfilerCallbackFunc waitStart;
    enkiProfilerCallbackFunc waitStop;
};
|
||||
|
||||
// Get the callback structure so it can be set
|
||||
struct enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
210
libs/tracy/examples/ToyPathTracer/Source/enkiTS/Threads.h
Normal file
@ -0,0 +1,210 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
#include <assert.h>
#include <errno.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "Atomics.h"
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
|
||||
#define THREADFUNC_DECL DWORD WINAPI
|
||||
#define THREAD_LOCAL __declspec( thread )
|
||||
|
||||
namespace enki
{
    typedef HANDLE threadid_t;

    // Starts a thread running StartFunc( pArg ) and stores its handle in *returnid.
    // Declare the thread start function as:
    //   THREADFUNC_DECL MyThreadStart( void* pArg );
    // Returns true on success.
    inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
    {
        // posix equiv pthread_create
        DWORD threadid;
        *returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
        return *returnid != NULL;
    }

    // Releases the thread handle. Note that CloseHandle does not stop the thread
    // itself; it only closes the handle.
    // Returns true on success, matching ThreadCreate and the POSIX implementation
    // (previously the result was inverted: CloseHandle returns non-zero on success).
    inline bool ThreadTerminate( threadid_t threadid )
    {
        return CloseHandle( threadid ) != 0;
    }

    // Number of logical processors reported by GetSystemInfo.
    inline uint32_t GetNumHardwareThreads()
    {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        return sysInfo.dwNumberOfProcessors;
    }
}
|
||||
|
||||
#else // posix
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#define THREADFUNC_DECL void*
|
||||
#define THREAD_LOCAL __thread
|
||||
|
||||
namespace enki
{
    typedef pthread_t threadid_t;

    // Starts a thread running StartFunc( pArg ) and stores its id in *returnid.
    // Declare the thread start function as:
    //   THREADFUNC_DECL MyThreadStart( void* pArg );
    // Returns true on success.
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );

        return retval == 0;
    }

    // Requests cancellation of the thread via pthread_cancel. Returns true on success.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        return pthread_cancel( threadid ) == 0;
    }

    // Number of processors currently online. sysconf returns -1 on error, which would
    // otherwise be cast to a huge unsigned value, so fall back to a single thread
    // whenever the reported count is not positive.
    inline uint32_t GetNumHardwareThreads()
    {
        const long numOnline = sysconf( _SC_NPROCESSORS_ONLN );
        return ( numOnline < 1 ) ? 1u : (uint32_t)numOnline;
    }
}
|
||||
|
||||
#endif // posix
|
||||
|
||||
|
||||
// Semaphore implementation
|
||||
#ifdef _WIN32
|
||||
|
||||
namespace enki
{
    // Thin wrapper around a Win32 semaphore handle.
    struct semaphoreid_t
    {
        HANDLE sem;
    };

    // Creates an unnamed semaphore with an initial count of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
    }

    // Closes the semaphore handle.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        CloseHandle( semaphoreid.sem );
    }

    // Blocks until the semaphore is signalled.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );

        assert( retval != WAIT_FAILED );
        (void)retval;   // keep builds with NDEBUG free of unused-variable warnings
    }

    // Releases the semaphore countWaiting times. Non-positive counts are ignored,
    // matching the other platform implementations (ReleaseSemaphore rejects them).
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        if( countWaiting > 0 )
        {
            ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
        }
    }
}
|
||||
#elif defined(__MACH__)
|
||||
|
||||
// OS X does not have POSIX semaphores
|
||||
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
|
||||
#include <mach/mach.h>
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct semaphoreid_t
|
||||
{
|
||||
semaphore_t sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_destroy( mach_task_self(), semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_wait( semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
while( countWaiting-- > 0 )
|
||||
{
|
||||
semaphore_signal( semaphoreid.sem );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else // POSIX
|
||||
|
||||
#include <semaphore.h>
|
||||
|
||||
namespace enki
{
    // Thin wrapper around an unnamed POSIX semaphore.
    struct semaphoreid_t
    {
        sem_t sem;
    };

    // Initializes a process-private semaphore with an initial value of 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        assert( err == 0 );
        (void)err;  // keep builds with NDEBUG free of unused-variable warnings
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks until the semaphore can be decremented. sem_wait may fail with EINTR
    // when a signal handler interrupts it; that is not an error, so the wait is
    // simply retried.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        int err;
        do
        {
            err = sem_wait( &semaphoreid.sem );
        } while( err == -1 && errno == EINTR );
        assert( err == 0 );
        (void)err;
    }

    // Posts the semaphore countWaiting times; non-positive counts do nothing.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
|
||||
#endif
|
||||
|
||||
|
||||
395
libs/tracy/examples/ToyPathTracer/Windows/ComputeShader.hlsl
Normal file
@ -0,0 +1,395 @@
|
||||
#include "../Source/Config.h"
|
||||
|
||||
// Xorshift-style generator: advances `state` in place with three shift/xor steps
// (<<13, >>17, <<15) and returns the new state.
inline uint RNG(inout uint state)
{
    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 15;
    return state;
}
|
||||
|
||||
// Returns a float in [0, 1): the low 24 bits of the next RNG value divided by 2^24.
float RandomFloat01(inout uint state)
{
    uint low24 = RNG(state) & 0xFFFFFF;
    return low24 / 16777216.0f;
}
|
||||
|
||||
// Random point in the unit disk on the z = 0 plane: a random angle, then a radius
// of sqrt(random). Consumes two random numbers (angle first).
float3 RandomInUnitDisk(inout uint state)
{
    float angle = RandomFloat01(state) * 2.0f * 3.1415926f;
    float2 dir = float2(cos(angle), sin(angle));
    float radius = sqrt(RandomFloat01(state));
    dir *= radius;
    return float3(dir, 0);
}
|
||||
// Random point inside the unit sphere: a random direction (z, then azimuth) scaled
// by the cube root of a third random number.
float3 RandomInUnitSphere(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float azimuth = RandomFloat01(state) * 2.0f * 3.1415926f;
    // max() guards the sqrt against a slightly negative argument
    float ringRadius = sqrt(max(0.0, 1.0f - z * z));
    float3 dir = float3(ringRadius * cos(azimuth), ringRadius * sin(azimuth), z);
    dir *= pow(RandomFloat01(state), 1.0 / 3.0);
    return dir;
}
|
||||
// Random direction on the unit sphere: z picked in [-1, 1), then an azimuth angle.
// Consumes two random numbers (z first).
float3 RandomUnitVector(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float azimuth = RandomFloat01(state) * 2.0f * 3.1415926f;
    float ringRadius = sqrt(1.0f - z * z);
    return float3(ringRadius * cos(azimuth), ringRadius * sin(azimuth), z);
}
|
||||
|
||||
|
||||
|
||||
// A ray: points along it are orig + dir * t (see RayPointAt).
struct Ray
{
    float3 orig;    // origin
    float3 dir;     // direction; callers in this file pass normalized vectors
};
|
||||
// Builds a Ray from an origin and a direction (stored as given).
Ray MakeRay(float3 orig_, float3 dir_)
{
    Ray ray;
    ray.orig = orig_;
    ray.dir = dir_;
    return ray;
}
|
||||
// Point on ray r at parameter t.
float3 RayPointAt(Ray r, float t)
{
    float3 along = r.dir * t;
    return r.orig + along;
}
|
||||
|
||||
|
||||
// Computes the refraction of direction v about normal n, where nint is the ratio of
// refraction indices. Returns true and writes the refracted direction when the
// discriminant is positive; otherwise returns false (total internal reflection).
// outRefracted is always written, so the `out` parameter is never left undefined;
// on the false path it is set to zero and callers must not use it.
inline bool refract(float3 v, float3 n, float nint, out float3 outRefracted)
{
    outRefracted = float3(0, 0, 0);
    float dt = dot(v, n);
    float discr = 1.0f - nint * nint*(1 - dt * dt);
    if (discr > 0)
    {
        outRefracted = nint * (v - n * dt) - n * sqrt(discr);
        return true;
    }
    return false;
}
|
||||
// Schlick-style reflectance approximation for incidence cosine `cosine` and
// refraction index `ri`.
inline float schlick(float cosine, float ri)
{
    float base = (1 - ri) / (1 + ri);
    float r0 = base * base;
    // note: saturate to guard against possible tiny negative numbers
    float falloff = pow(saturate(1 - cosine), 5);
    return r0 + (1 - r0)*falloff;
}
|
||||
|
||||
// Result of a ray/sphere intersection, filled in by HitSpheres.
struct Hit
{
    float3 pos;     // hit position
    float3 normal;  // (pos - sphere center) * invRadius
    float t;        // ray parameter of the hit
};
|
||||
|
||||
// Sphere primitive. Field order is kept as-is; presumably it mirrors a CPU-side
// layout (confirm before reordering).
struct Sphere
{
    float3 center;
    float radius;
    float invRadius;    // presumably 1 / radius; used to scale the hit normal
};
|
||||
|
||||
#define MatLambert 0
|
||||
#define MatMetal 1
|
||||
#define MatDielectric 2
|
||||
|
||||
// Surface material. Field order is kept as-is; presumably it mirrors a CPU-side
// layout (confirm before reordering).
struct Material
{
    int type;           // compared against MatLambert / MatMetal / MatDielectric
    float3 albedo;      // used as attenuation by Lambert and metal scattering
    float3 emissive;
    float roughness;    // scales the random perturbation of metal reflections
    float ri;           // refraction index used by dielectric scattering
};
|
||||
|
||||
groupshared Sphere s_GroupSpheres[kCSMaxObjects];
|
||||
groupshared Material s_GroupMaterials[kCSMaxObjects];
|
||||
groupshared int s_GroupEmissives[kCSMaxObjects];
|
||||
|
||||
|
||||
// Camera description consumed by CameraGetRay. Field order is kept as-is;
// presumably it mirrors a CPU-side layout (confirm before reordering).
struct Camera
{
    float3 origin;
    float3 lowerLeftCorner;
    float3 horizontal;
    float3 vertical;
    float3 u, v, w;
    float lensRadius;   // scales the random lens offset in CameraGetRay
};
|
||||
|
||||
// Builds a camera ray for screen coordinates (s, t). The origin is jittered by a
// random offset in the lens disk (scaled by lensRadius) along the camera's u/v axes.
Ray CameraGetRay(Camera cam, float s, float t, inout uint state)
{
    float3 lensSample = cam.lensRadius * RandomInUnitDisk(state);
    float3 offset = cam.u * lensSample.x + cam.v * lensSample.y;
    float3 rayOrigin = cam.origin + offset;
    float3 toTarget = cam.lowerLeftCorner + s * cam.horizontal + t * cam.vertical - cam.origin - offset;
    return MakeRay(rayOrigin, normalize(toTarget));
}
|
||||
|
||||
|
||||
// Finds the closest sphere in s_GroupSpheres[0..sphereCount) hit by ray r with
// tMin < t < tMax. Returns its index, or -1 when nothing is hit. outHit is only
// written when a hit is found.
int HitSpheres(Ray r, int sphereCount, float tMin, float tMax, inout Hit outHit)
{
    float closestT = tMax;
    int closestID = -1;
    for (int i = 0; i < sphereCount; ++i)
    {
        Sphere sph = s_GroupSpheres[i];
        float3 toCenter = sph.center - r.orig;
        float nb = dot(toCenter, r.dir);
        float c = dot(toCenter, toCenter) - sph.radius*sph.radius;
        float discr = nb * nb - c;
        if (!(discr > 0))
            continue;

        float discrSq = sqrt(discr);

        // Try the earlier root first; if it lies at or before tMin, use the later one.
        float tNear = nb - discrSq;
        float t = (tNear <= tMin) ? (nb + discrSq) : tNear;

        if (t > tMin && t < closestT)
        {
            closestID = i;
            closestT = t;
        }
    }

    if (closestID != -1)
    {
        outHit.pos = RayPointAt(r, closestT);
        outHit.normal = (outHit.pos - s_GroupSpheres[closestID].center) * s_GroupSpheres[closestID].invRadius;
        outHit.t = closestT;
    }
    return closestID;
}
|
||||
|
||||
// Per-dispatch parameters. Field order is kept as-is; presumably it mirrors a
// CPU-side layout (confirm before reordering).
struct Params
{
    Camera cam;
    int sphereCount;
    int screenWidth;
    int screenHeight;
    int frames;
    float invWidth;
    float invHeight;
    float lerpFac;
    int emissiveCount;
};
|
||||
|
||||
|
||||
#define kMinT 0.001f
|
||||
#define kMaxT 1.0e7f
|
||||
#define kMaxDepth 10
|
||||
|
||||
|
||||
// Scene intersection entry point; the scene consists only of the group-shared
// spheres, so this forwards to HitSpheres.
static int HitWorld(int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
{
    int hitID = HitSpheres(r, sphereCount, tMin, tMax, outHit);
    return hitID;
}
|
||||
|
||||
|
||||
// Scatters ray r_in at hit `rec` according to material s_GroupMaterials[matID].
// Writes the attenuation and the scattered ray, and accumulates direct light from
// emissive spheres into outLightE (Lambert only, when DO_LIGHT_SAMPLING is set).
// inoutRayCount is incremented for every shadow ray shot.
// Returns false when the ray is absorbed or the material type is unknown.
static bool Scatter(int sphereCount, int emissiveCount, int matID, Ray r_in, Hit rec, out float3 attenuation, out Ray scattered, out float3 outLightE, inout int inoutRayCount, inout uint state)
{
    outLightE = float3(0, 0, 0);
    Material mat = s_GroupMaterials[matID];
    if (mat.type == MatLambert)
    {
        // random point on unit sphere that is tangent to the hit point
        float3 target = rec.pos + rec.normal + RandomUnitVector(state);
        scattered = MakeRay(rec.pos, normalize(target - rec.pos));
        attenuation = mat.albedo;

        // sample lights
#if DO_LIGHT_SAMPLING
        for (int emissiveIdx = 0; emissiveIdx < emissiveCount; ++emissiveIdx)
        {
            int lightID = s_GroupEmissives[emissiveIdx];
            if (matID == lightID)
                continue; // skip self
            Material lightMat = s_GroupMaterials[lightID];
            Sphere light = s_GroupSpheres[lightID];

            // create a random direction towards sphere
            // coord system for sampling: sw, su, sv
            float3 sw = normalize(light.center - rec.pos);
            float3 su = normalize(cross(abs(sw.x)>0.01f ? float3(0, 1, 0) : float3(1, 0, 0), sw));
            float3 sv = cross(sw, su);
            // sample sphere by solid angle
            float cosAMax = sqrt(1.0f - light.radius*light.radius / dot(rec.pos - light.center, rec.pos - light.center));
            float eps1 = RandomFloat01(state);
            float eps2 = RandomFloat01(state);
            float cosA = 1.0f - eps1 + eps1 * cosAMax;
            float sinA = sqrt(1.0f - cosA * cosA);
            float phi = 2 * 3.1415926 * eps2;
            float3 toLight = su * cos(phi) * sinA + sv * sin(phi) * sinA + sw * cosA;

            // shoot shadow ray
            Hit lightHit;
            ++inoutRayCount;
            int hitID = HitWorld(sphereCount, MakeRay(rec.pos, toLight), kMinT, kMaxT, lightHit);
            if (hitID == lightID)
            {
                float omega = 2 * 3.1415926 * (1 - cosAMax);

                // use the normal on the side the incoming ray arrived from
                float3 nl = dot(rec.normal, r_in.dir) < 0 ? rec.normal : -rec.normal;
                outLightE += (mat.albedo * lightMat.emissive) * (max(0.0f, dot(toLight, nl)) * omega / 3.1415926);
            }
        }
#endif
        return true;
    }
    else if (mat.type == MatMetal)
    {
        float3 refl = reflect(r_in.dir, rec.normal);
        // reflected ray, and random inside of sphere based on roughness
        float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
        roughness = 0; // until we get better BRDF for metals
#endif
        scattered = MakeRay(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
        attenuation = mat.albedo;
        return dot(scattered.dir, rec.normal) > 0;
    }
    else if (mat.type == MatDielectric)
    {
        float3 rdir = r_in.dir;
        float3 refl = reflect(rdir, rec.normal);
        attenuation = float3(1, 1, 1);

        // choose the normal and index ratio depending on which side the ray arrives from
        float3 outwardN;
        float nint;
        float cosine;
        if (dot(rdir, rec.normal) > 0)
        {
            outwardN = -rec.normal;
            nint = mat.ri;
            cosine = mat.ri * dot(rdir, rec.normal);
        }
        else
        {
            outwardN = rec.normal;
            nint = 1.0f / mat.ri;
            cosine = -dot(rdir, rec.normal);
        }

        // when refraction is not possible, always reflect
        float3 refr;
        float reflProb;
        if (refract(rdir, outwardN, nint, refr))
        {
            reflProb = schlick(cosine, mat.ri);
        }
        else
        {
            reflProb = 1;
        }

        if (RandomFloat01(state) < reflProb)
            scattered = MakeRay(rec.pos, normalize(refl));
        else
            scattered = MakeRay(rec.pos, normalize(refr));
    }
    else
    {
        // unknown material type
        attenuation = float3(1, 0, 1);
        scattered = MakeRay(float3(0,0,0), float3(0, 0, 1));
        return false;
    }
    return true;
}
|
||||
|
||||
// Follows ray `r` through the scene for at most kMaxDepth bounces and returns
// the radiance gathered along the path.
//
//   sphereCount, emissiveCount - object counts forwarded to HitWorld / Scatter
//   r                          - starting ray; replaced by the scattered ray after each bounce
//   inoutRayCount              - incremented once per HitWorld query issued here
//                                (also passed on to Scatter)
//   state                      - random number generator state, passed on to Scatter
//
// The path is walked in a loop rather than by recursion, since GPUs don't
// support recursion.
static float3 Trace(int sphereCount, int emissiveCount, Ray r, inout int inoutRayCount, inout uint state)
{
    float3 radiance = float3(0, 0, 0);      // light gathered so far
    float3 throughput = float3(1, 1, 1);    // product of attenuations of all previous bounces
    bool addSurfaceEmission = true;         // only consulted when DO_LIGHT_SAMPLING is on

    for (int bounce = 0; bounce < kMaxDepth; ++bounce)
    {
        Hit hit;
        ++inoutRayCount;
        int hitID = HitWorld(sphereCount, r, kMinT, kMaxT, hit);

        // Nothing was hit: add the sky contribution and finish the path.
        if (hitID < 0)
        {
#if DO_MITSUBA_COMPARE
            // constant environment light, easier to compare with Mitsuba
            radiance += throughput * float3(0.15f, 0.21f, 0.3f);
#else
            // vertical gradient driven by the ray direction's y component
            float blend = 0.5f*(r.dir.y + 1.0f);
            float3 sky = ((1.0f - blend)*float3(1.0f, 1.0f, 1.0f) + blend * float3(0.5f, 0.7f, 1.0f)) * 0.3f;
            radiance += throughput * sky;
#endif
            break;
        }

        Material surface = s_GroupMaterials[hitID];
        float3 emitted = surface.emissive;

        Ray nextRay;
        float3 surfaceAtten;
        float3 sampledLight;
        bool didScatter = Scatter(sphereCount, emissiveCount, hitID, r, hit, surfaceAtten, nextRay, sampledLight, inoutRayCount, state);

        // The surface did not scatter: only its own emission counts, path ends.
        if (!didScatter)
        {
            radiance += throughput * emitted;
            break;
        }

#if DO_LIGHT_SAMPLING
        // Drop this surface's own emission when the previous bounce was a
        // Lambert one; presumably because that bounce already gathered
        // emissive spheres through Scatter's light sampling.
        if (!addSurfaceEmission)
            emitted = float3(0, 0, 0);
        addSurfaceEmission = (surface.type != MatLambert);
#endif

        radiance += throughput * (emitted + sampledLight);
        throughput *= surfaceAtten;
        r = nextRay;
    }

    return radiance;
}
|
||||
|
||||
// Resources bound to the compute shader.
Texture2D srcImage : register(t0);                      // previous result, blended into the new one
RWTexture2D<float4> dstImage : register(u0);            // output image
StructuredBuffer<Sphere> g_Spheres : register(t1);      // scene spheres
StructuredBuffer<Material> g_Materials : register(t2);  // one material per sphere index
StructuredBuffer<Params> g_Params : register(t3);       // only element 0 is read
StructuredBuffer<int> g_Emissives : register(t4);       // sphere indices used for light sampling
RWByteAddressBuffer g_OutRayCount : register(u1);       // ray counter, accumulated at byte offset 0

// Compute shader entry point: one thread per pixel.
//   gid - dispatch-wide thread id, used as the pixel coordinate
//   tid - thread id inside the group, used to split the scene-data copy
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
    Params params = g_Params[0];

    // Step 1: copy spheres, materials and emissive indices into group shared
    // memory. The work is divided evenly: every thread of the group copies
    // one contiguous chunk of indices.
    uint flatThread = tid.y * kCSGroupSizeX + tid.x;
    uint threadsInGroup = kCSGroupSizeX * kCSGroupSizeY;
    uint sphereTotal = params.sphereCount;
    uint emissiveTotal = params.emissiveCount;
    uint chunkSize = (sphereTotal + threadsInGroup - 1) / threadsInGroup;
    uint chunkBegin = flatThread * chunkSize;
    uint chunkEnd = chunkBegin + chunkSize;
    for (uint idx = chunkBegin; idx < chunkEnd; ++idx)
    {
        if (idx < sphereTotal)
        {
            s_GroupSpheres[idx] = g_Spheres[idx];
            s_GroupMaterials[idx] = g_Materials[idx];
        }
        if (idx < emissiveTotal)
        {
            s_GroupEmissives[idx] = g_Emissives[idx];
        }
    }
    // Wait until the whole group has finished copying before anyone traces.
    GroupMemoryBarrierWithGroupSync();

    // Step 2: trace DO_SAMPLES_PER_PIXEL jittered camera rays for this pixel.
    int pixelRays = 0;
    float3 pixelColor = float3(0, 0, 0);
    // Seed differs per pixel and per frame; "| 1" forces the lowest bit on.
    uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
    for (int sampleIdx = 0; sampleIdx < DO_SAMPLES_PER_PIXEL; sampleIdx++)
    {
        float u = float(gid.x + RandomFloat01(rngState)) * params.invWidth;
        float v = float(gid.y + RandomFloat01(rngState)) * params.invHeight;
        Ray cameraRay = CameraGetRay(params.cam, u, v, rngState);
        pixelColor += Trace(params.sphereCount, params.emissiveCount, cameraRay, pixelRays, rngState);
    }
    pixelColor *= 1.0f / float(DO_SAMPLES_PER_PIXEL);

    // Step 3: blend with the previous image and store the result.
    float3 previous = srcImage.Load(int3(gid.xy,0)).rgb;
    dstImage[gid.xy] = float4(lerp(pixelColor, previous, params.lerpFac), 1);

    // Step 4: add this thread's ray count to the shared counter.
    g_OutRayCount.InterlockedAdd(0, pixelRays);
}
|
||||
15
libs/tracy/examples/ToyPathTracer/Windows/PixelShader.hlsl
Normal file
@ -0,0 +1,15 @@
|
||||
// Encodes a linear RGB color for display using a power curve with exponent
// 0.416666667 (= 1/2.4), scaled by 1.055 and offset by -0.055.
// Negative inputs are clamped to zero first, and the encoded result is
// clamped to be non-negative as well. There is no separate linear segment
// for very dark values.
float3 LinearToSRGB(float3 rgb)
{
    float3 nonNegative = max(rgb, float3(0, 0, 0));
    float3 encoded = 1.055 * pow(nonNegative, 0.416666667) - 0.055;
    return max(encoded, 0.0);
}
|
||||
|
||||
// Texture to display and the sampler used to read it.
Texture2D tex : register(t0);
SamplerState smp : register(s0);

// Pixel shader entry point: samples `tex` at `uv`, converts the color with
// LinearToSRGB and outputs it with alpha set to 1.
float4 main(float2 uv : TEXCOORD0) : SV_Target
{
    float3 linearColor = tex.Sample(smp, uv).rgb;
    return float4(LinearToSRGB(linearColor), 1.0f);
}
|
||||
31
libs/tracy/examples/ToyPathTracer/Windows/TestCpu.sln
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30907.101
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestCpu", "TestCpu.vcxproj", "{4F84B756-87F5-4B92-827B-DA087DAE1900}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x64.Build.0 = Debug|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Debug|x86.Build.0 = Debug|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.ActiveCfg = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x64.Build.0 = Release|x64
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.ActiveCfg = Release|Win32
|
||||
{4F84B756-87F5-4B92-827B-DA087DAE1900}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {067FB780-37B8-465E-AD7E-E7B238B9C04F}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
245
libs/tracy/examples/ToyPathTracer/Windows/TestCpu.vcxproj
Normal file
@ -0,0 +1,245 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{4F84B756-87F5-4B92-827B-DA087DAE1900}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>TestCpu</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>TRACY_ENABLE;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<ExceptionHandling>false</ExceptionHandling>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<CallingConvention>VectorCall</CallingConvention>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>d3d11.lib;kernel32.lib;user32.lib;gdi32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\public\TracyClient.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp" />
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp" />
|
||||
<ClCompile Include="..\Source\Maths.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp" />
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Config.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h" />
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h" />
|
||||
<ClInclude Include="..\Source\Maths.h" />
|
||||
<ClInclude Include="..\Source\MathSimd.h" />
|
||||
<ClInclude Include="..\Source\Test.h" />
|
||||
<ClInclude Include="..\Source\stb_image.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="ComputeShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Compute</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_CSBytecode</VariableName>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledComputeShader.h</HeaderFileOutput>
|
||||
</FxCompile>
|
||||
<FxCompile Include="PixelShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Pixel</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Pixel</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledPixelShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_PSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_PSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
<FxCompile Include="VertexShader.hlsl">
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Vertex</ShaderType>
|
||||
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Vertex</ShaderType>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">5.0</ShaderModel>
|
||||
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">5.0</ShaderModel>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<HeaderFileOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CompiledVertexShader.h</HeaderFileOutput>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">g_VSBytecode</VariableName>
|
||||
<VariableName Condition="'$(Configuration)|$(Platform)'=='Release|x64'">g_VSBytecode</VariableName>
|
||||
</FxCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
@ -0,0 +1,67 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<ClCompile Include="TestWin.cpp" />
|
||||
<ClCompile Include="..\Source\Test.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\enkiTS\TaskScheduler_c.cpp">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Source\Maths.cpp">
|
||||
<Filter>Source</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\public\TracyClient.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Source">
|
||||
<UniqueIdentifier>{5f19f217-c1c7-4eeb-be61-8b986fee9375}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source\enkiTS">
|
||||
<UniqueIdentifier>{38c448a8-1dcc-4116-9410-a9f8d068caff}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\Source\Test.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\stb_image.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Atomics.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\LockLessMultiReadPipe.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\TaskScheduler_c.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\enkiTS\Threads.h">
|
||||
<Filter>Source\enkiTS</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Maths.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\Config.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\Source\MathSimd.h">
|
||||
<Filter>Source</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\.editorconfig" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<FxCompile Include="VertexShader.hlsl" />
|
||||
<FxCompile Include="PixelShader.hlsl" />
|
||||
<FxCompile Include="ComputeShader.hlsl" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
567
libs/tracy/examples/ToyPathTracer/Windows/TestWin.cpp
Normal file
@ -0,0 +1,567 @@
|
||||
#include <stdint.h>
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#include <d3d11_1.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "../Source/Config.h"
|
||||
#include "../Source/Maths.h"
|
||||
#include "../Source/Test.h"
|
||||
#include "CompiledVertexShader.h"
|
||||
#include "CompiledPixelShader.h"
|
||||
|
||||
#include "../../../public/tracy/Tracy.hpp"
|
||||
#include "../../../public/tracy/TracyD3D11.hpp"
|
||||
|
||||
static HINSTANCE g_HInstance;
|
||||
static HWND g_Wnd;
|
||||
|
||||
ATOM MyRegisterClass(HINSTANCE hInstance);
|
||||
BOOL InitInstance(HINSTANCE, int);
|
||||
LRESULT CALLBACK WndProc(HWND, UINT, WPARAM, LPARAM);
|
||||
INT_PTR CALLBACK About(HWND, UINT, WPARAM, LPARAM);
|
||||
|
||||
static HRESULT InitD3DDevice();
|
||||
static void ShutdownD3DDevice();
|
||||
static void RenderFrame();
|
||||
|
||||
static float* g_Backbuffer;
|
||||
|
||||
static D3D_FEATURE_LEVEL g_D3D11FeatureLevel = D3D_FEATURE_LEVEL_11_0;
|
||||
static ID3D11Device* g_D3D11Device = nullptr;
|
||||
static ID3D11DeviceContext* g_D3D11Ctx = nullptr;
|
||||
static IDXGISwapChain* g_D3D11SwapChain = nullptr;
|
||||
static ID3D11RenderTargetView* g_D3D11RenderTarget = nullptr;
|
||||
static ID3D11VertexShader* g_VertexShader;
|
||||
static ID3D11PixelShader* g_PixelShader;
|
||||
static ID3D11Texture2D *g_BackbufferTexture, *g_BackbufferTexture2;
|
||||
static ID3D11ShaderResourceView *g_BackbufferSRV, *g_BackbufferSRV2;
|
||||
static ID3D11UnorderedAccessView *g_BackbufferUAV, *g_BackbufferUAV2;
|
||||
static ID3D11SamplerState* g_SamplerLinear;
|
||||
static ID3D11RasterizerState* g_RasterState;
|
||||
static int g_BackbufferIndex;
|
||||
static tracy::D3D11Ctx *g_tracyCtx;
|
||||
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
#include "CompiledComputeShader.h"
|
||||
// Per-frame parameter block for the GPU compute path (DO_COMPUTE_GPU).
// wWinMain sizes the g_DataParams structured buffer with sizeof(ComputeParams),
// using it for both ByteWidth and StructureByteStride.
// NOTE(review): member order and types presumably have to match the Params
// struct declared in ComputeShader.hlsl (which reads cam, sphereCount, frames,
// invWidth, invHeight, lerpFac and emissiveCount) - confirm before reordering
// or adding fields.
struct ComputeParams
|
||||
{
|
||||
Camera cam;
|
||||
int sphereCount;
|
||||
int screenWidth;
|
||||
int screenHeight;
|
||||
int frames;
|
||||
float invWidth;
|
||||
float invHeight;
|
||||
float lerpFac;
|
||||
int emissiveCount;
|
||||
};
|
||||
static ID3D11ComputeShader* g_ComputeShader;
|
||||
static ID3D11Buffer* g_DataSpheres; static ID3D11ShaderResourceView* g_SRVSpheres;
|
||||
static ID3D11Buffer* g_DataMaterials; static ID3D11ShaderResourceView* g_SRVMaterials;
|
||||
static ID3D11Buffer* g_DataParams; static ID3D11ShaderResourceView* g_SRVParams;
|
||||
static ID3D11Buffer* g_DataEmissives; static ID3D11ShaderResourceView* g_SRVEmissives;
|
||||
static ID3D11Buffer* g_DataCounter; static ID3D11UnorderedAccessView* g_UAVCounter;
|
||||
static int g_SphereCount, g_ObjSize, g_MatSize;
|
||||
static ID3D11Query *g_QueryBegin, *g_QueryEnd, *g_QueryDisjoint;
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
|
||||
int APIENTRY wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR, _In_ int nCmdShow)
|
||||
{
|
||||
g_Backbuffer = new float[kBackbufferWidth * kBackbufferHeight * 4];
|
||||
memset(g_Backbuffer, 0, kBackbufferWidth * kBackbufferHeight * 4 * sizeof(g_Backbuffer[0]));
|
||||
|
||||
InitializeTest();
|
||||
|
||||
MyRegisterClass(hInstance);
|
||||
if (!InitInstance (hInstance, nCmdShow))
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (FAILED(InitD3DDevice()))
|
||||
{
|
||||
ShutdownD3DDevice();
|
||||
return 0;
|
||||
}
|
||||
|
||||
g_D3D11Device->CreateVertexShader(g_VSBytecode, ARRAYSIZE(g_VSBytecode), NULL, &g_VertexShader);
|
||||
g_D3D11Device->CreatePixelShader(g_PSBytecode, ARRAYSIZE(g_PSBytecode), NULL, &g_PixelShader);
|
||||
#if DO_COMPUTE_GPU
|
||||
g_D3D11Device->CreateComputeShader(g_CSBytecode, ARRAYSIZE(g_CSBytecode), NULL, &g_ComputeShader);
|
||||
#endif
|
||||
|
||||
D3D11_TEXTURE2D_DESC texDesc = {};
|
||||
texDesc.Width = kBackbufferWidth;
|
||||
texDesc.Height = kBackbufferHeight;
|
||||
texDesc.MipLevels = 1;
|
||||
texDesc.ArraySize = 1;
|
||||
texDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
texDesc.SampleDesc.Count = 1;
|
||||
texDesc.SampleDesc.Quality = 0;
|
||||
#if DO_COMPUTE_GPU
|
||||
texDesc.Usage = D3D11_USAGE_DEFAULT;
|
||||
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
|
||||
texDesc.CPUAccessFlags = 0;
|
||||
#else
|
||||
texDesc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
#endif
|
||||
texDesc.MiscFlags = 0;
|
||||
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture);
|
||||
g_D3D11Device->CreateTexture2D(&texDesc, NULL, &g_BackbufferTexture2);
|
||||
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
|
||||
srvDesc.Format = texDesc.Format;
|
||||
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
|
||||
srvDesc.Texture2D.MipLevels = 1;
|
||||
srvDesc.Texture2D.MostDetailedMip = 0;
|
||||
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture, &srvDesc, &g_BackbufferSRV);
|
||||
g_D3D11Device->CreateShaderResourceView(g_BackbufferTexture2, &srvDesc, &g_BackbufferSRV2);
|
||||
|
||||
D3D11_SAMPLER_DESC smpDesc = {};
|
||||
smpDesc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT;
|
||||
smpDesc.AddressU = smpDesc.AddressV = smpDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
g_D3D11Device->CreateSamplerState(&smpDesc, &g_SamplerLinear);
|
||||
|
||||
D3D11_RASTERIZER_DESC rasterDesc = {};
|
||||
rasterDesc.FillMode = D3D11_FILL_SOLID;
|
||||
rasterDesc.CullMode = D3D11_CULL_NONE;
|
||||
g_D3D11Device->CreateRasterizerState(&rasterDesc, &g_RasterState);
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
|
||||
|
||||
int camSize;
|
||||
GetObjectCount(g_SphereCount, g_ObjSize, g_MatSize, camSize);
|
||||
assert(g_ObjSize == 20);
|
||||
assert(g_MatSize == 36);
|
||||
assert(camSize == 88);
|
||||
D3D11_BUFFER_DESC bdesc = {};
|
||||
bdesc.ByteWidth = g_SphereCount * g_ObjSize;
|
||||
bdesc.Usage = D3D11_USAGE_DEFAULT;
|
||||
bdesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
bdesc.CPUAccessFlags = 0;
|
||||
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
bdesc.StructureByteStride = g_ObjSize;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataSpheres);
|
||||
srvDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvDesc.Buffer.FirstElement = 0;
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataSpheres, &srvDesc, &g_SRVSpheres);
|
||||
|
||||
bdesc.ByteWidth = g_SphereCount * g_MatSize;
|
||||
bdesc.StructureByteStride = g_MatSize;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataMaterials);
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataMaterials, &srvDesc, &g_SRVMaterials);
|
||||
|
||||
bdesc.ByteWidth = sizeof(ComputeParams);
|
||||
bdesc.StructureByteStride = sizeof(ComputeParams);
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataParams);
|
||||
srvDesc.Buffer.NumElements = 1;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataParams, &srvDesc, &g_SRVParams);
|
||||
|
||||
bdesc.ByteWidth = g_SphereCount * 4;
|
||||
bdesc.StructureByteStride = 4;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataEmissives);
|
||||
srvDesc.Buffer.NumElements = g_SphereCount;
|
||||
g_D3D11Device->CreateShaderResourceView(g_DataEmissives, &srvDesc, &g_SRVEmissives);
|
||||
|
||||
bdesc.ByteWidth = 4;
|
||||
bdesc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
|
||||
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
|
||||
bdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
g_D3D11Device->CreateBuffer(&bdesc, NULL, &g_DataCounter);
|
||||
uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
uavDesc.Buffer.FirstElement = 0;
|
||||
uavDesc.Buffer.NumElements = 1;
|
||||
uavDesc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_DataCounter, &uavDesc, &g_UAVCounter);
|
||||
|
||||
uavDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
|
||||
uavDesc.Texture2D.MipSlice = 0;
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture, &uavDesc, &g_BackbufferUAV);
|
||||
g_D3D11Device->CreateUnorderedAccessView(g_BackbufferTexture2, &uavDesc, &g_BackbufferUAV2);
|
||||
|
||||
D3D11_QUERY_DESC qDesc = {};
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryBegin);
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryEnd);
|
||||
qDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
g_D3D11Device->CreateQuery(&qDesc, &g_QueryDisjoint);
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
|
||||
|
||||
static int framesLeft = 10;
|
||||
|
||||
// Main message loop
|
||||
MSG msg = { 0 };
|
||||
while (msg.message != WM_QUIT)
|
||||
{
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE))
|
||||
{
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
}
|
||||
else
|
||||
{
|
||||
RenderFrame();
|
||||
TracyD3D11Collect(g_tracyCtx);
|
||||
if( --framesLeft == 0 ) break;
|
||||
}
|
||||
}
|
||||
|
||||
ShutdownTest();
|
||||
ShutdownD3DDevice();
|
||||
|
||||
return (int) msg.wParam;
|
||||
}
|
||||
|
||||
|
||||
// Registers the "TestClass" window class that InitInstance instantiates.
// Returns whatever RegisterClassExW returns for the filled-in description.
ATOM MyRegisterClass(HINSTANCE hInstance)
{
    ZoneScoped;

    // Value-initialise the descriptor so every field not assigned below
    // (icons, menu name, extra class/window bytes) starts out as zero.
    WNDCLASSEXW windowClass = {};
    windowClass.cbSize        = sizeof(WNDCLASSEX);
    windowClass.style         = CS_HREDRAW | CS_VREDRAW;
    windowClass.lpfnWndProc   = WndProc;
    windowClass.cbClsExtra    = 0;
    windowClass.cbWndExtra    = 0;
    windowClass.hInstance     = hInstance;
    windowClass.hCursor       = LoadCursor(nullptr, IDC_ARROW);
    windowClass.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
    windowClass.lpszClassName = L"TestClass";

    return RegisterClassExW(&windowClass);
}
|
||||
|
||||
// Creates, shows and repaints the main window, storing the instance handle
// in g_HInstance and the window handle in g_Wnd.
// Returns FALSE when CreateWindowW fails, TRUE otherwise.
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
{
    ZoneScoped;

    g_HInstance = hInstance;

    // Start from the desired client area (the backbuffer size) and let
    // AdjustWindowRect grow it to the outer window rectangle for this style.
    const DWORD windowStyle = WS_OVERLAPPED | WS_CAPTION | WS_SYSMENU | WS_MINIMIZEBOX;
    RECT windowRect = { 0, 0, kBackbufferWidth, kBackbufferHeight };
    AdjustWindowRect(&windowRect, windowStyle, FALSE);

    HWND window = CreateWindowW(L"TestClass", L"Test", windowStyle,
                                CW_USEDEFAULT, CW_USEDEFAULT,
                                windowRect.right-windowRect.left, windowRect.bottom-windowRect.top,
                                nullptr, nullptr, hInstance, nullptr);
    if (window == nullptr)
    {
        return FALSE;
    }

    g_Wnd = window;
    ShowWindow(window, nCmdShow);
    UpdateWindow(window);
    return TRUE;
}
|
||||
|
||||
// Accumulated frame time since the last window-title refresh. The CPU path of
// RenderFrame adds QueryPerformanceCounter tick deltas to it; the GPU path
// declares its own function-local `static float s_Time` that shadows this one.
static uint64_t s_Time;
// Number of frames accumulated since the last window-title refresh.
static int s_Count;
// Scratch buffer for the statistics line written with sprintf_s and shown as the window title.
static char s_Buffer[200];
// Rendering flags passed to UpdateTest/DrawTest; toggled from WndProc on 'a' (kFlagAnimate) and 'p' (kFlagProgressive).
static unsigned s_Flags = kFlagProgressive;
// Frames rendered so far; incremented by RenderFrame and reset to 0 when 'p' is pressed.
static int s_FrameCount = 0;
|
||||
|
||||
|
||||
static void RenderFrame()
|
||||
{
|
||||
ZoneScoped;
|
||||
TracyD3D11Zone(g_tracyCtx, "RenderFrame");
|
||||
|
||||
LARGE_INTEGER time1;
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
QueryPerformanceCounter(&time1);
|
||||
float t = float(clock()) / CLOCKS_PER_SEC;
|
||||
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
|
||||
|
||||
g_BackbufferIndex = 1 - g_BackbufferIndex;
|
||||
void* dataSpheres = alloca(g_SphereCount * g_ObjSize);
|
||||
void* dataMaterials = alloca(g_SphereCount * g_MatSize);
|
||||
void* dataEmissives = alloca(g_SphereCount * 4);
|
||||
ComputeParams dataParams;
|
||||
GetSceneDesc(dataSpheres, dataMaterials, &dataParams.cam, dataEmissives, &dataParams.emissiveCount);
|
||||
|
||||
dataParams.sphereCount = g_SphereCount;
|
||||
dataParams.screenWidth = kBackbufferWidth;
|
||||
dataParams.screenHeight = kBackbufferHeight;
|
||||
dataParams.frames = s_FrameCount;
|
||||
dataParams.invWidth = 1.0f / kBackbufferWidth;
|
||||
dataParams.invHeight = 1.0f / kBackbufferHeight;
|
||||
float lerpFac = float(s_FrameCount) / float(s_FrameCount + 1);
|
||||
if (s_Flags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(s_Flags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
dataParams.lerpFac = lerpFac;
|
||||
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataSpheres, 0, NULL, dataSpheres, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataMaterials, 0, NULL, dataMaterials, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataParams, 0, NULL, &dataParams, 0, 0);
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataEmissives, 0, NULL, dataEmissives, 0, 0);
|
||||
|
||||
ID3D11ShaderResourceView* srvs[] = {
|
||||
g_BackbufferIndex == 0 ? g_BackbufferSRV2 : g_BackbufferSRV,
|
||||
g_SRVSpheres,
|
||||
g_SRVMaterials,
|
||||
g_SRVParams,
|
||||
g_SRVEmissives
|
||||
};
|
||||
g_D3D11Ctx->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs);
|
||||
ID3D11UnorderedAccessView* uavs[] = {
|
||||
g_BackbufferIndex == 0 ? g_BackbufferUAV : g_BackbufferUAV2,
|
||||
g_UAVCounter
|
||||
};
|
||||
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
|
||||
g_D3D11Ctx->CSSetShader(g_ComputeShader, NULL, 0);
|
||||
g_D3D11Ctx->Begin(g_QueryDisjoint);
|
||||
g_D3D11Ctx->End(g_QueryBegin);
|
||||
g_D3D11Ctx->Dispatch(kBackbufferWidth/kCSGroupSizeX, kBackbufferHeight/kCSGroupSizeY, 1);
|
||||
g_D3D11Ctx->End(g_QueryEnd);
|
||||
uavs[0] = NULL;
|
||||
g_D3D11Ctx->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, NULL);
|
||||
++s_FrameCount;
|
||||
|
||||
#else
|
||||
QueryPerformanceCounter(&time1);
|
||||
float t = float(clock()) / CLOCKS_PER_SEC;
|
||||
static size_t s_RayCounter = 0;
|
||||
int rayCount;
|
||||
UpdateTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, s_Flags);
|
||||
DrawTest(t, s_FrameCount, kBackbufferWidth, kBackbufferHeight, g_Backbuffer, rayCount, s_Flags);
|
||||
s_FrameCount++;
|
||||
s_RayCounter += rayCount;
|
||||
LARGE_INTEGER time2;
|
||||
QueryPerformanceCounter(&time2);
|
||||
uint64_t dt = time2.QuadPart - time1.QuadPart;
|
||||
++s_Count;
|
||||
s_Time += dt;
|
||||
if (s_Count > 10)
|
||||
{
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
|
||||
double s = double(s_Time) / double(frequency.QuadPart) / s_Count;
|
||||
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
|
||||
SetWindowTextA(g_Wnd, s_Buffer);
|
||||
OutputDebugStringA(s_Buffer);
|
||||
s_Count = 0;
|
||||
s_Time = 0;
|
||||
s_RayCounter = 0;
|
||||
}
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
g_D3D11Ctx->Map(g_BackbufferTexture, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
|
||||
const uint8_t* src = (const uint8_t*)g_Backbuffer;
|
||||
uint8_t* dst = (uint8_t*)mapped.pData;
|
||||
for (int y = 0; y < kBackbufferHeight; ++y)
|
||||
{
|
||||
memcpy(dst, src, kBackbufferWidth * 16);
|
||||
src += kBackbufferWidth * 16;
|
||||
dst += mapped.RowPitch;
|
||||
}
|
||||
g_D3D11Ctx->Unmap(g_BackbufferTexture, 0);
|
||||
#endif
|
||||
|
||||
g_D3D11Ctx->VSSetShader(g_VertexShader, NULL, 0);
|
||||
g_D3D11Ctx->PSSetShader(g_PixelShader, NULL, 0);
|
||||
g_D3D11Ctx->PSSetShaderResources(0, 1, g_BackbufferIndex == 0 ? &g_BackbufferSRV : &g_BackbufferSRV2);
|
||||
g_D3D11Ctx->PSSetSamplers(0, 1, &g_SamplerLinear);
|
||||
g_D3D11Ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
g_D3D11Ctx->RSSetState(g_RasterState);
|
||||
g_D3D11Ctx->Draw(3, 0);
|
||||
g_D3D11SwapChain->Present(0, 0);
|
||||
|
||||
FrameMark;
|
||||
|
||||
#if DO_COMPUTE_GPU
|
||||
g_D3D11Ctx->End(g_QueryDisjoint);
|
||||
|
||||
// get GPU times
|
||||
while (g_D3D11Ctx->GetData(g_QueryDisjoint, NULL, 0, 0) == S_FALSE) { Sleep(0); }
|
||||
D3D10_QUERY_DATA_TIMESTAMP_DISJOINT tsDisjoint;
|
||||
g_D3D11Ctx->GetData(g_QueryDisjoint, &tsDisjoint, sizeof(tsDisjoint), 0);
|
||||
if (!tsDisjoint.Disjoint)
|
||||
{
|
||||
UINT64 tsBegin, tsEnd;
|
||||
// Note: on some GPUs/drivers, even when the disjoint query above already said "yeah I have data",
|
||||
// might still not return "I have data" for timestamp queries before it.
|
||||
while (g_D3D11Ctx->GetData(g_QueryBegin, &tsBegin, sizeof(tsBegin), 0) == S_FALSE) { Sleep(0); }
|
||||
while (g_D3D11Ctx->GetData(g_QueryEnd, &tsEnd, sizeof(tsEnd), 0) == S_FALSE) { Sleep(0); }
|
||||
|
||||
float s = float(tsEnd - tsBegin) / float(tsDisjoint.Frequency);
|
||||
|
||||
static uint64_t s_RayCounter;
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
g_D3D11Ctx->Map(g_DataCounter, 0, D3D11_MAP_READ, 0, &mapped);
|
||||
s_RayCounter += *(const int*)mapped.pData;
|
||||
g_D3D11Ctx->Unmap(g_DataCounter, 0);
|
||||
int zeroCount = 0;
|
||||
g_D3D11Ctx->UpdateSubresource(g_DataCounter, 0, NULL, &zeroCount, 0, 0);
|
||||
|
||||
static float s_Time;
|
||||
++s_Count;
|
||||
s_Time += s;
|
||||
if (s_Count > 150)
|
||||
{
|
||||
s = s_Time / s_Count;
|
||||
sprintf_s(s_Buffer, sizeof(s_Buffer), "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i\n", s * 1000.0f, 1.f / s, s_RayCounter / s_Count / s * 1.0e-6f, s_RayCounter / s_Count * 1.0e-6f, s_FrameCount);
|
||||
SetWindowTextA(g_Wnd, s_Buffer);
|
||||
s_Count = 0;
|
||||
s_Time = 0;
|
||||
s_RayCounter = 0;
|
||||
}
|
||||
|
||||
}
|
||||
#endif // #if DO_COMPUTE_GPU
|
||||
}
|
||||
|
||||
|
||||
// Window procedure for the test window.
//   WM_PAINT   - validates the window with an empty BeginPaint/EndPaint pair.
//   WM_DESTROY - posts the quit message.
//   WM_CHAR    - 'a' toggles kFlagAnimate; 'p' toggles kFlagProgressive and resets s_FrameCount.
// Every other message is forwarded to DefWindowProc.
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    if (message == WM_PAINT)
    {
        PAINTSTRUCT paint;
        BeginPaint(hWnd, &paint);
        EndPaint(hWnd, &paint);
        return 0;
    }

    if (message == WM_DESTROY)
    {
        PostQuitMessage(0);
        return 0;
    }

    if (message == WM_CHAR)
    {
        if (wParam == 'a')
        {
            s_Flags ^= kFlagAnimate;
        }
        else if (wParam == 'p')
        {
            s_Flags ^= kFlagProgressive;
            s_FrameCount = 0;
        }
        return 0;
    }

    return DefWindowProc(hWnd, message, wParam, lParam);
}
|
||||
|
||||
|
||||
// Creates the D3D11 device and immediate context, a windowed swap chain for
// g_Wnd, the render target view of its back buffer, the viewport, and the
// Tracy D3D11 profiling context.
// Returns the first failing HRESULT, or S_OK when everything was created.
static HRESULT InitD3DDevice()
{
    ZoneScoped;

    RECT clientRect;
    GetClientRect(g_Wnd, &clientRect);
    const UINT width = clientRect.right - clientRect.left;
    const UINT height = clientRect.bottom - clientRect.top;

    UINT deviceFlags = 0;
#ifdef _DEBUG
    deviceFlags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    D3D_FEATURE_LEVEL requestedLevels[] = { D3D_FEATURE_LEVEL_11_0 };
    HRESULT hr = D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, deviceFlags,
                                   requestedLevels, ARRAYSIZE(requestedLevels), D3D11_SDK_VERSION,
                                   &g_D3D11Device, &g_D3D11FeatureLevel, &g_D3D11Ctx);
    if (FAILED(hr))
    {
        return hr;
    }

    // Get DXGI factory: device -> IDXGIDevice -> adapter -> parent factory.
    // The intermediate interfaces are released as soon as they are no longer needed.
    IDXGIFactory1* factory = nullptr;
    IDXGIDevice* dxgiDevice = nullptr;
    hr = g_D3D11Device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&dxgiDevice));
    if (SUCCEEDED(hr))
    {
        IDXGIAdapter* adapter = nullptr;
        hr = dxgiDevice->GetAdapter(&adapter);
        if (SUCCEEDED(hr))
        {
            hr = adapter->GetParent(__uuidof(IDXGIFactory1), reinterpret_cast<void**>(&factory));
            adapter->Release();
        }
        dxgiDevice->Release();
    }
    if (FAILED(hr))
    {
        return hr;
    }

    // Create swap chain
    DXGI_SWAP_CHAIN_DESC swapDesc = {};
    swapDesc.BufferCount = 1;
    swapDesc.BufferDesc.Width = width;
    swapDesc.BufferDesc.Height = height;
    swapDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    swapDesc.BufferDesc.RefreshRate.Numerator = 60;
    swapDesc.BufferDesc.RefreshRate.Denominator = 1;
    swapDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    swapDesc.OutputWindow = g_Wnd;
    swapDesc.SampleDesc.Count = 1;
    swapDesc.SampleDesc.Quality = 0;
    swapDesc.Windowed = TRUE;
    hr = factory->CreateSwapChain(g_D3D11Device, &swapDesc, &g_D3D11SwapChain);

    // Prevent Alt-Enter. The factory is released before hr is checked so it is freed on failure too.
    factory->MakeWindowAssociation(g_Wnd, DXGI_MWA_NO_ALT_ENTER);
    factory->Release();

    if (FAILED(hr))
    {
        return hr;
    }

    // RTV
    ID3D11Texture2D* backBuffer = nullptr;
    hr = g_D3D11SwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast<void**>(&backBuffer));
    if (FAILED(hr))
    {
        return hr;
    }
    hr = g_D3D11Device->CreateRenderTargetView(backBuffer, nullptr, &g_D3D11RenderTarget);
    backBuffer->Release();
    if (FAILED(hr))
    {
        return hr;
    }

    g_D3D11Ctx->OMSetRenderTargets(1, &g_D3D11RenderTarget, nullptr);

    // Viewport covering the whole client area.
    D3D11_VIEWPORT viewport;
    viewport.TopLeftX = 0;
    viewport.TopLeftY = 0;
    viewport.Width = (float)width;
    viewport.Height = (float)height;
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
    g_D3D11Ctx->RSSetViewports(1, &viewport);

    // Tracy GPU profiling context, named "D3D11".
    g_tracyCtx = TracyD3D11Context(g_D3D11Device, g_D3D11Ctx);
    const char* tracyD3D11CtxName = "D3D11";
    TracyD3D11ContextName(g_tracyCtx, tracyD3D11CtxName, (uint16_t)strlen(tracyD3D11CtxName));

    return S_OK;
}
|
||||
|
||||
static void ShutdownD3DDevice()
|
||||
{
|
||||
ZoneScoped;
|
||||
|
||||
if (g_tracyCtx) TracyD3D11Destroy(g_tracyCtx);
|
||||
|
||||
if (g_D3D11Ctx) g_D3D11Ctx->ClearState();
|
||||
|
||||
if (g_D3D11RenderTarget) g_D3D11RenderTarget->Release();
|
||||
if (g_D3D11SwapChain) g_D3D11SwapChain->Release();
|
||||
if (g_D3D11Ctx) g_D3D11Ctx->Release();
|
||||
if (g_D3D11Device) g_D3D11Device->Release();
|
||||
}
|
||||
13
libs/tracy/examples/ToyPathTracer/Windows/VertexShader.hlsl
Normal file
@ -0,0 +1,13 @@
|
||||
// Vertex-to-pixel interpolants.
struct vs2ps
{
    float2 uv : TEXCOORD0;
    float4 pos : SV_Position;
};

// Builds a triangle purely from the vertex index, without a vertex buffer.
// vid 0, 1, 2 yields uv (0,0), (2,0), (0,2), i.e. clip-space positions
// (-1,-1), (3,-1), (-1,3).
vs2ps main(uint vid : SV_VertexID)
{
    const uint u = (vid << 1) & 2;
    const uint v = vid & 2;

    vs2ps result;
    result.uv = float2(u, v);
    // Map uv from [0,2] to clip space [-1,3].
    result.pos = float4(result.uv * 2.0f - 1.0f, 0, 1);
    return result;
}
|
||||
24
libs/tracy/examples/ToyPathTracer/license.md
Normal file
@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org>
|
||||
30
libs/tracy/examples/fibers.cpp
Normal file
@ -0,0 +1,30 @@
|
||||
// g++ fibers.cpp ../public/TracyClient.cpp -I../public/tracy -DTRACY_ENABLE -DTRACY_FIBERS -lpthread -ldl
|
||||
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "TracyC.h"
|
||||
|
||||
const char* fiber = "job1";
|
||||
TracyCZoneCtx zone;
|
||||
|
||||
int main()
|
||||
{
|
||||
std::thread t1( [] {
|
||||
TracyFiberEnter( fiber );
|
||||
TracyCZone( ctx, 1 );
|
||||
zone = ctx;
|
||||
sleep( 1 );
|
||||
TracyFiberLeave;
|
||||
});
|
||||
t1.join();
|
||||
|
||||
std::thread t2( [] {
|
||||
TracyFiberEnter( fiber );
|
||||
sleep( 1 );
|
||||
TracyCZoneEnd( zone );
|
||||
TracyFiberLeave;
|
||||
});
|
||||
t2.join();
|
||||
}
|
||||
64
libs/tracy/extra/color-hot.cpp
Normal file
@ -0,0 +1,64 @@
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
// Approximates the linear -> sRGB transfer curve as a weighted sum of
// v^(1/2), v^(1/4) and v^(1/8), each obtained by taking a further square root.
inline float linear2sRGB( float v )
{
    const float root2 = sqrt( v );
    const float root4 = sqrt( root2 );
    const float root8 = sqrt( root4 );
    return 0.585122381f * root2 + 0.783140355f * root4 - 0.368262736f * root8;
}
|
||||
|
||||
// Linear interpolation: returns v0 for t == 0 and v1 for t == 1.
float lerp( float v0, float v1, float t )
{
    const float fromStart = ( 1-t ) * v0;
    const float fromEnd = t * v1;
    return fromStart + fromEnd;
}
|
||||
|
||||
// Approximates the sRGB -> linear transfer curve with a cubic polynomial in v
// (no constant term), evaluated in nested form.
inline float sRGB2linear( float v )
{
    float acc = v * 0.305306011f + 0.682171111f;
    acc = v * acc + 0.012522878f;
    return v * acc;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int c0 = 0x3333FF;
|
||||
int c1 = 0x33FF33;
|
||||
|
||||
uint32_t t[256] = {};
|
||||
|
||||
float r0 = ( c0 & 0xFF ) / 255.f;
|
||||
float r1 = ( c1 & 0xFF ) / 255.f;
|
||||
float g0 = ( ( c0 >> 8 ) & 0xFF ) / 255.f;
|
||||
float g1 = ( ( c1 >> 8 ) & 0xFF ) / 255.f;
|
||||
float b0 = ( ( c0 >> 16 ) & 0xFF ) / 255.f;
|
||||
float b1 = ( ( c1 >> 16 ) & 0xFF ) / 255.f;
|
||||
|
||||
for( int i=0; i<256; i++ )
|
||||
{
|
||||
float m = i / 255.f;
|
||||
float rf = linear2sRGB( lerp( sRGB2linear( r0 ), sRGB2linear( r1 ), m ) );
|
||||
float gf = linear2sRGB( lerp( sRGB2linear( g0 ), sRGB2linear( g1 ), m ) );
|
||||
float bf = linear2sRGB( lerp( sRGB2linear( b0 ), sRGB2linear( b1 ), m ) );
|
||||
|
||||
int r = (int)std::clamp( rf * 255.f, 0.f, 255.f );
|
||||
int g = (int)std::clamp( gf * 255.f, 0.f, 255.f );
|
||||
int b = (int)std::clamp( bf * 255.f, 0.f, 255.f );
|
||||
|
||||
t[i] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r;
|
||||
}
|
||||
|
||||
printf( "uint32_t GoodnessColor[256] = {\n" );
|
||||
for( int i=0; i<256; i += 8 )
|
||||
{
|
||||
printf( " " );
|
||||
for( int j=i; j<i+8; j++ )
|
||||
{
|
||||
printf( " 0x%X,", t[j] );
|
||||
}
|
||||
printf( "\n" );
|
||||
}
|
||||
printf( "};\n" );
|
||||
}
|
||||
77
libs/tracy/extra/color.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
// Fast approximate square root done on the IEEE-754 bit pattern of v:
// subtract 1<<23, shift the pattern right by one, then add 1<<29.
// Exact for even powers of two (e.g. 4 -> 2), approximate otherwise.
//
// The bit pattern is moved with memcpy instead of being read back through a
// union member that was not the last one written, which is undefined behaviour
// in C++. The arithmetic on the bits is unchanged.
inline float sqrtfast( float v )
{
    static_assert( sizeof( int ) == sizeof( float ), "sqrtfast needs int and float to have the same size" );

    int bits;
    memcpy( &bits, &v, sizeof( bits ) );

    bits -= 1 << 23;
    bits >>= 1;
    bits += 1 << 29;

    float result;
    memcpy( &result, &bits, sizeof( result ) );
    return result;
}
|
||||
|
||||
inline float linear2sRGB( float v )
|
||||
{
|
||||
float s1 = sqrtfast( v );
|
||||
float s2 = sqrtfast( s1 );
|
||||
float s3 = sqrtfast( s2 );
|
||||
return 0.585122381f * s1 + 0.783140355f * s2 - 0.368262736f * s3;
|
||||
}
|
||||
|
||||
// Linear interpolation between two integers. The blend is computed in floating
// point and truncated by the conversion to int.
int lerp( int v0, int v1, float t )
{
    const float blended = ( 1-t ) * v0 + t * v1;
    return int( blended );
}
|
||||
|
||||
// Approximates the sRGB -> linear transfer curve with a cubic polynomial in v
// (no constant term), evaluated in nested form.
inline float sRGB2linear( float v )
{
    float acc = v * 0.305306011f + 0.682171111f;
    acc = v * acc + 0.012522878f;
    return v * acc;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int c0 = std::min( 255, int( sRGB2linear( 1.f ) * 255 ) );
|
||||
int c1 = std::min( 255, int( sRGB2linear( 0x44 / 255.f ) * 255 ) );
|
||||
|
||||
int s0 = std::min( 255, int( sRGB2linear( 1.f ) * 255 * 0.5 ) );
|
||||
int s1 = std::min( 255, int( sRGB2linear( 0x44 / 255.f ) * 255 * 0.5 ) );
|
||||
|
||||
float target = 80.f;
|
||||
|
||||
uint32_t t[256];
|
||||
memset( t, 0, sizeof( uint32_t ) * 256 );
|
||||
|
||||
for( int i=1; i<128; i++ )
|
||||
{
|
||||
float m = (i-1) / target;
|
||||
int l0 = std::min( 255, lerp( s0, c0, m ) );
|
||||
int l1 = std::min( 255, lerp( s1, c1, m ) );
|
||||
int g0 = std::min( 255, int( linear2sRGB( l0/255.f ) * 255 ) );
|
||||
int g1 = std::min( 255, int( linear2sRGB( l1/255.f ) * 255 ) );
|
||||
g0 = l0;
|
||||
g1 = l1;
|
||||
t[i] = 0xFF000000 | ( g1 << 16 ) | ( g0 << 8 ) | g1;
|
||||
t[uint8_t(-i)] = 0xFF000000 | ( g1 << 16 ) | ( g1 << 8 ) | g0;
|
||||
}
|
||||
|
||||
printf( "uint32_t MemDecayColor[256] = {\n" );
|
||||
for( int i=0; i<256; i += 8 )
|
||||
{
|
||||
printf( " " );
|
||||
for( int j=i; j<i+8; j++ )
|
||||
{
|
||||
printf( " 0x%X,", t[j] );
|
||||
}
|
||||
printf( "\n" );
|
||||
}
|
||||
printf( "};\n" );
|
||||
}
|
||||
14
libs/tracy/extra/desktop/application-tracy.xml
Normal file
@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
|
||||
<mime-type type="application/tracy">
|
||||
<comment>Tracy Profiler trace file</comment>
|
||||
<comment xml:lang="pl">Zrzut sesji profilera Tracy</comment>
|
||||
<icon name="application-tracy"/>
|
||||
<magic>
|
||||
<match type="string" value="tlZ\x04" offset="0"/>
|
||||
<match type="string" value="tZst" offset="0"/>
|
||||
<match type="string" value="tr\xFDP" offset="0"/>
|
||||
</magic>
|
||||
<glob pattern="*.tracy"/>
|
||||
</mime-type>
|
||||
</mime-info>
|
||||
13
libs/tracy/extra/desktop/tracy.desktop
Normal file
@ -0,0 +1,13 @@
|
||||
[Desktop Entry]
|
||||
Version=1.0
|
||||
Type=Application
|
||||
Name=Tracy Profiler
|
||||
GenericName=Code profiler
|
||||
GenericName[pl]=Profiler kodu
|
||||
Comment=Examine code to see where it is slow
|
||||
Comment[pl]=Znajdowanie wolno wykonującego się kodu
|
||||
Exec=/usr/bin/tracy %f
|
||||
Icon=tracy
|
||||
Terminal=false
|
||||
Categories=Development;Profiling;
|
||||
MimeType=application/tracy;
|
||||
22
libs/tracy/extra/dxt1divtable.c
Normal file
@ -0,0 +1,22 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Prints the 766-entry DXT1 division table, 16 values per line. Entry i is
 * (4 << 16) / (i + 1), saturated to 0xFFFF. */
int main()
{
    const int entries = 255*3+1;

    for( int i=0; i<entries; i++ )
    {
        // replace 4 with 2 for ARM NEON table
        uint32_t range = ( 4 << 16 ) / ( 1+i );
        range = range > 0xFFFF ? 0xFFFF : range;

        /* The 16th value on each line ends the line; the others are followed by a space. */
        const char separator = ( i % 16 == 15 ) ? '\n' : ' ';
        printf( "0x%04x,%c", range, separator );
    }
    printf( "\n" );
    return 0;
}
|
||||
36
libs/tracy/extra/dxt1table.c
Normal file
@ -0,0 +1,36 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Remapping applied to each 2-bit index: 0 -> 1, 1 -> 3, 2 -> 2, 3 -> 0. */
static const uint8_t IndexTable[4] = { 1, 3, 2, 0 };

/* Remaps each of the four 2-bit fields packed in the low byte of v through
 * IndexTable and returns the repacked byte.
 *
 * The top field is masked like the other three, so inputs outside 0..255 can
 * no longer index past the end of IndexTable. Results for 0..255 are unchanged. */
int convert( int v )
{
    int result = 0;
    for( int field=0; field<4; field++ )
    {
        const int shift = field * 2;
        const int index = ( v >> shift ) & 0x3;
        result |= IndexTable[index] << shift;
    }
    return result;
}
|
||||
|
||||
/* Prints convert(i) for every byte value, 16 per line: values are
 * tab-separated and the 16th value on each line ends the line. */
int main()
{
    for( int i=0; i<256; i++ )
    {
        const char separator = ( i % 16 == 15 ) ? '\n' : '\t';
        printf( "%i,%c", convert( i ), separator );
    }
    printf( "\n" );
    return 0;
}
|
||||
50
libs/tracy/extra/identify.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
// g++ identify.cpp -lpthread ../public/common/tracy_lz4.cpp ../zstd/common/*.c ../zstd/decompress/*.c ../zstd/decompress/huf_decompress_amd64.S
|
||||
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../server/TracyFileRead.hpp"
|
||||
#include "../public/common/TracyVersion.hpp"
|
||||
|
||||
static const uint8_t FileHeader[8] { 't', 'r', 'a', 'c', 'y', tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch };
|
||||
enum { FileHeaderMagic = 5 };
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
if( argc != 2 )
|
||||
{
|
||||
fprintf( stderr, "Usage: %s trace\n", argv[0] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
std::unique_ptr<tracy::FileRead> f( tracy::FileRead::Open( argv[1] ) );
|
||||
if( !f )
|
||||
{
|
||||
fprintf( stderr, "%s: Cannot open!\n", argv[1] );
|
||||
return -2;
|
||||
}
|
||||
|
||||
uint8_t hdr[8];
|
||||
f->Read( hdr, sizeof( hdr ) );
|
||||
if( memcmp( FileHeader, hdr, FileHeaderMagic ) != 0 )
|
||||
{
|
||||
fprintf( stderr, "%s: Bad header!\n", argv[1] );
|
||||
return -3;
|
||||
}
|
||||
|
||||
printf( "%s: %i.%i.%i\n", argv[1], hdr[FileHeaderMagic], hdr[FileHeaderMagic+1], hdr[FileHeaderMagic+2] );
|
||||
}
|
||||
catch( const tracy::NotTracyDump& )
|
||||
{
|
||||
fprintf( stderr, "%s: Not a tracy dump!\n", argv[1] );
|
||||
return -4;
|
||||
}
|
||||
catch( const tracy::FileReadError& )
|
||||
{
|
||||
fprintf( stderr, "%s: File read error!\n", argv[1] );
|
||||
return -5;
|
||||
}
|
||||
}
|
||||
26
libs/tracy/extra/make-build.sh
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/sh

# Rebuilds the Windows release binaries with MSBuild (located through
# vswhere.exe and wslpath) and collects the executables in ./tracy-build.

# Cleans and builds ../<dir>/build/win32/<name>.sln in Release/x64, then copies
# the resulting <name>.exe into tracy-build/.
#   $1 - project directory name
#   $2 - solution / executable base name
build_and_collect() {
    solution_dir=../$1/build/win32
    "$MSBUILD" "$solution_dir/$2.sln" /t:Clean /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
    "$MSBUILD" "$solution_dir/$2.sln" /t:Build /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
    cp "$solution_dir/x64/Release/$2.exe" tracy-build/
}

# Start from an empty output directory.
rm -rf tracy-build
mkdir tracy-build

./update-meson-version.sh

# Download vswhere once; it is reused on later runs.
if [ ! -f vswhere.exe ]; then
    wget https://github.com/microsoft/vswhere/releases/download/2.8.4/vswhere.exe
fi

# First installation path reported for versions 17.0 through 17.999, converted
# with wslpath and with carriage returns removed.
MSVC=$(./vswhere.exe -property installationPath -version '[17.0,17.999]' | head -n 1)
MSVC=$(wslpath "$MSVC" | tr -d '\r')
MSBUILD=$MSVC/MSBuild/Current/Bin/MSBuild.exe

for i in capture csvexport import-chrome update; do
    echo "$i..."
    build_and_collect "$i" "$i"
done

echo profiler...
build_and_collect profiler Tracy
|
||||
154
libs/tracy/extra/natvis.py
Normal file
@ -0,0 +1,154 @@
|
||||
import lldb
|
||||
|
||||
def VectorSummary(value, dict):
    """Return a one-line summary with the vector's size, capacity and magic flag.

    ``m_capacity`` is treated as a power-of-two exponent; values of 63 and
    above are reported as ``'read-only'`` instead of a number.
    """
    raw = value.GetNonSyntheticValue()

    def field(name):
        # Unsigned value of one member of the non-synthetic object.
        return raw.GetChildMemberWithName(name).GetValueAsUnsigned()

    size = field('m_size')
    exponent = field('m_capacity')
    if exponent < 63:
        capacity = 1 << exponent
    else:
        capacity = 'read-only'
    magic = bool(field('m_magic'))
    return '{size=%s, capacity=%s, magic=%s}' % (size, capacity, magic)
|
||||
|
||||
def ShortPtrSummary(value, dict):
    """Rebuild the address stored in a short pointer and return a value at it.

    The six children of ``m_ptr`` are combined least-significant byte first.
    The result is created with the type of the first template argument.
    """
    raw = value.GetNonSyntheticValue()
    packed = raw.GetChildMemberWithName('m_ptr')
    pointee = raw.GetType().GetTemplateArgumentType(0)

    address = 0
    for byte_index in range(6):
        byte = packed.GetChildAtIndex(byte_index).GetValueAsUnsigned()
        address |= byte << (8 * byte_index)

    return value.CreateValueFromAddress('m_ptr', address, pointee)
|
||||
|
||||
class ShortPtrPrinter:
    """Synthetic-children provider exposing the pointee of a short pointer as a single child."""

    def __init__(self, val, dict):
        self.val = val
        # Type of the pointee: the first template argument of the wrapped value.
        self.type = self.val.GetType().GetTemplateArgumentType(0)

    def update(self):
        # Reassemble the address from the six children of m_ptr, least-significant byte first.
        packed = self.val.GetChildMemberWithName('m_ptr')
        address = 0
        for byte_index in range(6):
            byte = packed.GetChildAtIndex(byte_index).GetValueAsUnsigned()
            address |= byte << (8 * byte_index)
        self.ptr = address

    def num_children(self):
        return 1

    def get_child_index(self, name):
        # Child names look like "[N]"; drop the brackets and parse N.
        digits = name.lstrip('[').rstrip(']')
        return int(digits)

    def get_child_at_index(self, index):
        # The index is ignored: the same pointee value is returned for any index.
        return self.val.CreateValueFromAddress('m_ptr', self.ptr, self.type)
|
||||
|
||||
class VectorPrinter:
    """Synthetic-children provider listing the elements of a vector."""

    def __init__(self, val, dict):
        self.val = val
        self.magic = bool(val.GetChildMemberWithName('m_magic').GetValueAsUnsigned())
        # With the magic flag set the element type is taken one template level
        # deeper (first argument of the first argument).
        element_type = val.GetType().GetTemplateArgumentType(0)
        if self.magic:
            element_type = element_type.GetTemplateArgumentType(0)
        self.type = element_type
        self.stride = self.type.GetByteSize()

    def update(self):
        # The data pointer is m_ptr.m_ptr: six children combined least-significant byte first.
        packed = self.val.GetChildMemberWithName('m_ptr').GetChildMemberWithName('m_ptr')
        address = 0
        for byte_index in range(6):
            byte = packed.GetChildAtIndex(byte_index).GetValueAsUnsigned()
            address |= byte << (8 * byte_index)
        self.ptr = address
        self.size = self.val.GetChildMemberWithName('m_size').GetValueAsUnsigned()

    def num_children(self):
        return self.size

    def get_child_index(self, name):
        # Child names look like "[N]"; drop the brackets and parse N.
        digits = name.lstrip('[').rstrip(']')
        return int(digits)

    def get_child_at_index(self, index):
        # Elements are laid out contiguously, `stride` bytes apart.
        element_address = self.ptr + index * self.stride
        return self.val.CreateValueFromAddress('[%d]' % index, element_address, self.type)
|
||||
|
||||
def Int24Summary(value, dict):
    """Return the unsigned integer formed by the three children of ``m_val``, least-significant byte first."""
    packed = value.GetNonSyntheticValue().GetChildMemberWithName('m_val')
    result = 0
    for byte_index in range(3):
        byte = packed.GetChildAtIndex(byte_index).GetValueAsUnsigned()
        result |= byte << (8 * byte_index)
    return result
|
||||
|
||||
def Int48Summary(value, dict):
    """Summary for tracy::Int48: combine the six m_val bytes into one integer.

    Byte 0 is the least significant byte.
    """
    raw_bytes = value.GetNonSyntheticValue().GetChildMemberWithName('m_val')
    combined = 0
    for position in range(6):
        combined |= raw_bytes.GetChildAtIndex(position).GetValueAsUnsigned() << (8 * position)
    return combined
|
||||
|
||||
class ZoneEventPrinter:
    """LLDB synthetic children provider for tracy::ZoneEvent.

    Unpacks the raw members into five children:
      start  - _start_srcloc shifted right by 16 bits
      end    - _end_child1 shifted right by 16 bits
      srcloc - low 16 bits of _start_srcloc
      child  - low 16 bits of _end_child1 (upper half) combined with _child2
      extra  - the 'extra' member as-is
    """

    # Child order; the position in this tuple is the child index.
    _NAMES = ('start', 'end', 'srcloc', 'child', 'extra')
    # C type used in the expression that materialises each child.
    _CTYPES = {
        'start': 'int64_t',
        'end': 'int64_t',
        'srcloc': 'int16_t',
        'child': 'int32_t',
        'extra': 'uint32_t',
    }

    def __init__(self, val, dict):
        self.val = val

    def update(self):
        """Re-read the packed members and split them into separate fields."""
        member = self.val.GetChildMemberWithName
        packed_start = member('_start_srcloc').GetValueAsUnsigned()
        child_low = member('_child2').GetValueAsUnsigned()
        packed_end = member('_end_child1').GetValueAsUnsigned()
        self.extra = member('extra').GetValueAsUnsigned()
        self.start = packed_start >> 16
        self.end = packed_end >> 16
        self.srcloc = packed_start & 0xffff
        self.child = ((packed_end & 0xffff) << 16) | child_low

    def num_children(self):
        """Return the fixed number of synthetic children."""
        return 5

    def get_child_index(self, name):
        """Map a child name to its index, or -1 for an unknown name."""
        try:
            return self._NAMES.index(name)
        except ValueError:
            return -1

    def get_child_at_index(self, index):
        """Materialise the child at `index`; returns None for other indices."""
        by_index = dict(enumerate(self._NAMES))
        name = by_index.get(index)
        if name is None:
            return None
        return self.val.CreateValueFromExpression(name, f'{self._CTYPES[name]} x = {getattr(self, name)}; x')
|
||||
|
||||
def RobinHoodSummary(value, dict):
    """Summary for tracy::detail::Table<...>: element count and load factor.

    The load factor is mNumElements divided by (mMask + 1).
    """
    table = value.GetNonSyntheticValue()
    element_count = table.GetChildMemberWithName('mNumElements').GetValueAsUnsigned()
    bucket_mask = table.GetChildMemberWithName('mMask').GetValueAsUnsigned()
    load_factor = float(element_count) / (bucket_mask + 1)
    return '{{size={}, load={}}}'.format(element_count, load_factor)
|
||||
|
||||
def __lldb_init_module(debugger, dict):
    """LLDB entry point: register the Tracy formatters when the script is imported.

    All summaries and synthetic providers go into the 'tracy' category,
    which is enabled by the last command.
    """
    lldb.formatters.Logger._lldb_formatters_debug_level = 2

    # Executed in order; the category is enabled only after everything is registered.
    registration_commands = (
        'type summary add -w tracy -F natvis.VectorSummary -x ^tracy::Vector<.+>',
        'type summary add -w tracy -F natvis.ShortPtrSummary -x ^tracy::short_ptr<.+>',
        'type summary add -w tracy -F natvis.Int24Summary -x ^tracy::Int24',
        'type summary add -w tracy -F natvis.Int48Summary -x ^tracy::Int48',
        'type summary add -w tracy -F natvis.RobinHoodSummary -x ^tracy::detail::Table<.*>',
        'type synthetic add -w tracy -l natvis.VectorPrinter -x ^tracy::Vector<.+>',
        'type synthetic add -w tracy -l natvis.ShortPtrPrinter -x ^tracy::short_ptr<.+>',
        'type synthetic add -w tracy -l natvis.ZoneEventPrinter -x ^tracy::ZoneEvent',
        'type summary add -w tracy -x ^tracy::ZoneEvent --summary-string "start = ${var.start}, end = ${var.end}, srcloc = ${var.srcloc}, child = ${var.child}, extra = ${var.extra}"',
        'type category enable tracy',
    )
    for command in registration_commands:
        debugger.HandleCommand(command)
|
||||
24
libs/tracy/extra/rdotbl.c
Normal file
@ -0,0 +1,24 @@
|
||||
#include <stdio.h>
|
||||
|
||||
int main()
{
    /* Parameter sets that were used previously (kept for reference):
     *   a = 16, b = 44, s = 4;   av = 12, bv = 6, cv = 3
     *   a = 32, b = 48, s = 16;  av = 12, bv = 6, cv = 3
     */
    const int a = 48, b = 64, s = 16;
    const int av = 48, bv = 32, cv = 24;

    /* Emit a C array definition with one entry per step of s over [0, 256):
       av while i < a, bv while i < b, cv afterwards. */
    printf( "int TrTbl[] = { " );
    for( int i=0; i<256; i+=s )
    {
        if( i != 0 ) printf( ", " );
        const int entry = i < a ? av : ( i < b ? bv : cv );
        printf( "%i", entry );
    }
    printf( " };\n" );
}
|
||||
46
libs/tracy/extra/uarch/TracyMicroArchitecture.hpp
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef TRACY_MICRO_ARCHITECTURE_HPP
#define TRACY_MICRO_ARCHITECTURE_HPP

// Data model for the per-microarchitecture instruction tables. The table
// definitions themselves are emitted by the generator in extra/uarch/uarch.cpp.

#include <stdint.h>

namespace tracy
{

// One operand of an instruction variant.
struct AsmDesc
{
    uint8_t type;       // operand kind; the generator writes 0 = imm, 1 = reg, 2 = mem/agen
    uint16_t width;     // operand width as given in the input data
};

// One measured variant of an instruction.
struct AsmVar
{
    int descNum;                    // number of valid entries in desc
    AsmDesc desc[5];
    int isaSet;                     // index into IsaList
    float tp;                       // throughput value taken from the measurement
    int port, uops, minlat, maxlat; // port: index into PortList, or -1; latencies are -1 when unknown
    bool minbound, maxbound;        // set when the corresponding latency is only an upper bound
};

// All variants of a single instruction.
struct AsmOp
{
    int id;                         // index into OpsList
    int descId;                     // index into OpDescList
    int numVariants;                // number of entries in variant
    const AsmVar*const* variant;
};

// All instructions known for one microarchitecture.
struct MicroArchitecture
{
    int numOps;                     // number of entries in ops
    const AsmOp*const* ops;
};

extern const char* MicroArchitectureList[];
extern const char* PortList[];
extern const char* OpsList[];
extern const char* OpDescList[];
extern const char* IsaList[];
extern const MicroArchitecture* const MicroArchitectureData[];

extern int OpsNum;
extern int MicroArchitectureNum;

}

#endif
|
||||
326
libs/tracy/extra/uarch/uarch.cpp
Normal file
@ -0,0 +1,326 @@
|
||||
// Use with instructions.xml retrieved from uops.info
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <pugixml.hpp>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// String interning table: every distinct string is assigned the next
// sequential index the first time it is seen.
struct Dictionary
{
    // Returns the index of str, registering it first if it is new.
    int Get( const std::string& str )
    {
        const auto found = str2idx.find( str );
        if( found == str2idx.end() )
        {
            const auto next = strlist.size();
            str2idx.emplace( str, next );
            strlist.emplace_back( str );
            return next;
        }
        return found->second;
    }

    // Convenience overload for C strings.
    int Get( const char* str )
    {
        return Get( std::string( str ) );
    }

    // Reverse lookup: the string registered under idx (no range check).
    const std::string& Get( int idx ) const
    {
        return strlist[idx];
    }

    // Number of distinct strings registered so far.
    size_t Size() const
    {
        return strlist.size();
    }

    std::unordered_map<std::string, int> str2idx;   // string -> index
    std::vector<std::string> strlist;               // index -> string, in insertion order
};
|
||||
|
||||
// One explicit operand of an instruction.
struct ParamDesc
{
    int type;       // 0 = imm, 1 = reg, 2 = mem or agen (assigned in main)
    int width;      // value of the operand's "width" attribute
};

// One measurement of an instruction on a given architecture.
// Member order matters: instances are created by aggregate initialization.
struct Variant
{
    std::vector<ParamDesc> desc;    // explicit operands
    int isaSet;                     // index into the ISA dictionary
    float tp;                       // throughput value read from the measurement
    int port;                       // index into the ports dictionary, or -1
    int uops;
    int minlat;                     // smallest latency seen, or -1 when none
    int maxlat;                     // largest latency seen, or -1 when none
    bool minbound;                  // minlat came with an *_is_upper_bound attribute
    bool maxbound;                  // maxlat came with an *_is_upper_bound attribute
};

// All variants recorded for one instruction.
struct Op
{
    std::vector<Variant> var;
    int desc;                       // index into the description dictionary
};

// Instructions of one microarchitecture, keyed by instruction index.
struct UArch
{
    std::unordered_map<int, Op> ops;
};
|
||||
|
||||
// Attribute names looked up on every <latency> element: the first member is
// the attribute holding a cycle count, the second the companion attribute
// whose presence marks that count as an upper bound.
using LatencyAttributeNames = std::pair<const char*, const char*>;

const std::vector<LatencyAttributeNames> LatencyValues = {
    { "cycles",               "cycles_is_upper_bound" },
    { "cycles_addr",          "cycles_addr_is_upper_bound" },
    { "cycles_addr_same_reg", "cycles_addr_same_reg_is_upper_bound" },
    { "cycles_addr_VSIB",     "cycles_addr_VSIB_is_upper_bound" },
    { "cycles_mem",           "cycles_mem_is_upper_bound" },
    { "cycles_mem_same_reg",  "cycles_mem_same_reg_is_upper_bound" },
    { "cycles_same_reg",      "cycles_same_reg_is_upper_bound" },
    { "max_cycles",           "max_cycles_is_upper_bound" },
    { "max_cycles_addr",      "max_cycles_addr_is_upper_bound" },
    { "min_cycles",           "min_cycles_is_upper_bound" },
    { "min_cycles_addr",      "min_cycles_addr_is_upper_bound" },
};
|
||||
|
||||
int main()
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
doc.load_file( "instructions.xml" );
|
||||
auto root = doc.child( "root" );
|
||||
|
||||
Dictionary ops;
|
||||
Dictionary opsdesc;
|
||||
Dictionary uarchs;
|
||||
Dictionary ports;
|
||||
Dictionary isas;
|
||||
|
||||
std::vector<UArch> uav;
|
||||
|
||||
for( auto& ext : root )
|
||||
{
|
||||
assert( strcmp( ext.name(), "extension" ) == 0 );
|
||||
for( auto& op : ext )
|
||||
{
|
||||
assert( strcmp( op.name(), "instruction" ) == 0 );
|
||||
auto opstr = op.attribute( "asm" ).value();
|
||||
auto opdesc = op.attribute( "summary" ).value();
|
||||
bool magic = false;
|
||||
if( opstr[0] == '{' )
|
||||
{
|
||||
if( memcmp( opstr, "{load} ", 7 ) == 0 )
|
||||
{
|
||||
magic = true;
|
||||
opstr += 7;
|
||||
}
|
||||
else
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
char tmpbuf[64];
|
||||
auto opstr2 = op.attribute( "string" ).value();
|
||||
const auto strnext = opstr2[strlen(opstr)];
|
||||
if( !magic && strnext != ' ' && strnext != '\0' )
|
||||
{
|
||||
if( memcmp( opstr2, "LEA_", 4 ) == 0 )
|
||||
{
|
||||
auto ptr = tmpbuf;
|
||||
opstr = tmpbuf;
|
||||
while( *opstr2 != ' ' ) *ptr++ = *opstr2++;
|
||||
*ptr = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
const auto opidx = ops.Get( opstr );
|
||||
const auto opdescidx = opsdesc.Get( opdesc );
|
||||
|
||||
int isaSet = isas.Get( op.attribute( "isa-set" ).value() );
|
||||
|
||||
std::vector<ParamDesc> desc;
|
||||
for( auto& param : op.children( "operand" ) )
|
||||
{
|
||||
if( !param.attribute( "suppressed" ) )
|
||||
{
|
||||
int type = 0;
|
||||
if( strcmp( param.attribute( "type" ).value(), "imm" ) == 0 ) type = 0;
|
||||
else if( strcmp( param.attribute( "type" ).value(), "reg" ) == 0 ) type = 1;
|
||||
else if( strcmp( param.attribute( "type" ).value(), "mem" ) == 0 ) type = 2;
|
||||
else if( strcmp( param.attribute( "type" ).value(), "agen" ) == 0 ) type = 2;
|
||||
desc.emplace_back( ParamDesc { type, atoi( param.attribute( "width" ).value() ) } );
|
||||
}
|
||||
}
|
||||
|
||||
for( auto& ua : op.children( "architecture" ) )
|
||||
{
|
||||
auto measurement = ua.child( "measurement" );
|
||||
if( measurement )
|
||||
{
|
||||
const auto uaidx = uarchs.Get( ua.attribute( "name" ).value() );
|
||||
if( uav.size() <= uaidx ) uav.emplace_back( UArch {} );
|
||||
auto& uai = uav[uaidx];
|
||||
auto& opi = uai.ops[opidx];
|
||||
opi.desc = opdescidx;
|
||||
|
||||
float tp = -1;
|
||||
if( measurement.attribute( "TP" ) ) tp = atof( measurement.attribute( "TP" ).value() );
|
||||
else if( measurement.attribute( "TP_ports" ) ) tp = atof( measurement.attribute( "TP_ports" ).value() );
|
||||
else if( measurement.attribute( "TP_unrolled" ) ) tp = atof( measurement.attribute( "TP_unrolled" ).value() );
|
||||
|
||||
int portid = measurement.attribute( "ports" ) ? ports.Get( measurement.attribute( "ports" ).value() ) : -1;
|
||||
int uops = measurement.attribute( "uops" ) ? atoi( measurement.attribute( "uops" ).value() ) : -1;
|
||||
assert( tp != -1 && uops != -1 );
|
||||
|
||||
int minlat = std::numeric_limits<int>::max();
|
||||
int maxlat = -1;
|
||||
bool minbound = false;
|
||||
bool maxbound = false;
|
||||
|
||||
for( auto& lat : measurement.children( "latency" ) )
|
||||
{
|
||||
for( auto& v : LatencyValues )
|
||||
{
|
||||
auto attr = lat.attribute( v.first );
|
||||
if( attr )
|
||||
{
|
||||
const auto av = atoi( attr.value() );
|
||||
bool bound = lat.attribute( v.second );
|
||||
if( minlat > av || ( minlat == av && minbound ) )
|
||||
{
|
||||
minlat = av;
|
||||
minbound = bound;
|
||||
}
|
||||
if( maxlat < av || ( maxlat == av && maxbound ) )
|
||||
{
|
||||
maxlat = av;
|
||||
maxbound = bound;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if( maxlat == -1 ) minlat = -1;
|
||||
|
||||
opi.var.emplace_back( Variant { desc, isaSet, tp, portid, uops, minlat, maxlat, minbound, maxbound } );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf( "#include \"TracyMicroArchitecture.hpp\"\n\n" );
|
||||
|
||||
printf( "namespace tracy\n{\n\n" );
|
||||
|
||||
printf( "const char* MicroArchitectureList[]={\n" );
|
||||
for( auto& v : uarchs.strlist )
|
||||
{
|
||||
printf( "\"%s\",\n", v.c_str() );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "const char* PortList[]={\n" );
|
||||
for( auto& v : ports.strlist )
|
||||
{
|
||||
printf( "\"%s\",\n", v.c_str() );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "const char* OpsList[]={\n" );
|
||||
for( auto& v : ops.strlist )
|
||||
{
|
||||
printf( "\"%s\",\n", v.c_str() );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "const char* IsaList[]={\n" );
|
||||
for( auto& v : isas.strlist )
|
||||
{
|
||||
printf( "\"%s\",\n", v.c_str() );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "const char* OpDescList[]={\n" );
|
||||
for( auto& v : opsdesc.strlist )
|
||||
{
|
||||
printf( "\"%s\",\n", v.c_str() );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "#define V static constexpr AsmVar\n" );
|
||||
printf( "#define A static constexpr AsmVar const*\n\n" );
|
||||
|
||||
int uaidx = 0;
|
||||
for( auto& ua : uav )
|
||||
{
|
||||
for( auto& op: ua.ops )
|
||||
{
|
||||
int varidx = 0;
|
||||
for( auto& var: op.second.var )
|
||||
{
|
||||
printf( "V z%x_%x_%x={%i,{", uaidx, op.first, varidx++, (int)var.desc.size() );
|
||||
bool first = true;
|
||||
for( auto& p : var.desc )
|
||||
{
|
||||
if( first ) first = false;
|
||||
else printf( "," );
|
||||
printf( "{%i,%i}", p.type, p.width );
|
||||
}
|
||||
printf( "},%i,%.2ff,%i,%i,%i,%i,%c,%c};\n", var.isaSet, var.tp, var.port, var.uops, var.minlat, var.maxlat, var.minbound ? '1' : '0', var.maxbound ? '1' : '0' );
|
||||
}
|
||||
|
||||
varidx = 0;
|
||||
printf( "A y%x_%x[]={", uaidx, op.first );
|
||||
bool first = true;
|
||||
for( auto& var: op.second.var )
|
||||
{
|
||||
if( first ) first = false;
|
||||
else printf( "," );
|
||||
printf( "&z%x_%x_%x", uaidx, op.first, varidx++ );
|
||||
}
|
||||
printf( "};\n" );
|
||||
}
|
||||
uaidx++;
|
||||
}
|
||||
|
||||
printf( "\n\n#define O static constexpr AsmOp\n\n" );
|
||||
|
||||
uaidx = 0;
|
||||
for( auto& ua : uav )
|
||||
{
|
||||
std::vector<decltype(ua.ops.begin())> opsort;
|
||||
for( auto it = ua.ops.begin(); it != ua.ops.end(); ++it )
|
||||
{
|
||||
auto& op = *it;
|
||||
printf( "O x%x_%x={%i,%i,%i,y%x_%x};\n", uaidx, op.first, op.first, op.second.desc, (int)op.second.var.size(), uaidx, op.first );
|
||||
opsort.emplace_back( it );
|
||||
}
|
||||
std::sort( opsort.begin(), opsort.end(), []( const auto& l, const auto& r ) { return l->first < r->first; } );
|
||||
printf( "static constexpr AsmOp const* w%x[]={", uaidx );
|
||||
bool first = true;
|
||||
for( auto& op: opsort )
|
||||
{
|
||||
if( first ) first = false;
|
||||
else printf( "," );
|
||||
printf( "&x%x_%x", uaidx, op->first );
|
||||
}
|
||||
printf( "};\n" );
|
||||
uaidx++;
|
||||
}
|
||||
printf( "\n" );
|
||||
|
||||
uaidx = 0;
|
||||
for( auto& ua : uav )
|
||||
{
|
||||
printf( "static constexpr MicroArchitecture v%x={%i,w%x};\n", uaidx, (int)ua.ops.size(), uaidx );
|
||||
uaidx++;
|
||||
}
|
||||
|
||||
printf( "\nconst MicroArchitecture* const MicroArchitectureData[]={" );
|
||||
uaidx = 0;
|
||||
bool first = true;
|
||||
for( auto& ua : uav )
|
||||
{
|
||||
if( first ) first = false;
|
||||
else printf( "," );
|
||||
printf( "&v%x", uaidx++ );
|
||||
}
|
||||
printf( "};\n\n" );
|
||||
|
||||
printf( "int OpsNum=%i;\nint MicroArchitectureNum=%i;\n", (int)ops.Size(), (int)uarchs.Size() );
|
||||
|
||||
printf( "}\n" );
|
||||
}
|
||||
13
libs/tracy/extra/update-meson-version.sh
Normal file
@ -0,0 +1,13 @@
|
||||
#!/bin/sh
# Copies the version declared in public/common/TracyVersion.hpp into the
# "version: 'X.Y.Z'" entry of meson.build.

set -eu

# Resolve everything relative to this script so it can be run from any directory.
script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
version_header="$script_dir/../public/common/TracyVersion.hpp"
meson_build="$script_dir/../meson.build"

major=$(grep -o -E 'Major = [0-9]+' "$version_header" | awk -F '= ' '{print $2}')
minor=$(grep -o -E 'Minor = [0-9]+' "$version_header" | awk -F '= ' '{print $2}')
patch=$(grep -o -E 'Patch = [0-9]+' "$version_header" | awk -F '= ' '{print $2}')

# Refuse to write a broken version if any component is missing or ambiguous
# (the pipelines above succeed even when grep matches nothing).
for component in "$major" "$minor" "$patch"; do
    case $component in
        ''|*[!0-9]*)
            echo "error: could not read the version from $version_header" >&2
            exit 1
            ;;
    esac
done

version="${major}.${minor}.${patch}"

# the extension is required for macOS's outdated sed
sed -i.bak "s/version: '[0-9]*\.[0-9]*\.[0-9]*'/version: '$version'/g" "$meson_build"
rm "$meson_build.bak"
|
||||
11
libs/tracy/extra/version.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../server/TracyFileHeader.hpp"
|
||||
#include "../public/common/TracyVersion.hpp"
|
||||
|
||||
int main()
|
||||
{
|
||||
const auto ver = uint32_t( tracy::FileVersion( tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch ) );
|
||||
fwrite( &ver, 1, 4, stdout );
|
||||
}
|
||||
42
libs/tracy/extra/x11_colors.c
Normal file
@ -0,0 +1,42 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Reads rgb.txt from the current directory and prints one
 * "<name> = 0xRRGGBB," line for every entry whose name (the text from
 * column 13 on) contains no space. Lines that are too short, too long for the
 * buffer, or that do not start with three integers are skipped.
 * Returns 1 when rgb.txt cannot be opened. */
int main()
{
    FILE* f = fopen( "rgb.txt", "rb" );
    if( !f )
    {
        perror( "rgb.txt" );
        return 1;
    }

    char buf[1024];
    int off = 0;
    int overlong = 0;   /* set when the current line did not fit into buf */
    for(;;)
    {
        /* Input is consumed one byte at a time; sz == 0 means EOF or error. */
        const int sz = (int)fread( buf+off, 1, 1, f );
        /* sz is tested first so buf[off] is only inspected when a byte was read. */
        if( sz == 0 || buf[off] == '\r' || buf[off] == '\n' )
        {
            /* A name exists only if the line extends past column 13. */
            if( off > 13 && !overlong )
            {
                int ok = 1;
                for( int i=13; i<off; i++ )
                {
                    if( buf[i] == ' ' ) ok = 0;
                }
                if( ok == 1 )
                {
                    buf[off] = '\0';
                    int r, g, b;
                    /* Skip lines without three leading integers instead of
                       printing uninitialized values. */
                    if( sscanf( buf, "%i %i %i", &r, &g, &b ) == 3 )
                    {
                        printf( "%s = 0x%02x%02x%02x,\n", buf+13, r, g, b );
                    }
                }
            }
            off = 0;
            overlong = 0;
            if( sz == 0 ) break;
        }
        else if( off < (int)sizeof( buf ) - 1 )
        {
            off++;
        }
        else
        {
            /* Buffer is full: keep reading up to the line end, but drop the line. */
            overlong = 1;
        }
    }

    fclose( f );
    return 0;
}
|
||||
67
libs/tracy/extra/zigzag.svg
Normal file
@ -0,0 +1,67 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
width="32"
|
||||
height="32"
|
||||
viewBox="0 0 8.4666665 8.4666666"
|
||||
version="1.1"
|
||||
id="svg5"
|
||||
inkscape:export-filename="zigzag.png"
|
||||
inkscape:export-xdpi="96"
|
||||
inkscape:export-ydpi="96"
|
||||
inkscape:version="1.2.2 (732a01da63, 2022-12-09)"
|
||||
sodipodi:docname="zigzag.svg"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg">
|
||||
<sodipodi:namedview
|
||||
id="namedview7"
|
||||
pagecolor="#505050"
|
||||
bordercolor="#eeeeee"
|
||||
borderopacity="1"
|
||||
inkscape:showpageshadow="0"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
inkscape:deskcolor="#505050"
|
||||
inkscape:document-units="px"
|
||||
showgrid="true"
|
||||
showguides="true"
|
||||
inkscape:zoom="26.808186"
|
||||
inkscape:cx="14.603748"
|
||||
inkscape:cy="18.893483"
|
||||
inkscape:window-width="3840"
|
||||
inkscape:window-height="2054"
|
||||
inkscape:window-x="-11"
|
||||
inkscape:window-y="-11"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="layer1">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid132"
|
||||
originx="0"
|
||||
originy="0" />
|
||||
<sodipodi:guide
|
||||
position="3.7041666,4.2333333"
|
||||
orientation="0,-1"
|
||||
id="guide742"
|
||||
inkscape:locked="false" />
|
||||
<sodipodi:guide
|
||||
position="4.2333333,4.2333333"
|
||||
orientation="1,0"
|
||||
id="guide744"
|
||||
inkscape:locked="false" />
|
||||
</sodipodi:namedview>
|
||||
<defs
|
||||
id="defs2" />
|
||||
<g
|
||||
inkscape:label="Warstwa 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<path
|
||||
style="fill:none;stroke:#ffffff;stroke-width:0.79374999;stroke-linecap:round;stroke-linejoin:round;stroke-opacity:1;stroke-dasharray:none;paint-order:normal;stroke-dashoffset:0"
|
||||
d="M -0.79374999,5.0270833 2.1166666,2.1166666 6.3499999,6.3499999 9.2604166,3.4395833"
|
||||
id="path800" />
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.0 KiB |
BIN
libs/tracy/extra/zigzag01.png
Normal file
|
After Width: | Height: | Size: 126 B |
BIN
libs/tracy/extra/zigzag02.png
Normal file
|
After Width: | Height: | Size: 133 B |
BIN
libs/tracy/extra/zigzag04.png
Normal file
|
After Width: | Height: | Size: 151 B |
BIN
libs/tracy/extra/zigzag08.png
Normal file
|
After Width: | Height: | Size: 191 B |
BIN
libs/tracy/extra/zigzag16.png
Normal file
|
After Width: | Height: | Size: 234 B |
BIN
libs/tracy/extra/zigzag32.png
Normal file
|
After Width: | Height: | Size: 263 B |
239
libs/tracy/getopt/getopt.c
Normal file
@ -0,0 +1,239 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Kim Grasman nor the
|
||||
* names of contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL KIM GRASMAN BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "getopt.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
char* optarg;
|
||||
int optopt;
|
||||
/* The variable optind [...] shall be initialized to 1 by the system. */
|
||||
int optind = 1;
|
||||
int opterr;
|
||||
|
||||
static char* optcursor = NULL;
|
||||
|
||||
/* Implemented based on [1] and [2] for optional arguments.
|
||||
optopt is handled FreeBSD-style, per [3].
|
||||
Other GNU and FreeBSD extensions are purely accidental.
|
||||
|
||||
[1] http://pubs.opengroup.org/onlinepubs/000095399/functions/getopt.html
|
||||
[2] http://www.kernel.org/doc/man-pages/online/pages/man3/getopt.3.html
|
||||
[3] http://www.freebsd.org/cgi/man.cgi?query=getopt&sektion=3&manpath=FreeBSD+9.0-RELEASE
|
||||
*/
|
||||
int getopt(int argc, char* const argv[], const char* optstring) {
|
||||
int optchar = -1;
|
||||
const char* optdecl = NULL;
|
||||
|
||||
optarg = NULL;
|
||||
opterr = 0;
|
||||
optopt = 0;
|
||||
|
||||
/* Unspecified, but we need it to avoid overrunning the argv bounds. */
|
||||
if (optind >= argc)
|
||||
goto no_more_optchars;
|
||||
|
||||
/* If, when getopt() is called argv[optind] is a null pointer, getopt()
|
||||
shall return -1 without changing optind. */
|
||||
if (argv[optind] == NULL)
|
||||
goto no_more_optchars;
|
||||
|
||||
/* If, when getopt() is called *argv[optind] is not the character '-',
|
||||
getopt() shall return -1 without changing optind. */
|
||||
if (*argv[optind] != '-')
|
||||
goto no_more_optchars;
|
||||
|
||||
/* If, when getopt() is called argv[optind] points to the string "-",
|
||||
getopt() shall return -1 without changing optind. */
|
||||
if (strcmp(argv[optind], "-") == 0)
|
||||
goto no_more_optchars;
|
||||
|
||||
/* If, when getopt() is called argv[optind] points to the string "--",
|
||||
getopt() shall return -1 after incrementing optind. */
|
||||
if (strcmp(argv[optind], "--") == 0) {
|
||||
++optind;
|
||||
goto no_more_optchars;
|
||||
}
|
||||
|
||||
if (optcursor == NULL || *optcursor == '\0')
|
||||
optcursor = argv[optind] + 1;
|
||||
|
||||
optchar = *optcursor;
|
||||
|
||||
/* FreeBSD: The variable optopt saves the last known option character
|
||||
returned by getopt(). */
|
||||
optopt = optchar;
|
||||
|
||||
/* The getopt() function shall return the next option character (if one is
|
||||
found) from argv that matches a character in optstring, if there is
|
||||
one that matches. */
|
||||
optdecl = strchr(optstring, optchar);
|
||||
if (optdecl) {
|
||||
/* [I]f a character is followed by a colon, the option takes an
|
||||
argument. */
|
||||
if (optdecl[1] == ':') {
|
||||
optarg = ++optcursor;
|
||||
if (*optarg == '\0') {
|
||||
/* GNU extension: Two colons mean an option takes an
|
||||
optional arg; if there is text in the current argv-element
|
||||
(i.e., in the same word as the option name itself, for example,
|
||||
"-oarg"), then it is returned in optarg, otherwise optarg is set
|
||||
to zero. */
|
||||
if (optdecl[2] != ':') {
|
||||
/* If the option was the last character in the string pointed to by
|
||||
an element of argv, then optarg shall contain the next element
|
||||
of argv, and optind shall be incremented by 2. If the resulting
|
||||
value of optind is greater than argc, this indicates a missing
|
||||
option-argument, and getopt() shall return an error indication.
|
||||
|
||||
Otherwise, optarg shall point to the string following the
|
||||
option character in that element of argv, and optind shall be
|
||||
incremented by 1.
|
||||
*/
|
||||
if (++optind < argc) {
|
||||
optarg = argv[optind];
|
||||
} else {
|
||||
/* If it detects a missing option-argument, it shall return the
|
||||
colon character ( ':' ) if the first character of optstring
|
||||
was a colon, or a question-mark character ( '?' ) otherwise.
|
||||
*/
|
||||
optarg = NULL;
|
||||
optchar = (optstring[0] == ':') ? ':' : '?';
|
||||
}
|
||||
} else {
|
||||
optarg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
optcursor = NULL;
|
||||
}
|
||||
} else {
|
||||
/* If getopt() encounters an option character that is not contained in
|
||||
optstring, it shall return the question-mark ( '?' ) character. */
|
||||
optchar = '?';
|
||||
}
|
||||
|
||||
if (optcursor == NULL || *++optcursor == '\0')
|
||||
++optind;
|
||||
|
||||
return optchar;
|
||||
|
||||
no_more_optchars:
|
||||
optcursor = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Implementation based on [1].
|
||||
|
||||
[1] http://www.kernel.org/doc/man-pages/online/pages/man3/getopt.3.html
|
||||
*/
|
||||
int getopt_long(int argc, char* const argv[], const char* optstring,
|
||||
const struct option* longopts, int* longindex) {
|
||||
const struct option* o = longopts;
|
||||
const struct option* match = NULL;
|
||||
int num_matches = 0;
|
||||
size_t argument_name_length = 0;
|
||||
size_t option_length = 0;
|
||||
const char* current_argument = NULL;
|
||||
int retval = -1;
|
||||
|
||||
optarg = NULL;
|
||||
optopt = 0;
|
||||
|
||||
if (optind >= argc)
|
||||
return -1;
|
||||
|
||||
if (strlen(argv[optind]) < 3 || strncmp(argv[optind], "--", 2) != 0)
|
||||
return getopt(argc, argv, optstring);
|
||||
|
||||
/* It's an option; starts with -- and is longer than two chars. */
|
||||
current_argument = argv[optind] + 2;
|
||||
argument_name_length = strcspn(current_argument, "=");
|
||||
for (; o->name; ++o) {
|
||||
/* Check for exact match first. */
|
||||
option_length = strlen(o->name);
|
||||
if (option_length == argument_name_length &&
|
||||
strncmp(o->name, current_argument, option_length) == 0) {
|
||||
match = o;
|
||||
num_matches = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If not exact, count the number of abbreviated matches. */
|
||||
if (strncmp(o->name, current_argument, argument_name_length) == 0) {
|
||||
match = o;
|
||||
++num_matches;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_matches == 1) {
|
||||
/* If longindex is not NULL, it points to a variable which is set to the
|
||||
index of the long option relative to longopts. */
|
||||
if (longindex)
|
||||
*longindex = (match - longopts);
|
||||
|
||||
/* If flag is NULL, then getopt_long() shall return val.
|
||||
Otherwise, getopt_long() returns 0, and flag shall point to a variable
|
||||
which shall be set to val if the option is found, but left unchanged if
|
||||
the option is not found. */
|
||||
if (match->flag)
|
||||
*(match->flag) = match->val;
|
||||
|
||||
retval = match->flag ? 0 : match->val;
|
||||
|
||||
if (match->has_arg != no_argument) {
|
||||
optarg = strchr(argv[optind], '=');
|
||||
if (optarg != NULL)
|
||||
++optarg;
|
||||
|
||||
if (match->has_arg == required_argument) {
|
||||
/* Only scan the next argv for required arguments. Behavior is not
|
||||
specified, but has been observed with Ubuntu and Mac OSX. */
|
||||
if (optarg == NULL && ++optind < argc) {
|
||||
optarg = argv[optind];
|
||||
}
|
||||
|
||||
if (optarg == NULL)
|
||||
retval = ':';
|
||||
}
|
||||
} else if (strchr(argv[optind], '=')) {
|
||||
/* An argument was provided to a non-argument option.
|
||||
I haven't seen this specified explicitly, but both GNU and BSD-based
|
||||
implementations show this behavior.
|
||||
*/
|
||||
retval = '?';
|
||||
}
|
||||
} else {
|
||||
/* Unknown option or ambiguous match. */
|
||||
retval = '?';
|
||||
}
|
||||
|
||||
++optind;
|
||||
return retval;
|
||||
}
|
||||
59
libs/tracy/getopt/getopt.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Kim Grasman nor the
|
||||
* names of contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL KIM GRASMAN BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef INCLUDED_GETOPT_PORT_H
|
||||
#define INCLUDED_GETOPT_PORT_H
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define no_argument 1
|
||||
#define required_argument 2
|
||||
#define optional_argument 3
|
||||
|
||||
extern char* optarg;
|
||||
extern int optind, opterr, optopt;
|
||||
|
||||
/* Description of one long option for getopt_long(). The array passed to
   getopt_long() is terminated by an entry whose name is NULL. */
struct option {
  /* Option name, without the leading "--". */
  const char* name;
  /* One of no_argument, required_argument or optional_argument. */
  int has_arg;
  /* If not NULL, *flag is set to val and getopt_long() returns 0. */
  int* flag;
  /* Value returned by getopt_long(), or stored in *flag when flag is set. */
  int val;
};
|
||||
|
||||
/* Short-option parser taking the argument count, the argument vector and an
 * option specification string. Only the prototype is visible here; presumably
 * it follows POSIX getopt() semantics -- confirm against the implementation. */
int getopt(int argc, char* const argv[], const char* optstring);
|
||||
|
||||
/* Long-option variant of getopt(): in addition to argc/argv/optstring it takes
 * a pointer to struct option entries (`longopts`) and an int out-pointer
 * (`longindex`). Only the prototype is visible here; presumably `longopts` is a
 * terminated array and `longindex` may receive the matched entry's index, as
 * in GNU getopt_long() -- confirm against the implementation. */
int getopt_long(int argc, char* const argv[],
|
||||
const char* optstring, const struct option* longopts, int* longindex);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // INCLUDED_GETOPT_PORT_H
|
||||
12
libs/tracy/icon/application-tracy.copying
Normal file
@ -0,0 +1,12 @@
|
||||
application-tracy.svg (and no other file) is based on icons retrieved from
|
||||
https://gitlab.gnome.org/GNOME/adwaita-icon-theme.git
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
This work is licenced under the Creative Commons Attribution-Share Alike 3.0
|
||||
United States License. To view a copy of this licence, visit
|
||||
http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative
|
||||
Commons, 171 Second Street, Suite 300, San Francisco, California 94105, USA.
|
||||
|
||||
When attributing the artwork, using "GNOME Project" is enough.
|
||||
Please link to http://www.gnome.org where available.
|
||||
35
libs/tracy/icon/application-tracy.svg
Normal file
@ -0,0 +1,35 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
<svg id="svg2331" width="135.47mm" height="135.47mm" version="1.1" viewBox="0 0 135.47 135.47" xmlns="http://www.w3.org/2000/svg">
|
||||
<defs id="defs2328">
|
||||
<filter id="filter16020" x="-.013633" y="-.010717" width="1.0273" height="1.0214" color-interpolation-filters="sRGB">
|
||||
<feGaussianBlur id="feGaussianBlur16022" stdDeviation="2.0004419"/>
|
||||
</filter>
|
||||
</defs>
|
||||
<g id="layer1" transform="translate(-15.03 -32.374)">
|
||||
<g id="use2061" transform="matrix(1.0583 0 0 1.0583 15.03 -2340.4)" enable-background="new" stroke-width=".25">
|
||||
<title id="title2018">text-x-preview</title>
|
||||
<g id="g2068">
|
||||
<g id="g2030" transform="matrix(.25 0 0 .25 0 2295)" stroke-width=".25">
|
||||
<g id="g2026" stroke-width=".25">
|
||||
<path id="path2020" d="m111.83-178.55h200l120.17 119.67 5e-3 296.24c3.1e-4 17.728-14.449 32.088-32.177 32.088h-288c-17.728 0-32-14.272-32-32v-384c0-17.728 14.272-32 32-32z" enable-background="new" filter="url(#filter16020)" opacity=".4"/>
|
||||
<path id="path2022" d="m111.83-179.67h200l120.17 119.67 5e-3 296.24c3.1e-4 17.728-14.449 32.088-32.177 32.088h-288c-17.728 0-32-14.272-32-32v-384c0-17.728 14.272-32 32-32z" enable-background="new" fill="#f6f5f4"/>
|
||||
<path id="path2024" d="m311.83-179.67v88c0 17.728 14.442 31.694 32.17 31.694l88.001-0.022223z" enable-background="new" fill="#deddda"/>
|
||||
</g>
|
||||
</g>
|
||||
<rect id="rect2032" x="-1.5e-6" y="2242" width="128" height="128" enable-background="new" fill="none" style="paint-order:normal"/>
|
||||
</g>
|
||||
</g>
|
||||
<g id="g1223" transform="matrix(.17198 0 0 .17198 -155.61 392.08)" enable-background="new">
|
||||
<rect id="rect3988" x="1218.9" y="-1801.1" width="340.16" height="340.16" rx="2" ry="5.3723e-6" fill="#d6e4ff" stroke="#000" stroke-width="15.118" style="paint-order:normal"/>
|
||||
<g id="g4861" transform="matrix(3.7795 0 0 3.7795 1200 -2572.1)" fill="#006ed6" stroke="#004586">
|
||||
<rect id="rect4803" x="15" y="217" width="70" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8" x="38" y="228" width="24" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-0" x="39" y="239" width="22" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-7" x="40" y="250" width="20" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-77" x="41" y="261" width="18" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-1" x="42" y="272" width="16" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.7 KiB |
BIN
libs/tracy/icon/icon.ico
Normal file
|
After Width: | Height: | Size: 4.7 KiB |
BIN
libs/tracy/icon/icon.pdf
Normal file
BIN
libs/tracy/icon/icon.png
Normal file
|
After Width: | Height: | Size: 854 B |
24
libs/tracy/icon/icon.svg
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
<svg id="svg8" width="100mm" height="100mm" version="1.1" viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg" xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
<metadata id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
|
||||
<dc:title/>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g id="layer1" transform="translate(0,-197)">
|
||||
<rect id="rect3988" x="5" y="202" width="90" height="90" rx=".52917" ry="1.4214e-6" fill="#d6e4ff" stroke="#000" stroke-width="4" style="paint-order:normal"/>
|
||||
<g id="g4861" transform="translate(-6e-7 -2)" fill="#006ed6" stroke="#004586">
|
||||
<rect id="rect4803" x="15" y="217" width="70" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8" x="38" y="228" width="24" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-0" x="39" y="239" width="22" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-7" x="40" y="250" width="20" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-77" x="41" y="261" width="18" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
<rect id="rect4803-8-1" x="42" y="272" width="16" height="10" rx=".52917" ry="1.4214e-6" style="paint-order:normal"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
21
libs/tracy/imgui/LICENSE.txt
Normal file
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014-2024 Omar Cornut
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
135
libs/tracy/imgui/imconfig.h
Normal file
@ -0,0 +1,135 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// DEAR IMGUI COMPILE-TIME OPTIONS
|
||||
// Runtime options (clipboard callbacks, enabling various features, etc.) can generally be set via the ImGuiIO structure.
|
||||
// You can use ImGui::SetAllocatorFunctions() before calling ImGui::CreateContext() to rewire memory allocation functions.
|
||||
//-----------------------------------------------------------------------------
|
||||
// A) You may edit imconfig.h (and not overwrite it when updating Dear ImGui, or maintain a patch/rebased branch with your modifications to it)
|
||||
// B) or '#define IMGUI_USER_CONFIG "my_imgui_config.h"' in your project and then add directives in your own file without touching this template.
|
||||
//-----------------------------------------------------------------------------
|
||||
// You need to make sure that configuration settings are defined consistently _everywhere_ Dear ImGui is used, which includes the imgui*.cpp
|
||||
// files but also _any_ of your code that uses Dear ImGui. This is because some compile-time options have an effect on data structures.
|
||||
// Defining those options in imconfig.h will ensure every compilation unit gets to see the same data structure layouts.
|
||||
// Call IMGUI_CHECKVERSION() from your .cpp file to verify that the data structures your files are using are matching the ones imgui.cpp is using.
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#pragma once
|
||||
|
||||
//---- Define assertion handler. Defaults to calling assert().
|
||||
// If your macro uses multiple statements, make sure it is enclosed in a 'do { .. } while (0)' block so it can be used as a single statement.
|
||||
//#define IM_ASSERT(_EXPR) MyAssert(_EXPR)
|
||||
//#define IM_ASSERT(_EXPR) ((void)(_EXPR)) // Disable asserts
|
||||
|
||||
//---- Define attributes of all API symbols declarations, e.g. for DLL under Windows
|
||||
// Using Dear ImGui via a shared library is not recommended, because of function call overhead and because we don't guarantee backward nor forward ABI compatibility.
|
||||
// DLL users: heaps and globals are not shared across DLL boundaries! You will need to call SetCurrentContext() + SetAllocatorFunctions()
|
||||
// for each static/DLL boundary you are calling from. Read "Context and Memory Allocators" section of imgui.cpp for more details.
|
||||
//#define IMGUI_API __declspec( dllexport )
|
||||
//#define IMGUI_API __declspec( dllimport )
|
||||
|
||||
//---- Don't define obsolete functions/enums/behaviors. Consider enabling from time to time after updating to clean your code of obsolete function/names.
|
||||
//#define IMGUI_DISABLE_OBSOLETE_FUNCTIONS
|
||||
//#define IMGUI_DISABLE_OBSOLETE_KEYIO // 1.87+ disable legacy io.KeyMap[]+io.KeysDown[] in favor of io.AddKeyEvent(). This is automatically done by IMGUI_DISABLE_OBSOLETE_FUNCTIONS.
|
||||
|
||||
//---- Disable all of Dear ImGui or don't implement standard windows/tools.
|
||||
// It is very strongly recommended to NOT disable the demo windows and debug tool during development. They are extremely useful in day to day work. Please read comments in imgui_demo.cpp.
|
||||
//#define IMGUI_DISABLE // Disable everything: all headers and source files will be empty.
|
||||
//#define IMGUI_DISABLE_DEMO_WINDOWS // Disable demo windows: ShowDemoWindow()/ShowStyleEditor() will be empty.
|
||||
//#define IMGUI_DISABLE_DEBUG_TOOLS // Disable metrics/debugger and other debug tools: ShowMetricsWindow(), ShowDebugLogWindow() and ShowIDStackToolWindow() will be empty.
|
||||
|
||||
//---- Don't implement some functions to reduce linkage requirements.
|
||||
//#define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS // [Win32] Don't implement default clipboard handler. Won't use and link with OpenClipboard/GetClipboardData/CloseClipboard etc. (user32.lib/.a, kernel32.lib/.a)
|
||||
//#define IMGUI_ENABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with Visual Studio] Implement default IME handler (require imm32.lib/.a, auto-link for Visual Studio, -limm32 on command-line for MinGW)
|
||||
//#define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS // [Win32] [Default with non-Visual Studio compilers] Don't implement default IME handler (won't require imm32.lib/.a)
|
||||
//#define IMGUI_DISABLE_WIN32_FUNCTIONS // [Win32] Won't use and link with any Win32 function (clipboard, IME).
|
||||
//#define IMGUI_ENABLE_OSX_DEFAULT_CLIPBOARD_FUNCTIONS // [OSX] Implement default OSX clipboard handler (need to link with '-framework ApplicationServices', this is why this is not the default).
|
||||
//#define IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS // Don't implement ImFormatString/ImFormatStringV so you can implement them yourself (e.g. if you don't want to link with vsnprintf)
|
||||
//#define IMGUI_DISABLE_DEFAULT_MATH_FUNCTIONS // Don't implement ImFabs/ImSqrt/ImPow/ImFmod/ImCos/ImSin/ImAcos/ImAtan2 so you can implement them yourself.
|
||||
//#define IMGUI_DISABLE_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle at all (replace them with dummies)
|
||||
//#define IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS // Don't implement ImFileOpen/ImFileClose/ImFileRead/ImFileWrite and ImFileHandle so you can implement them yourself if you don't want to link with fopen/fclose/fread/fwrite. This will also disable the LogToTTY() function.
|
||||
//#define IMGUI_DISABLE_DEFAULT_ALLOCATORS // Don't implement default allocators calling malloc()/free() to avoid linking with them. You will need to call ImGui::SetAllocatorFunctions().
|
||||
//#define IMGUI_DISABLE_SSE // Disable use of SSE intrinsics even if available
|
||||
|
||||
//---- Include imgui_user.h at the end of imgui.h as a convenience
|
||||
// May be convenient for some users to only explicitly include vanilla imgui.h and have extra stuff included.
|
||||
//#define IMGUI_INCLUDE_IMGUI_USER_H
|
||||
//#define IMGUI_USER_H_FILENAME "my_folder/my_imgui_user.h"
|
||||
|
||||
//---- Pack colors to BGRA8 instead of RGBA8 (to avoid converting from one to another)
|
||||
//#define IMGUI_USE_BGRA_PACKED_COLOR
|
||||
|
||||
//---- Use 32-bit for ImWchar (default is 16-bit) to support Unicode planes 1-16. (e.g. point beyond 0xFFFF like emoticons, dingbats, symbols, shapes, ancient languages, etc...)
|
||||
//#define IMGUI_USE_WCHAR32
|
||||
|
||||
//---- Avoid multiple STB libraries implementations, or redefine path/filenames to prioritize another version
|
||||
// By default the embedded implementations are declared static and not available outside of Dear ImGui sources files.
|
||||
//#define IMGUI_STB_TRUETYPE_FILENAME "my_folder/stb_truetype.h"
|
||||
//#define IMGUI_STB_RECT_PACK_FILENAME "my_folder/stb_rect_pack.h"
|
||||
//#define IMGUI_STB_SPRINTF_FILENAME "my_folder/stb_sprintf.h" // only used if IMGUI_USE_STB_SPRINTF is defined.
|
||||
//#define IMGUI_DISABLE_STB_TRUETYPE_IMPLEMENTATION
|
||||
//#define IMGUI_DISABLE_STB_RECT_PACK_IMPLEMENTATION
|
||||
//#define IMGUI_DISABLE_STB_SPRINTF_IMPLEMENTATION // only disabled if IMGUI_USE_STB_SPRINTF is defined.
|
||||
|
||||
//---- Use stb_sprintf.h for a faster implementation of vsnprintf instead of the one from libc (unless IMGUI_DISABLE_DEFAULT_FORMAT_FUNCTIONS is defined)
|
||||
// Compatibility checks of arguments and formats done by clang and GCC will be disabled in order to support the extra formats provided by stb_sprintf.h.
|
||||
//#define IMGUI_USE_STB_SPRINTF
|
||||
|
||||
//---- Use FreeType to build and rasterize the font atlas (instead of stb_truetype which is embedded by default in Dear ImGui)
|
||||
// Requires FreeType headers to be available in the include path. Requires program to be compiled with 'misc/freetype/imgui_freetype.cpp' (in this repository) + the FreeType library (not provided).
|
||||
// On Windows you may use vcpkg with 'vcpkg install freetype --triplet=x64-windows' + 'vcpkg integrate install'.
|
||||
//#define IMGUI_ENABLE_FREETYPE
|
||||
|
||||
//---- Use FreeType+lunasvg library to render OpenType SVG fonts (SVGinOT)
|
||||
// Requires lunasvg headers to be available in the include path + program to be linked with the lunasvg library (not provided).
|
||||
// Only works in combination with IMGUI_ENABLE_FREETYPE.
|
||||
// (implementation is based on Freetype's rsvg-port.c which is licensed under CeCILL-C Free Software License Agreement)
|
||||
//#define IMGUI_ENABLE_FREETYPE_LUNASVG
|
||||
|
||||
//---- Use stb_truetype to build and rasterize the font atlas (default)
|
||||
// The only purpose of this define is if you want to force compilation of the stb_truetype backend ALONG with the FreeType backend.
|
||||
//#define IMGUI_ENABLE_STB_TRUETYPE
|
||||
|
||||
//---- Define constructor and implicit cast operators to convert back<>forth between your math types and ImVec2/ImVec4.
|
||||
// This will be inlined as part of ImVec2 and ImVec4 class declarations.
|
||||
/*
|
||||
#define IM_VEC2_CLASS_EXTRA \
|
||||
constexpr ImVec2(const MyVec2& f) : x(f.x), y(f.y) {} \
|
||||
operator MyVec2() const { return MyVec2(x,y); }
|
||||
|
||||
#define IM_VEC4_CLASS_EXTRA \
|
||||
constexpr ImVec4(const MyVec4& f) : x(f.x), y(f.y), z(f.z), w(f.w) {} \
|
||||
operator MyVec4() const { return MyVec4(x,y,z,w); }
|
||||
*/
|
||||
//---- ...Or use Dear ImGui's own very basic math operators.
|
||||
//#define IMGUI_DEFINE_MATH_OPERATORS
|
||||
|
||||
//---- Using 32-bit vertex indices (default is 16-bit) is one way to allow large meshes with more than 64K vertices.
|
||||
// Your renderer backend will need to support it (most example renderer backends support both 16/32-bit indices).
|
||||
// Another way to allow large meshes while keeping 16-bit indices is to handle ImDrawCmd::VtxOffset in your renderer.
|
||||
// Read about ImGuiBackendFlags_RendererHasVtxOffset for details.
|
||||
//#define ImDrawIdx unsigned int
|
||||
|
||||
// When __EMSCRIPTEN__ is defined, ImDrawIdx is overridden to unsigned int,
// i.e. the non-default index-type option that the commented-out
// '#define ImDrawIdx unsigned int' line above offers. All other builds keep the default.
// NOTE(review): the renderer backend used for that build must accept this index type -- confirm.
#ifdef __EMSCRIPTEN__
|
||||
#define ImDrawIdx unsigned int
|
||||
#endif
|
||||
|
||||
//---- Override ImDrawCallback signature (will need to modify renderer backends accordingly)
|
||||
//struct ImDrawList;
|
||||
//struct ImDrawCmd;
|
||||
//typedef void (*MyImDrawCallback)(const ImDrawList* draw_list, const ImDrawCmd* cmd, void* my_renderer_user_data);
|
||||
//#define ImDrawCallback MyImDrawCallback
|
||||
|
||||
//---- Debug Tools: Macro to break in Debugger (we provide a default implementation of this in the codebase)
|
||||
// (use 'Metrics->Tools->Item Picker' to pick widgets with the mouse and break into them for easy debugging.)
|
||||
//#define IM_DEBUG_BREAK IM_ASSERT(0)
|
||||
//#define IM_DEBUG_BREAK __debugbreak()
|
||||
|
||||
//---- Debug Tools: Enable slower asserts
|
||||
//#define IMGUI_DEBUG_PARANOID
|
||||
|
||||
//---- Tip: You can add extra functions within the ImGui:: namespace from anywhere (e.g. your own sources/header files)
|
||||
/*
|
||||
namespace ImGui
|
||||
{
|
||||
void MyFunction(const char* name, MyMatrix44* mtx);
|
||||
}
|
||||
*/
|
||||