update tracy from 11.0 to 13.1 and fix build with tracy enabled

This commit is contained in:
Sven Balzer
2026-05-01 18:24:04 +02:00
parent 7fa5294e02
commit 2adf75973a
304 changed files with 20579 additions and 170182 deletions
File diff suppressed because it is too large Load Diff
+2 -18
View File
@@ -32,27 +32,11 @@
#include "client/TracyOverride.cpp"
#include "client/TracyKCore.cpp"
#if defined(TRACY_HAS_CALLSTACK)
# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
# include "libbacktrace/alloc.cpp"
# include "libbacktrace/dwarf.cpp"
# include "libbacktrace/fileline.cpp"
# include "libbacktrace/mmapio.cpp"
# include "libbacktrace/posix.cpp"
# include "libbacktrace/sort.cpp"
# include "libbacktrace/state.cpp"
# if TRACY_HAS_CALLSTACK == 4
# include "libbacktrace/macho.cpp"
# else
# include "libbacktrace/elf.cpp"
# endif
# include "common/TracyStackFrames.cpp"
# endif
#ifdef TRACY_ROCPROF
# include "client/TracyRocprof.cpp"
#endif
#ifdef _MSC_VER
# pragma comment(lib, "ws2_32.lib")
# pragma comment(lib, "dbghelp.lib")
# pragma comment(lib, "advapi32.lib")
# pragma comment(lib, "user32.lib")
# pragma warning(pop)
+272 -206
View File
@@ -24,15 +24,33 @@
# pragma warning( disable : 4091 )
# endif
# include <dbghelp.h>
# pragma comment( lib, "dbghelp.lib" )
# ifdef _MSC_VER
# pragma warning( pop )
# endif
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#elif defined(TRACY_USE_LIBBACKTRACE)
# include "../libbacktrace/backtrace.hpp"
# include <algorithm>
# include <dlfcn.h>
# include <cxxabi.h>
# include <stdlib.h>
// Implementation files
# include "../libbacktrace/alloc.cpp"
# include "../libbacktrace/dwarf.cpp"
# include "../libbacktrace/fileline.cpp"
# include "../libbacktrace/mmapio.cpp"
# include "../libbacktrace/posix.cpp"
# include "../libbacktrace/sort.cpp"
# include "../libbacktrace/state.cpp"
# if TRACY_HAS_CALLSTACK == 4
# include "../libbacktrace/macho.cpp"
# else
# include "../libbacktrace/elf.cpp"
# endif
# include "../common/TracyStackFrames.cpp"
#elif TRACY_HAS_CALLSTACK == 5
# include <dlfcn.h>
# include <cxxabi.h>
@@ -53,7 +71,7 @@ extern "C"
};
#endif
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5 || TRACY_HAS_CALLSTACK == 6
#if defined(TRACY_USE_LIBBACKTRACE) || TRACY_HAS_CALLSTACK == 5
// If you want to use your own demangling functionality (e.g. for another language),
// define TRACY_DEMANGLE and provide your own implementation of the __tracy_demangle
// function. The input parameter is a function name. The demangle function must
@@ -91,94 +109,147 @@ extern "C" const char* ___tracy_demangle( const char* mangled )
#endif
#endif
#if TRACY_HAS_CALLSTACK == 3
# define TRACY_USE_IMAGE_CACHE
#if defined(TRACY_USE_LIBBACKTRACE) && TRACY_HAS_CALLSTACK != 4 // dl_iterate_phdr is required for the current image cache. Need to move it to libbacktrace?
# define TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
# include <link.h>
#endif
namespace tracy
{
#ifdef TRACY_USE_IMAGE_CACHE
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
// so we can quickly determine which image an address falls into.
// We refresh this cache only when we hit an address that doesn't fall into any known range.
static bool IsKernelAddress(uint64_t addr) {
return (addr >> 63) != 0;
}
void DestroyImageEntry( ImageEntry& entry )
{
tracy_free( entry.m_path );
tracy_free( entry.m_name );
}
class ImageCache
{
public:
struct ImageEntry
ImageCache( size_t imageCacheCapacity = 512 )
: m_images( imageCacheCapacity )
{
void* m_startAddress = nullptr;
void* m_endAddress = nullptr;
char* m_name = nullptr;
};
ImageCache()
: m_images( 512 )
{
Refresh();
}
~ImageCache()
{
Clear();
}
const ImageEntry* GetImageForAddress( void* address )
ImageEntry* AddEntry( const ImageEntry& entry )
{
const ImageEntry* entry = GetImageForAddressImpl( address );
if( m_sorted ) m_sorted = m_images.empty() || ( entry.m_startAddress < m_images.back().m_startAddress );
ImageEntry* newEntry = m_images.push_next();
*newEntry = entry;
return newEntry;
}
const ImageEntry* GetImageForAddress( uint64_t address )
{
Sort();
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
[]( const ImageEntry& lhs, const uint64_t rhs ) { return lhs.m_startAddress > rhs; } );
if( it != m_images.end() && address < it->m_endAddress )
{
return it;
}
return nullptr;
}
void Sort()
{
if( m_sorted ) return;
std::sort( m_images.begin(), m_images.end(),
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
m_sorted = true;
}
void Clear()
{
for( ImageEntry& entry : m_images )
{
DestroyImageEntry( entry );
}
m_sorted = true;
m_images.clear();
}
bool ContainsImage( uint64_t startAddress ) const
{
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
}
protected:
tracy::FastVector<ImageEntry> m_images;
bool m_sorted = true;
};
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
// so we can quickly determine which image an address falls into.
// We refresh this cache only when we hit an address that doesn't fall into any known range.
class ImageCacheDlIteratePhdr : public ImageCache
{
public:
ImageCacheDlIteratePhdr()
{
Refresh();
}
~ImageCacheDlIteratePhdr()
{
}
const ImageEntry* GetImageForAddress( uint64_t address )
{
const ImageEntry* entry = ImageCache::GetImageForAddress( address );
if( !entry )
{
Refresh();
return GetImageForAddressImpl( address );
return ImageCache::GetImageForAddress( address );
}
return entry;
}
private:
tracy::FastVector<ImageEntry> m_images;
bool m_updated = false;
bool m_haveMainImageName = false;
static int Callback( struct dl_phdr_info* info, size_t size, void* data )
{
ImageCache* cache = reinterpret_cast<ImageCache*>( data );
ImageCacheDlIteratePhdr* cache = reinterpret_cast<ImageCacheDlIteratePhdr*>( data );
const auto startAddress = reinterpret_cast<void*>( info->dlpi_addr );
if( cache->Contains( startAddress ) ) return 0;
const auto startAddress = static_cast<uint64_t>( info->dlpi_addr );
if( cache->ContainsImage( startAddress ) ) return 0;
const uint32_t headerCount = info->dlpi_phnum;
assert( headerCount > 0);
const auto endAddress = reinterpret_cast<void*>( info->dlpi_addr +
const auto endAddress = static_cast<uint64_t>( info->dlpi_addr +
info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz);
ImageEntry* image = cache->m_images.push_next();
image->m_startAddress = startAddress;
image->m_endAddress = endAddress;
ImageEntry image{};
image.m_startAddress = startAddress;
image.m_endAddress = endAddress;
// the base executable name isn't provided when iterating with dl_iterate_phdr,
// we will have to patch the executable image name outside this callback
if( info->dlpi_name && info->dlpi_name[0] != '\0' )
{
size_t sz = strlen( info->dlpi_name ) + 1;
image->m_name = (char*)tracy_malloc( sz );
memcpy( image->m_name, info->dlpi_name, sz );
}
else
{
image->m_name = nullptr;
}
image.m_name = info->dlpi_name && info->dlpi_name[0] != '\0' ? CopyStringFast( info->dlpi_name ) : nullptr;
cache->AddEntry( image );
cache->m_updated = true;
return 0;
}
bool Contains( void* startAddress ) const
{
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
}
void Refresh()
{
m_updated = false;
@@ -186,9 +257,7 @@ private:
if( m_updated )
{
std::sort( m_images.begin(), m_images.end(),
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
Sort();
// patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks
UpdateMainImageName();
}
@@ -223,31 +292,45 @@ private:
m_haveMainImageName = true;
}
const ImageEntry* GetImageForAddressImpl( void* address ) const
{
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
[]( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } );
if( it != m_images.end() && address < it->m_endAddress )
{
return it;
}
return nullptr;
}
void Clear()
{
for( ImageEntry& entry : m_images )
{
tracy_free( entry.m_name );
}
m_images.clear();
ImageCache::Clear();
m_haveMainImageName = false;
}
};
#endif //#ifdef TRACY_USE_IMAGE_CACHE
using UserlandImageCache = ImageCacheDlIteratePhdr;
#else
using UserlandImageCache = ImageCache;
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
static UserlandImageCache* s_imageCache;
static ImageCache* s_krnlCache;
void CreateImageCaches()
{
assert( s_imageCache == nullptr && s_krnlCache == nullptr );
s_imageCache = new ( tracy_malloc( sizeof( UserlandImageCache ) ) ) UserlandImageCache();
s_krnlCache = new ( tracy_malloc( sizeof( ImageCache ) ) ) ImageCache();
}
void DestroyImageCaches()
{
if( s_krnlCache != nullptr )
{
s_krnlCache->~ImageCache();
tracy_free( s_krnlCache );
s_krnlCache = nullptr;
}
if( s_imageCache != nullptr )
{
s_imageCache->~UserlandImageCache();
tracy_free( s_imageCache );
s_imageCache = nullptr;
}
}
// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime,
// simply resolve the offset and image name (which will be enough the resolving to be done offline)
@@ -282,32 +365,17 @@ extern "C"
t_SymFromInlineContext _SymFromInlineContext = 0;
t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0;
TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0;
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr;
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags)
{
return ___tracy_RtlWalkFrameChainPtr(callers, count, flags);
}
}
struct ModuleCache
{
uint64_t start;
uint64_t end;
char* name;
};
static FastVector<ModuleCache>* s_modCache;
struct KernelDriver
{
uint64_t addr;
const char* mod;
const char* path;
};
KernelDriver* s_krnlCache = nullptr;
size_t s_krnlCacheCnt;
void InitCallstackCritical()
{
___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
}
void DbgHelpInit()
@@ -338,75 +406,57 @@ DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll,
return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 );
}
ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
char* FormatImageName( const char* imageName, uint32_t imageNameLength )
{
DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize );
ModuleCache* cachedModule = s_modCache->push_next();
cachedModule->start = baseOfDll;
cachedModule->end = baseOfDll + dllSize;
// when doing offline symbol resolution, we must store the full path of the dll for the resolving to work
if( s_shouldResolveSymbolsOffline )
{
cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1);
memcpy(cachedModule->name, imageName, imageNameLength);
cachedModule->name[imageNameLength] = '\0';
return CopyStringFast( imageName, imageNameLength );
}
else
{
auto ptr = imageName + imageNameLength;
while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--;
if (ptr > imageName) ptr++;
const char* ptr = imageName + imageNameLength;
while( ptr > imageName && *ptr != '\\' && *ptr != '/' ) ptr--;
if( ptr > imageName ) ptr++;
const auto namelen = imageName + imageNameLength - ptr;
cachedModule->name = (char*)tracy_malloc_fast(namelen + 3);
cachedModule->name[0] = '[';
memcpy(cachedModule->name + 1, ptr, namelen);
cachedModule->name[namelen + 1] = ']';
cachedModule->name[namelen + 2] = '\0';
}
return cachedModule;
char* alloc = (char*)tracy_malloc_fast( namelen + 3 );
alloc[0] = '[';
memcpy( alloc + 1, ptr, namelen );
alloc[namelen + 1] = ']';
alloc[namelen + 2] = '\0';
return alloc;
}
}
void InitCallstack()
ImageEntry* CacheModuleInfo( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
{
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
if( s_shouldResolveSymbolsOffline )
{
TracyDebug("TRACY: enabling offline symbol resolving!\n");
}
ImageEntry moduleEntry = {};
moduleEntry.m_startAddress = baseOfDll;
moduleEntry.m_endAddress = baseOfDll + dllSize;
moduleEntry.m_path = CopyStringFast( imagePath, imageNameLength );
moduleEntry.m_name = FormatImageName( imagePath, imageNameLength );
DbgHelpInit();
return s_imageCache->AddEntry( moduleEntry );
}
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_LOCK;
#endif
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
// and process module symbol loading at startup time - they will be loaded on demand later
// Sometimes this process can take a very long time and prevent resolving callstack frames
// symbols during that time.
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
if ( !initTimeModuleLoad )
{
TracyDebug("TRACY: skipping init time dbghelper module load\n");
}
ImageEntry* LoadSymbolsForModuleAndCache( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
{
DbgHelpLoadSymbolsForModule( imagePath, baseOfDll, dllSize );
return CacheModuleInfo( imagePath, imageNameLength, baseOfDll, dllSize );
}
static void CacheProcessDrivers()
{
DWORD needed;
LPVOID dev[4096];
if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
{
char windir[MAX_PATH];
if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 );
const auto windirlen = strlen( windir );
const auto sz = needed / sizeof( LPVOID );
s_krnlCache = (KernelDriver*)tracy_malloc( sizeof(KernelDriver) * sz );
int cnt = 0;
for( size_t i=0; i<sz; i++ )
{
char fn[MAX_PATH];
@@ -417,7 +467,12 @@ void InitCallstack()
buf[0] = '<';
memcpy( buf+1, fn, len );
memcpy( buf+len+1, ">", 2 );
s_krnlCache[cnt] = KernelDriver { (uint64_t)dev[i], buf };
ImageEntry kernelDriver{};
kernelDriver.m_startAddress = (uint64_t)dev[i];
kernelDriver.m_endAddress = 0;
kernelDriver.m_name = buf;
kernelDriver.m_path = nullptr;
const auto len = GetDeviceDriverFileNameA( dev[i], fn, sizeof( fn ) );
if( len != 0 )
@@ -433,27 +488,23 @@ void InitCallstack()
}
DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 );
const auto psz = strlen( path );
auto pptr = (char*)tracy_malloc_fast( psz+1 );
memcpy( pptr, path, psz );
pptr[psz] = '\0';
s_krnlCache[cnt].path = pptr;
kernelDriver.m_path = CopyString( path );
}
cnt++;
s_krnlCache->AddEntry(kernelDriver);
}
}
s_krnlCacheCnt = cnt;
std::sort( s_krnlCache, s_krnlCache + s_krnlCacheCnt, []( const KernelDriver& lhs, const KernelDriver& rhs ) { return lhs.addr > rhs.addr; } );
s_krnlCache->Sort();
}
}
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
new(s_modCache) FastVector<ModuleCache>( 512 );
static void CacheProcessModules()
{
DWORD needed;
HANDLE proc = GetCurrentProcess();
HMODULE mod[1024];
if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
{
const auto sz = needed / sizeof( HMODULE );
for( size_t i=0; i<sz; i++ )
@@ -472,6 +523,41 @@ void InitCallstack()
}
}
}
}
void InitCallstack()
{
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
if( s_shouldResolveSymbolsOffline )
{
TracyDebug("TRACY: enabling offline symbol resolving!\n");
}
CreateImageCaches();
DbgHelpInit();
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_LOCK;
#endif
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
// and process module symbol loading at startup time - they will be loaded on demand later
// Sometimes this process can take a very long time and prevent resolving callstack frames
// symbols during that time.
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
if ( !initTimeModuleLoad )
{
TracyDebug("TRACY: skipping init time dbghelper module load\n");
}
else
{
CacheProcessDrivers();
CacheProcessModules();
}
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_UNLOCK;
@@ -480,6 +566,7 @@ void InitCallstack()
void EndCallstack()
{
DestroyImageCaches();
}
const char* DecodeCallstackPtrFast( uint64_t ptr )
@@ -514,11 +601,11 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
const char* GetKernelModulePath( uint64_t addr )
{
assert( addr >> 63 != 0 );
assert( IsKernelAddress( addr ) );
if( !s_krnlCache ) return nullptr;
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
if( it == s_krnlCache + s_krnlCacheCnt ) return nullptr;
return it->path;
const ImageEntry* imageEntry = s_krnlCache->GetImageForAddress( addr );
if( imageEntry ) return imageEntry->m_path;
return nullptr;
}
struct ModuleNameAndBaseAddress
@@ -529,51 +616,38 @@ struct ModuleNameAndBaseAddress
ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr )
{
if( ( addr >> 63 ) != 0 )
if( IsKernelAddress( addr ) )
{
if( s_krnlCache )
{
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
if( it != s_krnlCache + s_krnlCacheCnt )
{
return ModuleNameAndBaseAddress{ it->mod, it->addr };
}
}
const ImageEntry* entry = s_krnlCache->GetImageForAddress( addr );
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
return ModuleNameAndBaseAddress{ "<kernel>", addr };
}
for( auto& v : *s_modCache )
{
if( addr >= v.start && addr < v.end )
{
return ModuleNameAndBaseAddress{ v.name, v.start };
}
}
const ImageEntry* entry = s_imageCache->GetImageForAddress( addr );
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
HMODULE mod[1024];
DWORD needed;
HANDLE proc = GetCurrentProcess();
// Do not use FreeLibrary because we set the flag GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT
// see https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulehandleexa to get more information
constexpr DWORD flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
HMODULE mod = NULL;
InitRpmalloc();
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
if( GetModuleHandleExA( flag, (char*)addr, &mod ) != 0 )
{
const auto sz = needed / sizeof( HMODULE );
for( size_t i=0; i<sz; i++ )
MODULEINFO info;
if( GetModuleInformation( proc, mod, &info, sizeof( info ) ) != 0 )
{
MODULEINFO info;
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
const auto base = uint64_t( info.lpBaseOfDll );
if( addr >= base && addr < ( base + info.SizeOfImage ) )
{
const auto base = uint64_t( info.lpBaseOfDll );
if( addr >= base && addr < base + info.SizeOfImage )
char name[1024];
const auto nameLength = GetModuleFileNameA( mod, name, sizeof( name ) );
if( nameLength > 0 )
{
char name[1024];
const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 );
if( nameLength > 0 )
{
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start };
}
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
ImageEntry* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
return ModuleNameAndBaseAddress{ cachedModule->m_name, cachedModule->m_startAddress };
}
}
}
@@ -753,7 +827,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name };
}
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#elif defined(TRACY_USE_LIBBACKTRACE)
enum { MaxCbTrace = 64 };
@@ -762,9 +836,6 @@ struct backtrace_state* cb_bts = nullptr;
int cb_num;
CallstackEntry cb_data[MaxCbTrace];
int cb_fixup;
#ifdef TRACY_USE_IMAGE_CACHE
static ImageCache* s_imageCache = nullptr;
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_DEBUGINFOD
debuginfod_client* s_debuginfod;
@@ -959,10 +1030,9 @@ void InitCallstack()
{
InitRpmalloc();
#ifdef TRACY_USE_IMAGE_CACHE
s_imageCache = (ImageCache*)tracy_malloc( sizeof( ImageCache ) );
new(s_imageCache) ImageCache();
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
CreateImageCaches();
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
@@ -1056,13 +1126,9 @@ debuginfod_client* GetDebuginfodClient()
void EndCallstack()
{
#ifdef TRACY_USE_IMAGE_CACHE
if( s_imageCache )
{
s_imageCache->~ImageCache();
tracy_free( s_imageCache );
}
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
DestroyImageCaches();
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
#ifndef TRACY_DEMANGLE
___tracy_free_demangle_buffer();
#endif
@@ -1252,17 +1318,17 @@ void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, Callst
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
{
InitRpmalloc();
if( ptr >> 63 == 0 )
if ( !IsKernelAddress( ptr ) )
{
const char* imageName = nullptr;
uint64_t imageBaseAddress = 0x0;
#ifdef TRACY_USE_IMAGE_CACHE
const auto* image = s_imageCache->GetImageForAddress((void*)ptr);
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
const auto* image = s_imageCache->GetImageForAddress( ptr );
if( image )
{
imageName = image->m_name;
imageBaseAddress = uint64_t(image->m_startAddress);
imageBaseAddress = uint64_t( image->m_startAddress );
}
#else
Dl_info dlinfo;
+6 -2
View File
@@ -8,8 +8,8 @@
# endif
# if defined _WIN32
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# include "../common/TracyWinFamily.hpp"
# if !defined TRACY_WIN32_NO_DESKTOP
# define TRACY_HAS_CALLSTACK 1
# endif
# elif defined __ANDROID__
@@ -30,6 +30,10 @@
# define TRACY_HAS_CALLSTACK 6
# endif
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#define TRACY_USE_LIBBACKTRACE
#endif
#endif
#endif
+23 -6
View File
@@ -1,15 +1,31 @@
#ifndef __TRACYCALLSTACK_HPP__
#define __TRACYCALLSTACK_HPP__
#include <stdint.h>
#include "../common/TracyApi.h"
#include "../common/TracyForceInline.hpp"
#include "TracyCallstack.h"
namespace tracy
{
struct ImageEntry
{
uint64_t m_startAddress = 0;
uint64_t m_endAddress = 0;
char* m_name = nullptr;
char* m_path = nullptr;
};
}
#ifndef TRACY_HAS_CALLSTACK
namespace tracy
{
static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
static constexpr bool has_callstack() { return false; }
static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; }
}
#else
@@ -38,6 +54,8 @@ static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
namespace tracy
{
static constexpr bool has_callstack() { return true; }
struct CallstackSymbolData
{
const char* file;
@@ -79,11 +97,10 @@ debuginfod_client* GetDebuginfodClient();
extern "C"
{
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long );
}
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 && depth < 63 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
@@ -112,7 +129,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v
return _URC_NO_REASON;
}
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 && depth < 63 );
@@ -127,7 +144,7 @@ static tracy_force_inline void* Callstack( int depth )
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 );
+4 -2
View File
@@ -219,8 +219,9 @@ public:
m_ctx.CustomName( name, size );
}
private:
T m_lockable;
private:
LockableCtx m_ctx;
};
@@ -535,8 +536,9 @@ public:
m_ctx.CustomName( name, size );
}
private:
T m_lockable;
private:
SharedLockableCtx m_ctx;
};
File diff suppressed because it is too large Load Diff
+193 -102
View File
@@ -58,6 +58,9 @@ TRACY_API bool IsProfilerStarted();
# define TracyIsStarted true
#endif
TRACY_API bool BeginSamplingProfiling();
TRACY_API void EndSamplingProfiling();
class GpuCtx;
class Profiler;
class Socket;
@@ -114,11 +117,11 @@ struct LuaZoneState
#define TracyLfqPrepare( _type ) \
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
auto __token = GetToken(); \
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
auto __token = tracy::GetToken(); \
auto& __tail = __token->get_tail_index(); \
auto item = __token->enqueue_begin( __magic ); \
MemWrite( &item->hdr.type, _type );
tracy::MemWrite( &item->hdr.type, _type );
#define TracyLfqCommit \
__tail.store( __magic + 1, std::memory_order_release );
@@ -136,11 +139,11 @@ struct LuaZoneState
#ifdef TRACY_FIBERS
# define TracyQueuePrepare( _type ) \
auto item = Profiler::QueueSerial(); \
MemWrite( &item->hdr.type, _type );
auto item = tracy::Profiler::QueueSerial(); \
tracy::MemWrite( &item->hdr.type, _type );
# define TracyQueueCommit( _name ) \
MemWrite( &item->_name.thread, GetThreadHandle() ); \
Profiler::QueueSerialFinish();
tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \
tracy::Profiler::QueueSerialFinish();
# define TracyQueuePrepareC( _type ) \
auto item = tracy::Profiler::QueueSerial(); \
tracy::MemWrite( &item->hdr.type, _type );
@@ -252,6 +255,9 @@ public:
#endif
}
bool BeginSamplingProfiling();
void EndSamplingProfiling();
tracy_force_inline uint32_t GetNextZoneId()
{
return m_zoneId.fetch_add( 1, std::memory_order_relaxed );
@@ -387,58 +393,58 @@ public:
TracyLfqCommit;
}
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth )
{
assert( size < (std::numeric_limits<uint16_t>::max)() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack );
MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyQueueCommit( messageFatThread );
}
static tracy_force_inline void Message( const char* txt, int callstack )
static tracy_force_inline void Message( const char* txt, int32_t callstack_depth )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->messageLiteral.time, GetTime() );
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
TracyQueueCommit( messageLiteralThread );
}
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth )
{
assert( size < (std::numeric_limits<uint16_t>::max)() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
MemWrite( &item->messageColorFat.time, GetTime() );
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) );
@@ -448,17 +454,17 @@ public:
TracyQueueCommit( messageColorFatThread );
}
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColorLiteral.time, GetTime() );
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) );
@@ -510,29 +516,31 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemAlloc( ptr, size, secure );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock();
}
else
{
MemAlloc( ptr, size, secure );
}
}
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
if( !ProfilerAllocatorAvailable() )
@@ -540,23 +548,25 @@ public:
MemFree( ptr, secure );
return;
}
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemFree( ptr, secure );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock();
}
else
{
MemFree( ptr, secure );
}
}
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
@@ -587,62 +597,101 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemAllocNamed( ptr, size, secure, name );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
profiler.m_serialLock.unlock();
}
else
{
MemAllocNamed( ptr, size, secure, name );
}
}
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemFreeNamed( ptr, secure, name );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
profiler.m_serialLock.unlock();
}
else
{
MemFreeNamed( ptr, secure, name );
}
}
static tracy_force_inline void SendCallstack( int depth )
static tracy_force_inline void MemDiscard( const char* name, bool secure )
{
#ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth );
TracyQueuePrepare( QueueType::Callstack );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyQueueCommit( callstackFatThread );
#else
static_cast<void>(depth); // unused
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
const auto thread = GetThreadHandle();
GetProfiler().m_serialLock.lock();
SendMemDiscard( QueueType::MemDiscard, thread, name );
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth )
{
if( secure && !ProfilerAvailable() ) return;
if( depth > 0 && has_callstack() )
{
# ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
GetProfiler().m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemDiscard( QueueType::MemDiscard, thread, name );
GetProfiler().m_serialLock.unlock();
}
else
{
MemDiscard( name, secure );
}
}
static tracy_force_inline void SendCallstack( int32_t depth )
{
if( depth > 0 && has_callstack() )
{
auto ptr = Callstack( depth );
TracyQueuePrepare( QueueType::Callstack );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyQueueCommit( callstackFatThread );
}
}
static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data )
@@ -677,6 +726,9 @@ public:
#ifdef TRACY_FIBERS
static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyQueuePrepare( QueueType::FiberEnter );
MemWrite( &item->fiberEnter.time, GetTime() );
MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber );
@@ -686,13 +738,16 @@ public:
static tracy_force_inline void LeaveFiber()
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyQueuePrepare( QueueType::FiberLeave );
MemWrite( &item->fiberLeave.time, GetTime() );
TracyQueueCommit( fiberLeave );
}
#endif
void SendCallstack( int depth, const char* skipBefore );
void SendCallstack( int32_t depth, const char* skipBefore );
static void CutCallstack( void* callstack, const char* skipBefore );
static bool ShouldExit();
@@ -800,7 +855,7 @@ private:
void InstallCrashHandler();
void RemoveCrashHandler();
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
void ClearSerial();
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
@@ -833,6 +888,21 @@ private:
m_bufferOffset += int( len );
}
char* SafeCopyProlog( const char* p, size_t size );
void SafeCopyEpilog( char* buf );
template<class Callable> // must be void( const char* buf, size_t size )
bool WithSafeCopy( const char* p, size_t size, Callable&& callable )
{
if( char* buf = SafeCopyProlog( p, size ) )
{
callable( buf, size );
SafeCopyEpilog( buf );
return true;
}
return false;
}
bool SendData( const char* data, size_t len );
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
void SendSourceLocation( uint64_t ptr );
@@ -862,14 +932,13 @@ private:
static tracy_force_inline void SendCallstackSerial( void* ptr )
{
#ifdef TRACY_HAS_CALLSTACK
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next();
#else
static_cast<void>(ptr); // unused
#endif
if( has_callstack() )
{
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next();
}
}
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
@@ -907,6 +976,18 @@ private:
GetProfiler().m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name )
{
assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack );
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
MemWrite( &item->memDiscard.time, GetTime() );
MemWrite( &item->memDiscard.thread, thread );
MemWrite( &item->memDiscard.name, (uint64_t)name );
GetProfiler().m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemName( const char* name )
{
assert( name );
@@ -922,7 +1003,6 @@ private:
double m_timerMul;
uint64_t m_resolution;
uint64_t m_delay;
std::atomic<int64_t> m_timeBegin;
uint32_t m_mainThread;
uint64_t m_epoch, m_exectime;
@@ -963,6 +1043,7 @@ private:
std::atomic<bool> m_isConnected;
#ifdef TRACY_ON_DEMAND
std::atomic<uint64_t> m_connectionId;
std::atomic<bool> m_symbolsBusy;
TracyMutex m_deferredLock;
FastVector<QueueItem> m_deferredQueue;
@@ -990,9 +1071,19 @@ private:
char* m_queryData;
char* m_queryDataPtr;
#if defined _WIN32
void* m_exceptionHandler;
#ifndef NDEBUG
// m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds.
std::atomic_bool m_inUse{ false };
#endif
char* m_safeSendBuffer;
#if defined _WIN32
void* m_prevHandler;
#else
int m_pipe[2];
int m_pipeBufSize;
#endif
#ifdef __linux__
struct {
struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt;
+556
View File
@@ -0,0 +1,556 @@
#include "../server/tracy_robin_hood.h"
#include "TracyProfiler.hpp"
#include "TracyThread.hpp"
#include "tracy/TracyC.h"
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <iostream>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <sstream>
#include <time.h>
#include <unordered_map>
#include <vector>
#define ROCPROFILER_CALL( result, msg ) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if( CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS ) \
{ \
std::string status_msg = rocprofiler_get_status_string( CHECKSTATUS ); \
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failed with error code " \
<< CHECKSTATUS << ": " << status_msg << std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" << status_msg \
<< ")"; \
throw std::runtime_error( errmsg.str() ); \
} \
}
namespace
{
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
struct DispatchData
{
int64_t launch_start;
int64_t launch_end;
uint32_t thread_id;
uint16_t query_id;
};
struct ToolData
{
uint32_t version;
const char* runtime_version;
uint32_t priority;
rocprofiler_client_id_t client_id;
uint8_t context_id;
bool init;
uint64_t query_id;
int64_t previous_cpu_time;
tracy::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t> client_kernels;
tracy::unordered_map<rocprofiler_dispatch_id_t, DispatchData> dispatch_data;
tracy::unordered_set<std::string> counter_names = { "SQ_WAVES", "GL2C_MISS", "GL2C_HIT" };
std::unique_ptr<tracy::Thread> cal_thread;
std::mutex mut{};
};
using namespace tracy;
rocprofiler_context_id_t& get_client_ctx()
{
static rocprofiler_context_id_t ctx{ 0 };
return ctx;
}
const char* CTX_NAME = "rocprofv3";
uint8_t gpu_context_allocate( ToolData* data )
{
timespec ts;
clock_gettime( CLOCK_BOOTTIME, &ts );
uint64_t cpu_timestamp = Profiler::GetTime();
uint64_t gpu_timestamp = ( (uint64_t)ts.tv_sec * 1000000000 ) + ts.tv_nsec;
float timestamp_period = 1.0f;
data->previous_cpu_time = cpu_timestamp;
// Allocate the process-unique GPU context ID. There's a max of 255 available;
// if we are recreating devices a lot we may exceed that. Don't do that, or
// wrap around and get weird (but probably still usable) numbers.
uint8_t context_id = tracy::GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed );
if( context_id >= 255 )
{
context_id %= 255;
}
uint8_t context_flags = 0;
#ifdef TRACY_ROCPROF_CALIBRATION
// Tell tracy we'll be passing calibrated timestamps and not to mess with
// the times. We'll periodically send GpuCalibration events in case the
// times drift.
context_flags |= tracy::GpuContextCalibration;
#endif
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
tracy::MemWrite( &item->gpuNewContext.cpuTime, cpu_timestamp );
tracy::MemWrite( &item->gpuNewContext.gpuTime, gpu_timestamp );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
tracy::MemWrite( &item->gpuNewContext.period, timestamp_period );
tracy::MemWrite( &item->gpuNewContext.context, context_id );
tracy::MemWrite( &item->gpuNewContext.flags, context_flags );
tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
tracy::Profiler::QueueSerialFinish();
}
// Send the name of the context along.
// NOTE: Tracy will unconditionally free the name so we must clone it here.
// Since internally Tracy will use its own rpmalloc implementation we must
// make sure we allocate from the same source.
size_t name_length = strlen( CTX_NAME );
char* cloned_name = (char*)tracy::tracy_malloc( name_length );
memcpy( cloned_name, CTX_NAME, name_length );
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)cloned_name );
tracy::MemWrite( &item->gpuContextNameFat.size, name_length );
tracy::Profiler::QueueSerialFinish();
}
return context_id;
}
uint64_t kernel_src_loc( ToolData* data, uint64_t kernel_id )
{
uint64_t src_loc = 0;
auto _lk = std::unique_lock{ data->mut };
rocprofiler_kernel_id_t kid = kernel_id;
if( data->client_kernels.count( kid ) )
{
auto& sym_data = data->client_kernels[kid];
const char* name = sym_data.kernel_name;
size_t name_len = strlen( name );
uint32_t line = 0;
src_loc = tracy::Profiler::AllocSourceLocation( line, NULL, 0, name, name_len, NULL, 0 );
}
return src_loc;
}
void record_interval( ToolData* data, rocprofiler_timestamp_t start_timestamp, rocprofiler_timestamp_t end_timestamp,
uint64_t src_loc, rocprofiler_dispatch_id_t dispatch_id )
{
uint16_t query_id = 0;
uint8_t context_id = data->context_id;
{
auto _lk = std::unique_lock{ data->mut };
query_id = data->query_id;
data->query_id++;
if( dispatch_id != UINT64_MAX )
{
DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
dispatch_data.query_id = query_id;
dispatch_data.thread_id = tracy::GetThreadHandle();
}
}
uint64_t cpu_start_time = 0, cpu_end_time = 0;
if( dispatch_id == UINT64_MAX )
{
cpu_start_time = tracy::Profiler::GetTime();
cpu_end_time = tracy::Profiler::GetTime();
}
else
{
auto _lk = std::unique_lock{ data->mut };
DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
cpu_start_time = dispatch_data.launch_start;
cpu_end_time = dispatch_data.launch_end;
}
if( src_loc != 0 )
{
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)src_loc );
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
tracy::Profiler::QueueSerialFinish();
}
}
else
{
static const ___tracy_source_location_data src_loc = { NULL, NULL, NULL, 0, 0 };
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial );
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)&src_loc );
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
tracy::Profiler::QueueSerialFinish();
}
}
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
tracy::MemWrite( &item->gpuTime.gpuTime, start_timestamp );
tracy::MemWrite( &item->gpuTime.queryId, query_id );
tracy::MemWrite( &item->gpuTime.context, context_id );
tracy::Profiler::QueueSerialFinish();
}
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
tracy::MemWrite( &item->gpuZoneEnd.cpuTime, cpu_end_time );
tracy::MemWrite( &item->gpuZoneEnd.thread, tracy::GetThreadHandle() );
tracy::MemWrite( &item->gpuZoneEnd.queryId, query_id );
tracy::MemWrite( &item->gpuZoneEnd.context, context_id );
tracy::Profiler::QueueSerialFinish();
}
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
tracy::MemWrite( &item->gpuTime.gpuTime, end_timestamp );
tracy::MemWrite( &item->gpuTime.queryId, query_id );
tracy::MemWrite( &item->gpuTime.context, context_id );
tracy::Profiler::QueueSerialFinish();
}
}
void record_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
rocprofiler_record_counter_t* record_data, size_t record_count,
rocprofiler_user_data_t /*user_data*/, void* callback_data )
{
assert( callback_data != nullptr );
ToolData* data = static_cast<ToolData*>( callback_data );
if( !data->init ) return;
std::unordered_map<rocprofiler_counter_instance_id_t, double> sums;
for( size_t i = 0; i < record_count; ++i )
{
auto _counter_id = rocprofiler_counter_id_t{};
ROCPROFILER_CALL( rocprofiler_query_record_counter_id( record_data[i].id, &_counter_id ),
"query record counter id" );
sums[_counter_id.handle] += record_data[i].counter_value;
}
uint16_t query_id = 0;
uint32_t thread_id = 0;
{
auto _lk = std::unique_lock{ data->mut };
// An assumption is made here that the counter values are supplied after the dispatch
// complete callback.
assert( data->dispatch_data.count( dispatch_data.dispatch_info.dispatch_id ) );
DispatchData& ddata = data->dispatch_data[dispatch_data.dispatch_info.dispatch_id];
query_id = ddata.query_id;
thread_id = ddata.thread_id;
}
for( auto& p : sums )
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneAnnotation );
tracy::MemWrite( &item->zoneAnnotation.noteId, p.first );
tracy::MemWrite( &item->zoneAnnotation.queryId, query_id );
tracy::MemWrite( &item->zoneAnnotation.thread, thread_id );
tracy::MemWrite( &item->zoneAnnotation.value, p.second );
tracy::MemWrite( &item->zoneAnnotation.context, data->context_id );
tracy::Profiler::QueueSerialFinish();
}
}
/**
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
* rocprofiler_counter_config_id_t* is a return to specify what counters to collect
* for this dispatch (dispatch_packet).
*/
void dispatch_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/,
void* callback_data )
{
assert( callback_data != nullptr );
ToolData* data = static_cast<ToolData*>( callback_data );
if( !data->init ) return;
/**
* This simple example uses the same profile counter set for all agents.
* We store this in a cache to prevent constructing many identical profile counter
* sets. We first check the cache to see if we have already constructed a counter"
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
* set.
*/
static std::shared_mutex m_mutex = {};
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
auto search_cache = [&]()
{
if( auto pos = profile_cache.find( dispatch_data.dispatch_info.agent_id.handle ); pos != profile_cache.end() )
{
*config = pos->second;
return true;
}
return false;
};
{
auto rlock = std::shared_lock{ m_mutex };
if( search_cache() ) return;
}
auto wlock = std::unique_lock{ m_mutex };
if( search_cache() ) return;
// GPU Counter IDs
std::vector<rocprofiler_counter_id_t> gpu_counters;
// Iterate through the agents and get the counters available on that agent
ROCPROFILER_CALL(
rocprofiler_iterate_agent_supported_counters(
dispatch_data.dispatch_info.agent_id,
[]( rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data )
{
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>( user_data );
for( size_t i = 0; i < num_counters; i++ )
{
vec->push_back( counters[i] );
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>( &gpu_counters ) ),
"Could not fetch supported counters" );
std::vector<rocprofiler_counter_id_t> collect_counters;
collect_counters.reserve( data->counter_names.size() );
// Look for the counters contained in counters_to_collect in gpu_counters
for( auto& counter : gpu_counters )
{
rocprofiler_counter_info_v0_t info;
ROCPROFILER_CALL(
rocprofiler_query_counter_info( counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>( &info ) ),
"Could not query info" );
if( data->counter_names.count( std::string( info.name ) ) > 0 )
{
collect_counters.push_back( counter );
size_t name_length = strlen( info.name );
char* cloned_name = (char*)tracy::tracy_malloc( name_length );
memcpy( cloned_name, info.name, name_length );
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuAnnotationName );
tracy::MemWrite( &item->gpuAnnotationNameFat.context, data->context_id );
tracy::MemWrite( &item->gpuAnnotationNameFat.noteId, counter.handle );
tracy::MemWrite( &item->gpuAnnotationNameFat.ptr, (uint64_t)cloned_name );
tracy::MemWrite( &item->gpuAnnotationNameFat.size, name_length );
tracy::Profiler::QueueSerialFinish();
}
}
}
// Create a colleciton profile for the counters
rocprofiler_profile_config_id_t profile = { .handle = 0 };
ROCPROFILER_CALL( rocprofiler_create_profile_config( dispatch_data.dispatch_info.agent_id, collect_counters.data(),
collect_counters.size(), &profile ),
"Could not construct profile cfg" );
profile_cache.emplace( dispatch_data.dispatch_info.agent_id.handle, profile );
// Return the profile to collect those counters for this dispatch
*config = profile;
}
void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data,
void* callback_data )
{
assert( callback_data != nullptr );
ToolData* data = static_cast<ToolData*>( callback_data );
if( !data->init ) return;
if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
{
auto* sym_data = static_cast<kernel_symbol_data_t*>( record.payload );
if( record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD )
{
auto _lk = std::unique_lock{ data->mut };
data->client_kernels.emplace( sym_data->kernel_id, *sym_data );
}
else if( record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD )
{
auto _lk = std::unique_lock{ data->mut };
data->client_kernels.erase( sym_data->kernel_id );
}
}
else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
{
auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
{
if( record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER )
{
auto _lk = std::unique_lock{ data->mut };
data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_start = tracy::Profiler::GetTime();
}
else if( record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
{
auto _lk = std::unique_lock{ data->mut };
data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_end = tracy::Profiler::GetTime();
}
}
else if( record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE )
{
uint64_t src_loc = kernel_src_loc( data, rdata->dispatch_info.kernel_id );
record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc,
rdata->dispatch_info.dispatch_id );
}
}
else if( record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY &&
record.operation != ROCPROFILER_MEMORY_COPY_NONE && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
{
auto* rdata = static_cast<rocprofiler_callback_tracing_memory_copy_data_t*>( record.payload );
const char* name = nullptr;
switch( record.operation )
{
case ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE:
name = "DeviceToDeviceCopy";
break;
case ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST:
name = "DeviceToHostCopy";
break;
case ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE:
name = "HostToDeviceCopy";
break;
case ROCPROFILER_MEMORY_COPY_HOST_TO_HOST:
name = "HostToHostCopy";
break;
}
size_t name_len = strlen( name );
uint64_t src_loc = tracy::Profiler::AllocSourceLocation( 0, NULL, 0, name, name_len, NULL, 0 );
record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, UINT64_MAX );
}
}
void calibration_thread( void* ptr )
{
while( !TracyIsStarted )
;
ToolData* data = static_cast<ToolData*>( ptr );
data->context_id = gpu_context_allocate( data );
const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" );
if( user_counters )
{
data->counter_names.clear();
std::stringstream ss( user_counters );
std::string counter;
while( std::getline( ss, counter, ',' ) ) data->counter_names.insert( counter );
}
data->init = true;
#ifdef TRACY_ROCPROF_CALIBRATION
while( data->init )
{
sleep( 1 );
timespec ts;
// HSA performs a linear interpolation of GPU time to CLOCK_BOOTTIME. However, this is
// subject to network time updates and can drift relative to tracy's clock.
clock_gettime( CLOCK_BOOTTIME, &ts );
int64_t cpu_timestamp = Profiler::GetTime();
int64_t gpu_timestamp = ts.tv_nsec + ts.tv_sec * 1e9L;
if( cpu_timestamp > data->previous_cpu_time )
{
auto* item = tracy::Profiler::QueueSerial();
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
tracy::MemWrite( &item->gpuCalibration.gpuTime, gpu_timestamp );
tracy::MemWrite( &item->gpuCalibration.cpuTime, cpu_timestamp );
tracy::MemWrite( &item->gpuCalibration.cpuDelta, cpu_timestamp - data->previous_cpu_time );
tracy::MemWrite( &item->gpuCalibration.context, data->context_id );
tracy::Profiler::QueueSerialFinish();
data->previous_cpu_time = cpu_timestamp;
}
}
#endif
}
int tool_init( rocprofiler_client_finalize_t fini_func, void* user_data )
{
ToolData* data = static_cast<ToolData*>( user_data );
data->cal_thread = std::make_unique<tracy::Thread>( calibration_thread, data );
ROCPROFILER_CALL( rocprofiler_create_context( &get_client_ctx() ), "context creation failed" );
ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_counting_service( get_client_ctx(), dispatch_callback,
user_data, record_callback, user_data ),
"Could not setup counting service" );
rocprofiler_tracing_operation_t ops[] = { ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER };
ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ops, 1,
tool_callback_tracing_callback, user_data ),
"callback tracing service failed to configure" );
rocprofiler_tracing_operation_t ops2[] = { ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
ROCPROFILER_KERNEL_DISPATCH_ENQUEUE };
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service( get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,
ops2, 2, tool_callback_tracing_callback, user_data ),
"callback tracing service failed to configure" );
ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, nullptr,
0, tool_callback_tracing_callback, user_data ),
"callback tracing service failed to configure" );
ROCPROFILER_CALL( rocprofiler_start_context( get_client_ctx() ), "start context" );
return 0;
}
void tool_fini( void* tool_data_v )
{
rocprofiler_stop_context( get_client_ctx() );
ToolData* data = static_cast<ToolData*>( tool_data_v );
data->init = false;
data->cal_thread.reset();
}
}
extern "C"
{
rocprofiler_tool_configure_result_t* rocprofiler_configure( uint32_t version, const char* runtime_version,
uint32_t priority, rocprofiler_client_id_t* client_id )
{
// If not the first tool to register, indicate that the tool doesn't want to do anything
if( priority > 0 ) return nullptr;
// (optional) Provide a name for this tool to rocprofiler
client_id->name = "Tracy";
// (optional) create configure data
static ToolData data = ToolData{ version, runtime_version, priority, *client_id, 0, false, 0, 0 };
// construct configure result
static auto cfg = rocprofiler_tool_configure_result_t{ sizeof( rocprofiler_tool_configure_result_t ),
&tool_init, &tool_fini, static_cast<void*>( &data ) };
return &cfg;
}
}
+28 -49
View File
@@ -10,7 +10,14 @@
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "TracyProfiler.hpp"
#include "TracyCallstack.hpp"
#if (defined(__GNUC__) || defined(__clang__))
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx) \
__attribute__((format(printf, fmt_idx, arg_idx)))
#else
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx)
#endif
namespace tracy
{
@@ -22,7 +29,7 @@ public:
ScopedZone& operator=( const ScopedZone& ) = delete;
ScopedZone& operator=( ScopedZone&& ) = delete;
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -33,13 +40,19 @@ public:
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyQueuePrepare( QueueType::ZoneBegin );
auto zoneQueue = QueueType::ZoneBegin;
if( depth > 0 && has_callstack() )
{
GetProfiler().SendCallstack( depth );
zoneQueue = QueueType::ZoneBeginCallstack;
}
TracyQueuePrepare( zoneQueue );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -50,55 +63,21 @@ public:
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc;
if( depth > 0 && has_callstack() )
{
GetProfiler().SendCallstack( depth );
zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack;
}
TracyQueuePrepare( zoneQueue );
const auto srcloc =
Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast<uint32_t>(0), is_active ) {}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
tracy_force_inline ~ScopedZone()
{
@@ -126,7 +105,7 @@ public:
TracyQueueCommit( zoneTextFatThread );
}
void TextFmt( const char* fmt, ... )
void TextFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
@@ -165,7 +144,7 @@ public:
TracyQueueCommit( zoneTextFatThread );
}
void NameFmt( const char* fmt, ... )
void NameFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
+1 -1
View File
@@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent )
FILE* f = fopen( tmp, "r" );
if( f )
{
fscanf( f, "%" PRIu64, &maxRange );
(void)fscanf( f, "%" PRIu64, &maxRange );
fclose( f );
}
}
+13 -1
View File
@@ -4,6 +4,7 @@
# if defined _WIN32
# include <windows.h>
# include "../common/TracyWinFamily.hpp"
# elif defined __linux__
# include <stdio.h>
# include <inttypes.h>
@@ -27,13 +28,24 @@ static inline uint64_t ConvertTime( const FILETIME& t )
void SysTime::ReadTimes()
{
FILETIME idleTime;
FILETIME kernelTime;
FILETIME userTime;
# if defined TRACY_GDK
FILETIME creationTime;
FILETIME exitTime;
GetProcessTimes( GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime );
idle = 0;
# else
FILETIME idleTime;
GetSystemTimes( &idleTime, &kernelTime, &userTime );
idle = ConvertTime( idleTime );
# endif
const auto kernel = ConvertTime( kernelTime );
const auto user = ConvertTime( userTime );
used = kernel + user;
+139 -59
View File
@@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 50 )
@@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->threadWakeup.thread, rt->threadId );
MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
@@ -232,6 +238,10 @@ void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
#endif
const auto& hdr = record->EventHeader;
// Check for Lost_Event (6a399ae0-4bc6-4de9-870b-3657f8947e7e)
if( hdr.ProviderId.Data1 == 0x6A399AE0 ) return;
assert( hdr.ProviderId.Data1 == 0x802EC45A );
assert( hdr.EventDescriptor.Id == 0x0011 );
@@ -498,11 +508,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
if( _GetThreadDescription )
{
PWSTR tmp;
_GetThreadDescription( hnd, &tmp );
char buf[256];
if( tmp )
if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
{
char buf[256];
auto ret = wcstombs( buf, tmp, 256 );
LocalFree(tmp);
if( ret != 0 )
{
threadName = CopyString( buf, ret );
@@ -521,25 +531,23 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
if( phnd != INVALID_HANDLE_VALUE )
{
HMODULE modules[1024];
DWORD needed;
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
MEMORY_BASIC_INFORMATION vmeminfo;
SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
if( infosize == sizeof( vmeminfo ) )
{
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
for( DWORD i=0; i<sz; i++ )
if (vmeminfo.Type == MEM_IMAGE)
{
// for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE
HMODULE mod = (HMODULE)vmeminfo.AllocationBase;
MODULEINFO info;
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 )
{
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 );
if( modlen != 0 )
{
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{
threadName = CopyString( buf2, modlen );
threadSent = true;
}
threadName = CopyString( buf2, modlen );
threadSent = true;
}
}
}
@@ -606,6 +614,7 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
# include <fcntl.h>
# include <inttypes.h>
# include <limits>
# include <mntent.h>
# include <poll.h>
# include <stdio.h>
# include <stdlib.h>
@@ -678,7 +687,7 @@ enum TraceEventId
EventBranchMiss,
EventVsync,
EventContextSwitch,
EventWakeup,
EventWaking,
};
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
@@ -753,6 +762,42 @@ static const char* ReadFile( const char* path )
return tmp;
}
static const char* ReadFile( const char* base, const char* path )
{
const auto blen = strlen( base );
const auto plen = strlen( path );
auto tmp = (char*)tracy_malloc( blen + plen + 1 );
memcpy( tmp, base, blen );
memcpy( tmp + blen, path, plen );
tmp[blen+plen] = '\0';
auto res = ReadFile( tmp );
tracy_free( tmp );
return res;
}
static char* GetTraceFsPath()
{
auto f = setmntent( "/proc/mounts", "r" );
if( !f ) return nullptr;
char* ret = nullptr;
while( auto ent = getmntent( f ) )
{
if( strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
{
auto len = strlen( ent->mnt_dir );
ret = (char*)tracy_malloc( len + 1 );
memcpy( ret, ent->mnt_dir, len );
ret[len] = '\0';
break;
}
}
endmntent( f );
return ret;
}
bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
@@ -767,16 +812,22 @@ bool SysTraceStart( int64_t& samplingPeriod )
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
#endif
int switchId = -1, wakeupId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
auto traceFsPath = GetTraceFsPath();
if( !traceFsPath ) return false;
TracyDebug( "tracefs path: %s\n", traceFsPath );
int switchId = -1, wakingId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
if( switchIdStr ) switchId = atoi( switchIdStr );
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
if( wakingIdStr ) wakingId = atoi( wakingIdStr );
const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
tracy_free( traceFsPath );
TracyDebug( "sched_switch id: %i\n", switchId );
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
TracyDebug( "sched_waking id: %i\n", wakingId );
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
#ifdef TRACY_NO_SAMPLING
@@ -831,7 +882,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
2 + // CPU cycles + instructions retired
2 + // cache reference + miss
2 + // branch retired + miss
2 + // context switches + wakeups
2 + // context switches + waking ups
1 // vsync
);
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
@@ -1076,18 +1127,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
}
}
if( wakeupId != -1 )
if( wakingId != -1 )
{
pe.config = wakeupId;
pe.config &= ~PERF_SAMPLE_CALLCHAIN;
pe = {};
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof( perf_event_attr );
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
// Coult ask for callstack here
//pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
pe.disabled = 1;
pe.inherit = 1;
pe.config = wakingId;
pe.read_format = 0;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup wakeup capture\n" );
TracyDebug( "Setup waking up capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWaking, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
@@ -1332,6 +1396,7 @@ void SysTraceWorker( void* ptr )
hadData = true;
while( activeNum > 0 )
{
// Find the earliest event from the active buffers
int sel = -1;
int selPos;
int64_t t0 = std::numeric_limits<int64_t>::max();
@@ -1369,6 +1434,7 @@ void SysTraceWorker( void* ptr )
}
}
}
// Found any event
if( sel >= 0 )
{
auto& ring = ringArray[ctxBufferIdx + sel];
@@ -1384,10 +1450,10 @@ void SysTraceWorker( void* ptr )
const auto rid = ring.GetId();
if( rid == EventContextSwitch )
{
// Layout:
// u64 time
// u64 cnt
// u64 ip[cnt]
// Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
// u64 time // PERF_SAMPLE_TIME
// u64 cnt // PERF_SAMPLE_CALLCHAIN
// u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN
// u32 size
// u8 data[size]
// Data (not ABI stable, but has not changed since it was added, in 2009):
@@ -1408,35 +1474,43 @@ void SysTraceWorker( void* ptr )
const auto traceOffset = offset;
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
uint32_t prev_pid, next_pid;
uint32_t prev_pid, prev_prio;
uint32_t next_pid, next_prio;
long prev_state;
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t ) + sizeof( uint32_t );
offset += sizeof( uint32_t );
ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &prev_state, offset, sizeof( long ) );
offset += sizeof( long ) + 16;
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &next_prio, offset, sizeof( uint32_t ) );
uint8_t reason = 100;
uint8_t state;
uint8_t oldThreadWaitReason = 100;
uint8_t oldThreadState;
if( prev_state & 0x0001 ) state = 104;
else if( prev_state & 0x0002 ) state = 101;
else if( prev_state & 0x0004 ) state = 105;
else if( prev_state & 0x0008 ) state = 106;
else if( prev_state & 0x0010 ) state = 108;
else if( prev_state & 0x0020 ) state = 109;
else if( prev_state & 0x0040 ) state = 110;
else if( prev_state & 0x0080 ) state = 102;
else state = 103;
if( prev_state & 0x0001 ) oldThreadState = 104;
else if( prev_state & 0x0002 ) oldThreadState = 101;
else if( prev_state & 0x0004 ) oldThreadState = 105;
else if( prev_state & 0x0008 ) oldThreadState = 106;
else if( prev_state & 0x0010 ) oldThreadState = 108;
else if( prev_state & 0x0020 ) oldThreadState = 109;
else if( prev_state & 0x0040 ) oldThreadState = 110;
else if( prev_state & 0x0080 ) oldThreadState = 102;
else oldThreadState = 103;
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, t0 );
MemWrite( &item->contextSwitch.oldThread, prev_pid );
MemWrite( &item->contextSwitch.newThread, next_pid );
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
MemWrite( &item->contextSwitch.reason, reason );
MemWrite( &item->contextSwitch.state, state );
MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
TracyLfqCommit;
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
@@ -1450,27 +1524,33 @@ void SysTraceWorker( void* ptr )
TracyLfqCommit;
}
}
else if( rid == EventWakeup )
else if( rid == EventWaking)
{
// See /sys/kernel/debug/tracing/events/sched/sched_waking/format
// Layout:
// u64 time
// u64 time // PERF_SAMPLE_TIME
// u32 size
// u8 data[size]
// Data:
// u8 hdr[8]
// u8 comm[16]
// u32 pid
// u32 prio
// u64 target_cpu
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
// i32 prio
// i32 target_cpu
const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
offset += dataOffset + 8 + 16;
uint32_t pid;
ring.Read( &pid, offset, sizeof( uint32_t ) );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, t0 );
MemWrite( &item->threadWakeup.thread, pid );
MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
int8_t adjustReason = -1; // Does not exist on Linux
int8_t adjustIncrement = 0; // Should perhaps store the new prio?
MemWrite( &item->threadWakeup.adjustReason, adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement );
TracyLfqCommit;
}
else
+2 -2
View File
@@ -2,8 +2,8 @@
#define __TRACYSYSTRACE_HPP__
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# include "../common/TracyWinFamily.hpp"
# if !defined TRACY_WIN32_NO_DESKTOP
# define TRACY_HAS_SYSTEM_TRACING
# endif
#endif
+4 -2
View File
@@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap;
# define _Thread_local __declspec(thread)
# define TLS_MODEL
# else
# ifndef __HAIKU__
# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26
# define TLS_MODEL __attribute__((tls_model("local-dynamic")))
# elif !defined(__HAIKU__)
# define TLS_MODEL __attribute__((tls_model("initial-exec")))
# else
# define TLS_MODEL
@@ -2778,7 +2780,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) {
_memory_huge_pages = 1;
}
#if PLATFORM_WINDOWS
#if PLATFORM_WINDOWS && !defined TRACY_GDK
if (_memory_config.enable_huge_pages) {
HANDLE token = 0;
size_t large_page_minimum = GetLargePageMinimum();
+2 -3
View File
@@ -9,7 +9,7 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 69 };
enum : uint32_t { ProtocolVersion = 76 };
enum : uint16_t { BroadcastVersion = 3 };
using lz4sz_t = uint32_t;
@@ -83,7 +83,7 @@ struct WelcomeFlag
enum _t : uint8_t
{
OnDemand = 1 << 0,
IsApple = 1 << 1,
IgnoreMemFaults = 1 << 1,
CodeTransfer = 1 << 2,
CombineSamples = 1 << 3,
IdentifySamples = 1 << 4,
@@ -95,7 +95,6 @@ struct WelcomeMessage
double timerMul;
int64_t initBegin;
int64_t initEnd;
uint64_t delay;
uint64_t resolution;
uint64_t epoch;
uint64_t exectime;
+55 -5
View File
@@ -42,6 +42,8 @@ enum class QueueType : uint8_t
MemAllocCallstackNamed,
MemFreeCallstack,
MemFreeCallstackNamed,
MemDiscard,
MemDiscardCallstack,
GpuZoneBegin,
GpuZoneBeginCallstack,
GpuZoneBeginAllocSrcLoc,
@@ -59,6 +61,7 @@ enum class QueueType : uint8_t
ThreadWakeup,
GpuTime,
GpuContextName,
GpuAnnotationName,
CallstackFrameSize,
SymbolInformation,
ExternalNameMetadata,
@@ -109,6 +112,7 @@ enum class QueueType : uint8_t
SecondStringData,
MemNamePayload,
ThreadGroupHint,
GpuZoneAnnotation,
StringData,
ThreadName,
PlotName,
@@ -329,7 +333,7 @@ struct QueuePlotDataInt : public QueuePlotDataBase
int64_t val;
};
struct QueuePlotDataFloat : public QueuePlotDataBase
struct QueuePlotDataFloat : public QueuePlotDataBase
{
float val;
};
@@ -401,7 +405,11 @@ enum class GpuContextType : uint8_t
Vulkan,
OpenCL,
Direct3D12,
Direct3D11
Direct3D11,
Metal,
Custom,
CUDA,
Rocprof
};
enum GpuContextFlags : uint8_t
@@ -441,6 +449,15 @@ struct QueueGpuZoneEnd
uint8_t context;
};
struct QueueGpuZoneAnnotation
{
int64_t noteId;
double value;
uint32_t thread;
uint16_t queryId;
uint8_t context;
};
struct QueueGpuTime
{
int64_t gpuTime;
@@ -462,7 +479,7 @@ struct QueueGpuTimeSync
int64_t cpuTime;
uint8_t context;
};
struct QueueGpuContextName
{
uint8_t context;
@@ -474,6 +491,18 @@ struct QueueGpuContextNameFat : public QueueGpuContextName
uint16_t size;
};
struct QueueGpuAnnotationName
{
int64_t noteId;
uint8_t context;
};
struct QueueGpuAnnotationNameFat : public QueueGpuAnnotationName
{
uint64_t ptr;
uint16_t size;
};
struct QueueMemNamePayload
{
uint64_t name;
@@ -500,6 +529,13 @@ struct QueueMemFree
uint64_t ptr;
};
struct QueueMemDiscard
{
int64_t time;
uint32_t thread;
uint64_t name;
};
struct QueueCallstackFat
{
uint64_t ptr;
@@ -593,14 +629,20 @@ struct QueueContextSwitch
uint32_t oldThread;
uint32_t newThread;
uint8_t cpu;
uint8_t reason;
uint8_t state;
uint8_t oldThreadWaitReason;
uint8_t oldThreadState;
uint8_t previousCState;
int8_t newThreadPriority;
int8_t oldThreadPriority;
};
struct QueueThreadWakeup
{
int64_t time;
uint32_t thread;
uint8_t cpu;
int8_t adjustReason;
int8_t adjustIncrement;
};
struct QueueTidToPid
@@ -738,8 +780,11 @@ struct QueueItem
QueueGpuTimeSync gpuTimeSync;
QueueGpuContextName gpuContextName;
QueueGpuContextNameFat gpuContextNameFat;
QueueGpuAnnotationName gpuAnnotationName;
QueueGpuAnnotationNameFat gpuAnnotationNameFat;
QueueMemAlloc memAlloc;
QueueMemFree memFree;
QueueMemDiscard memDiscard;
QueueMemNamePayload memName;
QueueThreadGroupHint threadGroupHint;
QueueCallstackFat callstackFat;
@@ -770,6 +815,7 @@ struct QueueItem
QueueSourceCodeNotAvailable sourceCodeNotAvailable;
QueueFiberEnter fiberEnter;
QueueFiberLeave fiberLeave;
QueueGpuZoneAnnotation zoneAnnotation;
};
};
#pragma pack( pop )
@@ -811,6 +857,8 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueMemDiscard ),
sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
@@ -828,6 +876,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
sizeof( QueueHeader ) + sizeof( QueueGpuAnnotationName ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer
@@ -879,6 +928,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // second string data
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneAnnotation ), // GPU zone annotation
// keep all QueueStringTransfer below
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
+6 -4
View File
@@ -10,7 +10,7 @@
# endif
# include <windows.h>
# include <malloc.h>
# include "TracyUwp.hpp"
# include "TracyWinFamily.hpp"
#else
# include <pthread.h>
# include <string.h>
@@ -26,7 +26,9 @@
# include <fcntl.h>
#elif defined __FreeBSD__
# include <sys/thr.h>
#elif defined __NetBSD__ || defined __DragonFly__
#elif defined __NetBSD__
# include <lwp.h>
#elif defined __DragonFly__
# include <sys/lwp.h>
#elif defined __QNX__
# include <process.h>
@@ -135,7 +137,7 @@ TRACY_API void SetThreadName( const char* name )
TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint )
{
#if defined _WIN32
# ifdef TRACY_UWP
# if defined TRACY_WIN32_NO_DESKTOP
static auto _SetThreadDescription = &::SetThreadDescription;
# else
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
@@ -244,7 +246,7 @@ TRACY_API const char* GetThreadName( uint32_t id )
#endif
#if defined _WIN32
# ifdef TRACY_UWP
# if defined TRACY_WIN32_NO_DESKTOP
static auto _GetThreadDescription = &::GetThreadDescription;
# else
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
-11
View File
@@ -1,11 +0,0 @@
#ifndef __TRACYUWP_HPP__
#define __TRACYUWP_HPP__
#ifdef _WIN32
# include <winapifamily.h>
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
# define TRACY_UWP
# endif
#endif
#endif
+1 -1
View File
@@ -6,7 +6,7 @@ namespace tracy
namespace Version
{
enum { Major = 0 };
enum { Minor = 11 };
enum { Minor = 13 };
enum { Patch = 1 };
}
}
@@ -0,0 +1,16 @@
#ifndef __TRACYWINFAMILY_HPP__
#define __TRACYWINFAMILY_HPP__
#ifdef _WIN32
# include <winapifamily.h>
# if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
# define TRACY_WIN32_NO_DESKTOP
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_GAMES)
# define TRACY_GDK
# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
# define TRACY_UWP
# endif
# endif
#endif
#endif
+35 -28
View File
@@ -725,8 +725,8 @@ struct dwarf_data
struct dwarf_data *next;
/* The data for .gnu_debugaltlink. */
struct dwarf_data *altlink;
/* The base address for this file. */
uintptr_t base_address;
/* The base address mapping for this file. */
struct libbacktrace_base_address base_address;
/* A sorted list of address ranges. */
struct unit_addrs *addrs;
/* Number of address ranges in list. */
@@ -1947,8 +1947,9 @@ update_pcrange (const struct attr* attr, const struct attr_val* val,
static int
add_low_high_range (struct backtrace_state *state,
const struct dwarf_sections *dwarf_sections,
uintptr_t base_address, int is_bigendian,
struct unit *u, const struct pcrange *pcrange,
struct libbacktrace_base_address base_address,
int is_bigendian, struct unit *u,
const struct pcrange *pcrange,
int (*add_range) (struct backtrace_state *state,
void *rdata, uintptr_t lowpc,
uintptr_t highpc,
@@ -1983,8 +1984,8 @@ add_low_high_range (struct backtrace_state *state,
/* Add in the base address of the module when recording PC values,
so that we can look up the PC directly. */
lowpc += base_address;
highpc += base_address;
lowpc = libbacktrace_add_base (lowpc, base_address);
highpc = libbacktrace_add_base (highpc, base_address);
return add_range (state, rdata, lowpc, highpc, error_callback, data, vec);
}
@@ -1996,7 +1997,7 @@ static int
add_ranges_from_ranges (
struct backtrace_state *state,
const struct dwarf_sections *dwarf_sections,
uintptr_t base_address, int is_bigendian,
struct libbacktrace_base_address base_address, int is_bigendian,
struct unit *u, uintptr_t base,
const struct pcrange *pcrange,
int (*add_range) (struct backtrace_state *state, void *rdata,
@@ -2042,10 +2043,11 @@ add_ranges_from_ranges (
base = (uintptr_t) high;
else
{
if (!add_range (state, rdata,
(uintptr_t) low + base + base_address,
(uintptr_t) high + base + base_address,
error_callback, data, vec))
uintptr_t rl, rh;
rl = libbacktrace_add_base ((uintptr_t) low + base, base_address);
rh = libbacktrace_add_base ((uintptr_t) high + base, base_address);
if (!add_range (state, rdata, rl, rh, error_callback, data, vec))
return 0;
}
}
@@ -2063,7 +2065,7 @@ static int
add_ranges_from_rnglists (
struct backtrace_state *state,
const struct dwarf_sections *dwarf_sections,
uintptr_t base_address, int is_bigendian,
struct libbacktrace_base_address base_address, int is_bigendian,
struct unit *u, uintptr_t base,
const struct pcrange *pcrange,
int (*add_range) (struct backtrace_state *state, void *rdata,
@@ -2146,9 +2148,10 @@ add_ranges_from_rnglists (
u->addrsize, is_bigendian, index,
error_callback, data, &high))
return 0;
if (!add_range (state, rdata, low + base_address,
high + base_address, error_callback, data,
vec))
if (!add_range (state, rdata,
libbacktrace_add_base (low, base_address),
libbacktrace_add_base (high, base_address),
error_callback, data, vec))
return 0;
}
break;
@@ -2165,7 +2168,7 @@ add_ranges_from_rnglists (
error_callback, data, &low))
return 0;
length = read_uleb128 (&rnglists_buf);
low += base_address;
low = libbacktrace_add_base (low, base_address);
if (!add_range (state, rdata, low, low + length,
error_callback, data, vec))
return 0;
@@ -2179,8 +2182,9 @@ add_ranges_from_rnglists (
low = read_uleb128 (&rnglists_buf);
high = read_uleb128 (&rnglists_buf);
if (!add_range (state, rdata, low + base + base_address,
high + base + base_address,
if (!add_range (state, rdata,
libbacktrace_add_base (low + base, base_address),
libbacktrace_add_base (high + base, base_address),
error_callback, data, vec))
return 0;
}
@@ -2197,9 +2201,10 @@ add_ranges_from_rnglists (
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
high = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
if (!add_range (state, rdata, low + base_address,
high + base_address, error_callback, data,
vec))
if (!add_range (state, rdata,
libbacktrace_add_base (low, base_address),
libbacktrace_add_base (high, base_address),
error_callback, data, vec))
return 0;
}
break;
@@ -2211,7 +2216,7 @@ add_ranges_from_rnglists (
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
length = (uintptr_t) read_uleb128 (&rnglists_buf);
low += base_address;
low = libbacktrace_add_base (low, base_address);
if (!add_range (state, rdata, low, low + length,
error_callback, data, vec))
return 0;
@@ -2239,7 +2244,7 @@ add_ranges_from_rnglists (
static int
add_ranges (struct backtrace_state *state,
const struct dwarf_sections *dwarf_sections,
uintptr_t base_address, int is_bigendian,
struct libbacktrace_base_address base_address, int is_bigendian,
struct unit *u, uintptr_t base, const struct pcrange *pcrange,
int (*add_range) (struct backtrace_state *state, void *rdata,
uintptr_t lowpc, uintptr_t highpc,
@@ -2275,7 +2280,8 @@ add_ranges (struct backtrace_state *state,
read, 0 if there is some error. */
static int
find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
find_address_ranges (struct backtrace_state *state,
struct libbacktrace_base_address base_address,
struct dwarf_buf *unit_buf,
const struct dwarf_sections *dwarf_sections,
int is_bigendian, struct dwarf_data *altlink,
@@ -2430,7 +2436,8 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
on success, 0 on failure. */
static int
build_address_map (struct backtrace_state *state, uintptr_t base_address,
build_address_map (struct backtrace_state *state,
struct libbacktrace_base_address base_address,
const struct dwarf_sections *dwarf_sections,
int is_bigendian, struct dwarf_data *altlink,
backtrace_error_callback error_callback, void *data,
@@ -2649,7 +2656,7 @@ add_line (struct backtrace_state *state, struct dwarf_data *ddata,
/* Add in the base address here, so that we can look up the PC
directly. */
ln->pc = pc + ddata->base_address;
ln->pc = libbacktrace_add_base (pc, ddata->base_address);
ln->filename = filename;
ln->lineno = lineno;
@@ -4329,7 +4336,7 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc,
static struct dwarf_data *
build_dwarf_data (struct backtrace_state *state,
uintptr_t base_address,
struct libbacktrace_base_address base_address,
const struct dwarf_sections *dwarf_sections,
int is_bigendian,
struct dwarf_data *altlink,
@@ -4387,7 +4394,7 @@ build_dwarf_data (struct backtrace_state *state,
int
backtrace_dwarf_add (struct backtrace_state *state,
uintptr_t base_address,
struct libbacktrace_base_address base_address,
const struct dwarf_sections *dwarf_sections,
int is_bigendian,
struct dwarf_data *fileline_altlink,
+47 -39
View File
@@ -75,7 +75,7 @@ namespace tracy
{
#ifdef TRACY_DEBUGINFOD
int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size );
int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename );
#endif
#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN
@@ -643,7 +643,7 @@ elf_symbol_search (const void *vkey, const void *ventry)
static int
elf_initialize_syminfo (struct backtrace_state *state,
uintptr_t base_address,
struct libbacktrace_base_address base_address,
const unsigned char *symtab_data, size_t symtab_size,
const unsigned char *strtab, size_t strtab_size,
backtrace_error_callback error_callback,
@@ -709,7 +709,8 @@ elf_initialize_syminfo (struct backtrace_state *state,
= *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr));
else
elf_symbols[j].address = sym->st_value;
elf_symbols[j].address += base_address;
elf_symbols[j].address =
libbacktrace_add_base (elf_symbols[j].address, base_address);
elf_symbols[j].size = sym->st_size;
++j;
}
@@ -1200,14 +1201,7 @@ elf_fetch_bits_backward (const unsigned char **ppin,
val = *pval;
if (unlikely (pin <= pinend))
{
if (bits == 0)
{
elf_uncompress_failed ();
return 0;
}
return 1;
}
return 1;
pin -= 4;
@@ -5712,10 +5706,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed,
/* Block header CRC. */
computed_crc = elf_crc32 (0, compressed + block_header_offset,
block_header_size - 4);
stream_crc = (compressed[off]
| (compressed[off + 1] << 8)
| (compressed[off + 2] << 16)
| (compressed[off + 3] << 24));
stream_crc = ((uint32_t)compressed[off]
| ((uint32_t)compressed[off + 1] << 8)
| ((uint32_t)compressed[off + 2] << 16)
| ((uint32_t)compressed[off + 3] << 24));
if (unlikely (computed_crc != stream_crc))
{
elf_uncompress_failed ();
@@ -6222,10 +6216,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed,
return 0;
}
computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset);
stream_crc = (compressed[off]
| (compressed[off + 1] << 8)
| (compressed[off + 2] << 16)
| (compressed[off + 3] << 24));
stream_crc = ((uint32_t)compressed[off]
| ((uint32_t)compressed[off + 1] << 8)
| ((uint32_t)compressed[off + 2] << 16)
| ((uint32_t)compressed[off + 3] << 24));
if (computed_crc != stream_crc)
{
elf_uncompress_failed ();
@@ -6325,10 +6319,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
/* Next comes a CRC of the stream flags. */
computed_crc = elf_crc32 (0, compressed + 6, 2);
stream_crc = (compressed[8]
| (compressed[9] << 8)
| (compressed[10] << 16)
| (compressed[11] << 24));
stream_crc = ((uint32_t)compressed[8]
| ((uint32_t)compressed[9] << 8)
| ((uint32_t)compressed[10] << 16)
| ((uint32_t)compressed[11] << 24));
if (unlikely (computed_crc != stream_crc))
{
elf_uncompress_failed ();
@@ -6369,10 +6363,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
/* Before that is a footer CRC. */
computed_crc = elf_crc32 (0, compressed + offset, 6);
stream_crc = (compressed[offset - 4]
| (compressed[offset - 3] << 8)
| (compressed[offset - 2] << 16)
| (compressed[offset - 1] << 24));
stream_crc = ((uint32_t)compressed[offset - 4]
| ((uint32_t)compressed[offset - 3] << 8)
| ((uint32_t)compressed[offset - 2] << 16)
| ((uint32_t)compressed[offset - 1] << 24));
if (unlikely (computed_crc != stream_crc))
{
elf_uncompress_failed ();
@@ -6428,10 +6422,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
/* Next is a CRC of the index. */
computed_crc = elf_crc32 (0, compressed + index_offset,
offset - index_offset);
stream_crc = (compressed[offset]
| (compressed[offset + 1] << 8)
| (compressed[offset + 2] << 16)
| (compressed[offset + 3] << 24));
stream_crc = ((uint32_t)compressed[offset]
| ((uint32_t)compressed[offset + 1] << 8)
| ((uint32_t)compressed[offset + 2] << 16)
| ((uint32_t)compressed[offset + 3] << 24));
if (unlikely (computed_crc != stream_crc))
{
elf_uncompress_failed ();
@@ -6524,7 +6518,8 @@ backtrace_uncompress_lzma (struct backtrace_state *state,
static int
elf_add (struct backtrace_state *state, const char *filename, int descriptor,
const unsigned char *memory, size_t memory_size,
uintptr_t base_address, struct elf_ppc64_opd_data *caller_opd,
struct libbacktrace_base_address base_address,
struct elf_ppc64_opd_data *caller_opd,
backtrace_error_callback error_callback, void *data,
fileline *fileline_fn, int *found_sym, int *found_dwarf,
struct dwarf_data **fileline_entry, int exe, int debuginfo,
@@ -6867,7 +6862,8 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor,
}
}
if (!gnu_debugdata_view_valid
if (!debuginfo
&& !gnu_debugdata_view_valid
&& strcmp (name, ".gnu_debugdata") == 0)
{
if (!elf_get_view (state, descriptor, memory, memory_size,
@@ -7425,6 +7421,7 @@ phdr_callback (struct PhdrIterate *info, void *pdata)
const char *filename;
int descriptor;
int does_not_exist;
struct libbacktrace_base_address base_address;
fileline elf_fileline_fn;
int found_dwarf;
@@ -7454,7 +7451,8 @@ phdr_callback (struct PhdrIterate *info, void *pdata)
return 0;
}
if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, NULL,
base_address.m = info->dlpi_addr;
if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL,
pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym,
&found_dwarf, NULL, 0, 0, NULL, 0))
{
@@ -7543,11 +7541,21 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
fileline elf_fileline_fn = elf_nodebug;
struct phdr_data pd;
ret = elf_add (state, filename, descriptor, NULL, 0, 0, NULL, error_callback,
data, &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0,
NULL, 0);
if (!ret)
return 0;
/* When using fdpic we must use dl_iterate_phdr for all modules, including
the main executable, so that we can get the right base address
mapping. */
if (!libbacktrace_using_fdpic ())
{
struct libbacktrace_base_address zero_base_address;
memset (&zero_base_address, 0, sizeof zero_base_address);
ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address,
NULL, error_callback, data, &elf_fileline_fn, &found_sym,
&found_dwarf, NULL, 1, 0, NULL, 0);
if (!ret)
return 0;
}
pd.state = state;
pd.error_callback = error_callback;
+35 -1
View File
@@ -333,10 +333,44 @@ struct dwarf_sections
struct dwarf_data;
/* The load address mapping. */
#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H))
#ifdef HAVE_LINK_H
#include <link.h>
#endif
#ifdef HAVE_SYS_LINK_H
#include <sys/link.h>
#endif
#define libbacktrace_using_fdpic() (1)
struct libbacktrace_base_address
{
struct elf32_fdpic_loadaddr m;
};
#define libbacktrace_add_base(pc, base) \
((uintptr_t) (__RELOC_POINTER ((pc), (base).m)))
#else /* not _FDPIC__ */
#define libbacktrace_using_fdpic() (0)
struct libbacktrace_base_address
{
uintptr_t m;
};
#define libbacktrace_add_base(pc, base) ((pc) + (base).m)
#endif /* not _FDPIC__ */
/* Add file/line information for a DWARF module. */
extern int backtrace_dwarf_add (struct backtrace_state *state,
uintptr_t base_address,
struct libbacktrace_base_address base_address,
const struct dwarf_sections *dwarf_sections,
int is_bigendian,
struct dwarf_data *fileline_altlink,
+26 -19
View File
@@ -274,12 +274,14 @@ struct macho_nlist_64
/* Value found in nlist n_type field. */
#define MACH_O_N_EXT 0x01 /* Extern symbol */
#define MACH_O_N_ABS 0x02 /* Absolute symbol */
#define MACH_O_N_SECT 0x0e /* Defined in section */
#define MACH_O_N_TYPE 0x0e /* Mask for type bits */
#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */
#define MACH_O_N_TYPE 0x0e /* Mask for type bits */
/* Values found after masking with MACH_O_N_TYPE. */
#define MACH_O_N_UNDF 0x00 /* Undefined symbol */
#define MACH_O_N_ABS 0x02 /* Absolute symbol */
#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */
/* Information we keep for a Mach-O symbol. */
@@ -307,17 +309,18 @@ static const char * const dwarf_section_names[DEBUG_MAX] =
"__debug_abbrev",
"__debug_ranges",
"__debug_str",
"", /* DEBUG_ADDR */
"__debug_addr",
"__debug_str_offs",
"", /* DEBUG_LINE_STR */
"__debug_line_str",
"__debug_rnglists"
};
/* Forward declaration. */
static int macho_add (struct backtrace_state *, const char *, int, off_t,
const unsigned char *, uintptr_t, int,
backtrace_error_callback, void *, fileline *, int *);
const unsigned char *, struct libbacktrace_base_address,
int, backtrace_error_callback, void *, fileline *,
int *);
/* A dummy callback function used when we can't find any debug info. */
@@ -495,10 +498,10 @@ macho_defined_symbol (uint8_t type)
{
if ((type & MACH_O_N_STAB) != 0)
return 0;
if ((type & MACH_O_N_EXT) != 0)
return 0;
switch (type & MACH_O_N_TYPE)
{
case MACH_O_N_UNDF:
return 0;
case MACH_O_N_ABS:
return 1;
case MACH_O_N_SECT:
@@ -512,7 +515,7 @@ macho_defined_symbol (uint8_t type)
static int
macho_add_symtab (struct backtrace_state *state, int descriptor,
uintptr_t base_address, int is_64,
struct libbacktrace_base_address base_address, int is_64,
off_t symoff, unsigned int nsyms, off_t stroff,
unsigned int strsize,
backtrace_error_callback error_callback, void *data)
@@ -627,7 +630,7 @@ macho_add_symtab (struct backtrace_state *state, int descriptor,
if (name[0] == '_')
++name;
macho_symbols[j].name = name;
macho_symbols[j].address = value + base_address;
macho_symbols[j].address = libbacktrace_add_base (value, base_address);
++j;
}
@@ -760,7 +763,8 @@ macho_syminfo (struct backtrace_state *state, uintptr_t addr,
static int
macho_add_fat (struct backtrace_state *state, const char *filename,
int descriptor, int swapped, off_t offset,
const unsigned char *match_uuid, uintptr_t base_address,
const unsigned char *match_uuid,
struct libbacktrace_base_address base_address,
int skip_symtab, uint32_t nfat_arch, int is_64,
backtrace_error_callback error_callback, void *data,
fileline *fileline_fn, int *found_sym)
@@ -862,7 +866,8 @@ macho_add_fat (struct backtrace_state *state, const char *filename,
static int
macho_add_dsym (struct backtrace_state *state, const char *filename,
uintptr_t base_address, const unsigned char *uuid,
struct libbacktrace_base_address base_address,
const unsigned char *uuid,
backtrace_error_callback error_callback, void *data,
fileline* fileline_fn)
{
@@ -980,7 +985,7 @@ macho_add_dsym (struct backtrace_state *state, const char *filename,
static int
macho_add (struct backtrace_state *state, const char *filename, int descriptor,
off_t offset, const unsigned char *match_uuid,
uintptr_t base_address, int skip_symtab,
struct libbacktrace_base_address base_address, int skip_symtab,
backtrace_error_callback error_callback, void *data,
fileline *fileline_fn, int *found_sym)
{
@@ -1242,7 +1247,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
c = _dyld_image_count ();
for (i = 0; i < c; ++i)
{
uintptr_t base_address;
struct libbacktrace_base_address base_address;
const char *name;
int d;
fileline mff;
@@ -1266,7 +1271,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
continue;
}
base_address = _dyld_get_image_vmaddr_slide (i);
base_address.m = _dyld_get_image_vmaddr_slide (i);
mff = macho_nodebug;
if (!macho_add (state, name, d, 0, NULL, base_address, 0,
@@ -1321,10 +1326,12 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
void *data, fileline *fileline_fn)
{
fileline macho_fileline_fn;
struct libbacktrace_base_address zero_base_address;
int found_sym;
macho_fileline_fn = macho_nodebug;
if (!macho_add (state, filename, descriptor, 0, NULL, 0, 0,
memset (&zero_base_address, 0, sizeof zero_base_address);
if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0,
error_callback, data, &macho_fileline_fn, &found_sym))
return 0;
+85 -107
View File
@@ -13,7 +13,7 @@
#endif
#ifndef TracyLine
# define TracyLine __LINE__
# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665
#endif
#ifndef TRACY_ENABLE
@@ -75,8 +75,10 @@
#define TracyAlloc(x,y)
#define TracyFree(x)
#define TracyMemoryDiscard(x)
#define TracySecureAlloc(x,y)
#define TracySecureFree(x)
#define TracySecureMemoryDiscard(x)
#define TracyAllocN(x,y,z)
#define TracyFreeN(x,y)
@@ -98,8 +100,10 @@
#define TracyAllocS(x,y,z)
#define TracyFreeS(x,y)
#define TracyMemoryDiscardS(x,y)
#define TracySecureAllocS(x,y,z)
#define TracySecureFreeS(x,y)
#define TracySecureMemoryDiscardS(x,y)
#define TracyAllocNS(x,y,z,w)
#define TracyFreeNS(x,y,z)
@@ -130,32 +134,49 @@
#include "../client/TracyProfiler.hpp"
#include "../client/TracyScoped.hpp"
#define TracyNoop tracy::ProfilerAvailable()
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
#else
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active )
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active )
# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, active )
#ifndef TRACY_CALLSTACK
#define TRACY_CALLSTACK 0
#endif
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true )
#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true )
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
#define TracyNoop tracy::ProfilerAvailable()
#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
#if defined(TRACY_ALLOW_SHADOW_WARNING)
#define SuppressVarShadowWarning(Expr) Expr
#elif defined(__clang__)
#define SuppressVarShadowWarning(Expr) \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Wshadow\"") \
Expr; \
_Pragma("clang diagnostic pop")
#elif defined(__GNUC__)
#define SuppressVarShadowWarning(Expr) \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wshadow\"") \
Expr; \
_Pragma("GCC diagnostic pop")
#elif defined(_MSC_VER)
#define SuppressVarShadowWarning(Expr) \
_Pragma("warning(push)") \
_Pragma("warning(disable : 4456)") \
Expr; \
_Pragma("warning(pop)")
#else
#define SuppressVarShadowWarning(Expr) Expr
#endif
#define ZoneScoped SuppressVarShadowWarning( ZoneNamed( ___tracy_scoped_zone, true ) )
#define ZoneScopedN( name ) SuppressVarShadowWarning( ZoneNamedN( ___tracy_scoped_zone, name, true ) )
#define ZoneScopedC( color ) SuppressVarShadowWarning( ZoneNamedC( ___tracy_scoped_zone, color, true ) )
#define ZoneScopedNC( name, color ) SuppressVarShadowWarning( ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) )
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size )
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size )
@@ -185,7 +206,7 @@
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname )
#define LockMark( varname ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_lock_location_,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &TracyConcat(__tracy_lock_location_,TracyLine) )
#define LockableName( varname, txt, size ) varname.CustomName( txt, size )
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val )
@@ -193,95 +214,52 @@
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size )
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
#else
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 )
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 )
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 )
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 )
#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK )
#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK )
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false )
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false )
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true )
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true )
#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name )
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name )
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name )
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name )
#endif
#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
#ifdef TRACY_HAS_CALLSTACK
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth )
#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth )
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
#else
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
# define ZoneScopedS( depth ) ZoneScoped
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color )
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name )
# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name )
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name )
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name )
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color )
# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color )
#endif
#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data )
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
+78 -102
View File
@@ -4,7 +4,6 @@
#include <stddef.h>
#include <stdint.h>
#include "../client/TracyCallstack.h"
#include "../common/TracyApi.h"
#ifdef __cplusplus
@@ -53,8 +52,10 @@ typedef const void* TracyCLockCtx;
#define TracyCAlloc(x,y)
#define TracyCFree(x)
#define TracyCMemoryDiscard(x)
#define TracyCSecureAlloc(x,y)
#define TracyCSecureFree(x)
#define TracyCSecureMemoryDiscard(x)
#define TracyCAllocN(x,y,z)
#define TracyCFreeN(x,y)
@@ -85,8 +86,10 @@ typedef const void* TracyCLockCtx;
#define TracyCAllocS(x,y,z)
#define TracyCFreeS(x,y)
#define TracyCMemoryDiscardS(x,y)
#define TracyCSecureAllocS(x,y,z)
#define TracyCSecureFreeS(x,y)
#define TracyCSecureMemoryDiscardS(x,y)
#define TracyCAllocNS(x,y,z,w)
#define TracyCFreeNS(x,y,z)
@@ -111,6 +114,9 @@ typedef const void* TracyCLockCtx;
#define TracyCIsConnected 0
#define TracyCIsStarted 0
#define TracyCBeginSamplingProfiling() 0
#define TracyCEndSamplingProfiling()
#ifdef TRACY_FIBERS
# define TracyCFiberEnter(fiber)
# define TracyCFiberLeave
@@ -137,7 +143,7 @@ struct ___tracy_source_location_data
struct ___tracy_c_zone_context
{
uint32_t id;
int active;
int32_t active;
};
struct ___tracy_gpu_time_data
@@ -155,7 +161,7 @@ struct ___tracy_gpu_zone_begin_data {
struct ___tracy_gpu_zone_begin_callstack_data {
uint64_t srcloc;
int depth;
int32_t depth;
uint16_t queryId;
uint8_t context;
};
@@ -201,7 +207,7 @@ typedef struct __tracy_lockable_context_data* TracyCLockCtx;
#ifdef TRACY_MANUAL_LIFETIME
TRACY_API void ___tracy_startup_profiler(void);
TRACY_API void ___tracy_shutdown_profiler(void);
TRACY_API int ___tracy_profiler_started(void);
TRACY_API int32_t ___tracy_profiler_started(void);
# define TracyCIsStarted ___tracy_profiler_started()
#else
@@ -211,10 +217,10 @@ TRACY_API int ___tracy_profiler_started(void);
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active );
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
@@ -243,20 +249,17 @@ TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data );
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data );
TRACY_API int ___tracy_connected(void);
TRACY_API int32_t ___tracy_connected(void);
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#else
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
#ifndef TRACY_CALLSTACK
#define TRACY_CALLSTACK 0
#endif
#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
@@ -265,57 +268,44 @@ TRACY_API int ___tracy_connected(void);
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure );
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name );
TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure );
TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack );
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth );
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK );
#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK );
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
#else
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name );
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name );
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name );
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name );
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 );
#endif
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
TRACY_API void ___tracy_emit_frame_mark( const char* name );
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip );
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip );
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
@@ -327,7 +317,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
TRACY_API void ___tracy_emit_plot( const char* name, double val );
TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color );
TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color );
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
@@ -337,55 +327,35 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
#ifdef TRACY_HAS_CALLSTACK
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth )
#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth )
# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
#else
# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active )
# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active )
# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active )
# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active )
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name )
# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name )
# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name )
# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name )
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color )
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
#endif
#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc );
TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired );
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired );
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
@@ -400,6 +370,12 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex
#define TracyCIsConnected ___tracy_connected()
TRACY_API int ___tracy_begin_sampling_profiler( void );
TRACY_API void ___tracy_end_sampling_profiler( void );
#define TracyCBeginSamplingProfiling() ___tracy_begin_sampling_profiling()
#define TracyCEndSamplingProfiling() ___tracy_end_sampling_profiling()
#ifdef TRACY_FIBERS
TRACY_API void ___tracy_fiber_enter( const char* fiber );
TRACY_API void ___tracy_fiber_leave( void );
File diff suppressed because it is too large Load Diff
+27 -7
View File
@@ -95,6 +95,10 @@ public:
int64_t tcpu0 = Profiler::GetTime();
WaitForQuery(m_disjointQuery);
// NOTE: one would expect that by waiting for the enclosing disjoint query to finish,
// all timestamp queries within would also be readily available, but that does not
// seem to be the case here... See https://github.com/wolfpld/tracy/issues/947
WaitForQuery(m_queries[0]);
int64_t tcpu1 = Profiler::GetTime();
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
@@ -109,7 +113,7 @@ public:
UINT64 timestamp = 0;
if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK)
continue; // this should never happen, since the enclosing disjoint query succeeded
continue; // this should never happen (we waited for the query to finish above)
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
tgpu = timestamp * (1000000000 / disjoint.Frequency);
@@ -307,13 +311,21 @@ public:
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
}
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active )
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active )
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
if( depth > 0 && has_callstack() )
{
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
}
else
{
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
}
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
@@ -327,15 +339,23 @@ public:
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active)
: D3D11ZoneScope(ctx, active)
{
if( !m_active ) return;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
if ( depth > 0 && has_callstack() )
{
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
}
else
{
auto* item = Profiler::QueueSerial();
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
}
tracy_force_inline ~D3D11ZoneScope()
+2 -2
View File
@@ -385,7 +385,7 @@ namespace tracy
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
@@ -405,7 +405,7 @@ namespace tracy
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active)
: D3D12ZoneScope(ctx, cmdList, active)
{
if (!m_active) return;
+47 -7
View File
@@ -120,6 +120,8 @@ static inline void LuaRemove( char* script )
}
}
static inline void LuaHook( lua_State* L, lua_Debug* ar ) {}
}
#else
@@ -143,6 +145,13 @@ TRACY_API LuaZoneState& GetLuaZoneState();
namespace detail
{
static inline void LuaShortenSrc( char* dst, const char* src )
{
size_t l = std::min( (size_t)255, strlen( src ) );
memcpy( dst, src, l );
dst[l] = 0;
}
#ifdef TRACY_HAS_CALLSTACK
static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
{
@@ -188,13 +197,6 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
TracyQueueCommit( callstackAllocFatThread );
}
static inline void LuaShortenSrc( char* dst, const char* src )
{
size_t l = std::min( (size_t)255, strlen( src ) );
memcpy( dst, src, l );
dst[l] = 0;
}
static inline int LuaZoneBeginS( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
@@ -439,6 +441,44 @@ static inline void LuaRegister( lua_State* L )
static inline void LuaRemove( char* script ) {}
static inline void LuaHook( lua_State* L, lua_Debug* ar )
{
if ( ar->event == LUA_HOOKCALL )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = GetLuaZoneState().counter++;
if ( zoneCnt != 0 && !GetLuaZoneState().active ) return;
GetLuaZoneState().active = GetProfiler().IsConnected();
if ( !GetLuaZoneState().active ) return;
#endif
lua_getinfo( L, "Snl", ar );
char src[256];
detail::LuaShortenSrc( src, ar->short_src );
const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
else if (ar->event == LUA_HOOKRET) {
#ifdef TRACY_ON_DEMAND
assert( GetLuaZoneState().counter != 0 );
GetLuaZoneState().counter--;
if ( !GetLuaZoneState().active ) return;
if ( !GetProfiler().IsConnected() )
{
GetLuaZoneState().active = false;
return;
}
#endif
TracyQueuePrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
TracyQueueCommit( zoneEndThread );
}
}
}
#endif
+644
View File
@@ -0,0 +1,644 @@
#ifndef __TRACYMETAL_HMM__
#define __TRACYMETAL_HMM__
/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple
Silicon devices, but it should also work on Intel-based Macs and older iOS devices).
The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
a command encoder is created. This is because not all hardware supports timestamps at
command granularity, and can only provide timestamps around an entire command encoder.
This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone()
will be added to support the habitual command-level granularity of Tracy GPU back-ends.
Metal also imposes a few restrictions that make the process of requesting and collecting
queries more complicated in Tracy:
a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes)
b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes,
and there's no way to reset them back to zero after timestamps get resolved; the only
way to clear the timestamps is by allocating a new timestamp query buffer
c) if a command encoder records no commands and its corresponding command buffer ends up
committed to the command queue, Metal will "optimize-away" the encoder along with any
timestamp queries associated with it (the timestamp will remain as zero and will never
get resolved)
Because of the limitations above, two timestamp buffers are managed internally. Once one
of the buffers fills up with requests, the second buffer can start serving new requests.
Once all requests in a buffer get resolved and collected, the entire buffer is discarded
and a new one allocated for future requests. (Proper cycling through a ring buffer would
require bookkeeping and completion handlers to collect only the known complete queries.)
In the current implementation, there is potential for a race condition when the buffer is
discarded and reallocated. In practice, the race condition will never materialize so long
as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low.
Finally, there's a timeout mechanism during timestamp collection to detect "empty" command
encoders and ensure progress.
*/
#ifndef TRACY_ENABLE
#define TracyMetalContext(device) nullptr
#define TracyMetalDestroy(ctx)
#define TracyMetalContextName(ctx, name, size)
#define TracyMetalZone(ctx, encoderDesc, name)
#define TracyMetalZoneC(ctx, encoderDesc, name, color)
#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active)
#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active)
#define TracyMetalCollect(ctx)
namespace tracy
{
class MetalZoneScope {};
}
using TracyMetalCtx = void;
#else
#if not __has_feature(objc_arc)
#error TracyMetal requires ARC to be enabled.
#endif
#include <atomic>
#include <cassert>
#include <cstdlib>
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
#include "../client/TracyCallstack.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
// ok to import if in obj-c code
#import <Metal/Metal.h>
#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__
#define TracyMetalPanic(ret, msg, ...) do { \
char buffer [1024]; \
snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \
TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \
fprintf(stderr, "%s\n", buffer); \
ret; \
} while(false);
#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f
#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#ifndef TRACY_METAL_DEBUG_MASK
#define TRACY_METAL_DEBUG_MASK (0)
#endif//TRACY_METAL_DEBUG_MASK
#if TRACY_METAL_DEBUG_MASK
#define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
#else
#define TracyMetalDebugMasked(mask, ...)
#endif
#if TRACY_METAL_DEBUG_MASK & (1 << 1)
#define TracyMetalDebug_0b00010(...) __VA_ARGS__;
#else
#define TracyMetalDebug_0b00010(...)
#endif
#if TRACY_METAL_DEBUG_MASK & (1 << 4)
#define TracyMetalDebug_0b10000(...) __VA_ARGS__;
#else
#define TracyMetalDebug_0b10000(...)
#endif
#ifndef TracyMetalDebugZoneScopeWireTap
#define TracyMetalDebugZoneScopeWireTap
#endif//TracyMetalDebugZoneScopeWireTap
namespace tracy
{
class MetalCtx
{
friend class MetalZoneScope;
enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_...
public:
static MetalCtx* Create(id<MTLDevice> device)
{
ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4);
auto ctx = static_cast<MetalCtx*>(tracy_malloc(sizeof(MetalCtx)));
new (ctx) MetalCtx(device);
if (ctx->m_contextId == 255)
{
TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context.");
Destroy(ctx);
}
return ctx;
}
static void Destroy(MetalCtx* ctx)
{
ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4);
ctx->~MetalCtx();
tracy_free(ctx);
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
SubmitQueueItem(item);
}
bool Collect()
{
ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4);
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
return true;
}
#endif
// Only one thread is allowed to collect timestamps at any given time
// but there's no need to block contending threads
if (!m_collectionMutex.try_lock())
{
return true;
}
std::unique_lock lock (m_collectionMutex, std::adopt_lock);
uintptr_t begin = m_previousCheckpoint.load();
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
TracyMetalDebugMasked(1<<3, ZoneValue(begin));
TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint));
uint32_t count = RingCount(begin, latestCheckpoint);
if (count == 0) // no pending timestamp queries
{
//uintptr_t nextCheckpoint = m_queryCounter.load();
//if (nextCheckpoint != latestCheckpoint)
//{
// // TODO: signal event / fence now?
//}
return true;
}
// resolve up until the ring buffer boundary and let a subsequenty call
// to Collect handle the wrap-around
bool reallocateBuffer = false;
if (RingIndex(begin) + count >= RingSize())
{
count = RingSize() - RingIndex(begin);
reallocateBuffer = true;
}
TracyMetalDebugMasked(1<<3, ZoneValue(count));
auto buffer_idx = (begin / MaxQueries) % 2;
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
if (count >= RingSize())
{
TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
}
TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count));
NSRange range = NSMakeRange(RingIndex(begin), count);
NSData* data = [counterSampleBuffer resolveCounterRange:range];
NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp);
MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes);
if (timestamps == nil)
{
TracyMetalPanic(return false, "Collect: unable to resolve timestamps.");
}
if (numResolvedTimestamps != count)
{
TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count);
}
int resolved = 0;
for (auto i = 0; i < numResolvedTimestamps; i += 2)
{
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") );
MTLTimestamp t_start = timestamps[i+0].timestamp;
MTLTimestamp t_end = timestamps[i+1].timestamp;
uint32_t k = RingIndex(begin + i);
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start)));
if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue))
{
TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k);
break;
}
// Metal will initialize timestamp buffer with zeroes; encountering a zero-value
// timestamp means that the timestamp has not been written and resolved yet
if ((t_start == 0) || (t_end == 0))
{
auto checkTime = std::chrono::high_resolution_clock::now();
auto requestTime = m_timestampRequestTime[k];
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f;
if (ms_in_flight < timeout_ms)
break;
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") );
TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight);
t_start = m_mostRecentTimestamp + 5;
t_end = t_start + 5;
}
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone"));
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone"));
{
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
{
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end));
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k+1));
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
}
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp;
TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
resolved += 2;
}
TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load())));
m_previousCheckpoint += resolved;
// Check whether the timestamp buffer has been fully resolved/collected:
// WARN: there's technically a race condition here: NextQuery() may reference the
// buffer that is being released instead of the new one. In practice, this should
// never happen so long as Collect is called frequently enough to prevent pending
// timestamp query requests from piling up too quickly.
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
{
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
}
//RecalibrateClocks(); // to account for drift
return true;
}
private:
MetalCtx(id<MTLDevice> device)
: m_device(device)
{
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue));
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample));
if (m_device == nil)
{
TracyMetalPanic({assert(false);} return, "device is nil.");
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
{
TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported.");
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n"));
}
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
{
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n"));
}
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries);
m_timestampRequestTime.resize(MaxQueries);
MTLTimestamp cpuTimestamp = 0;
MTLTimestamp gpuTimestamp = 0;
[m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp];
m_mostRecentTimestamp = gpuTimestamp;
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp));
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp));
cpuTimestamp = Profiler::GetTime();
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp));
float period = 1.0f;
m_contextId = GetGpuCtxCounter().fetch_add(1);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()?
MemWrite(&item->gpuNewContext.period, period);
MemWrite(&item->gpuNewContext.context, m_contextId);
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
SubmitQueueItem(item);
}
~MetalCtx()
{
// collect the last remnants of Metal GPU activity...
// TODO: add a timeout to this loop?
while (m_previousCheckpoint.load() != m_queryCounter.load())
Collect();
}
tracy_force_inline void SubmitQueueItem(QueueItem* item)
{
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
}
tracy_force_inline uint32_t RingIndex(uintptr_t index)
{
index %= MaxQueries;
return static_cast<uint32_t>(index);
}
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
{
// wrap-around safe: all unsigned
uintptr_t count = end - begin;
return static_cast<uint32_t>(count);
}
tracy_force_inline uint32_t RingSize() const
{
return MaxQueries;
}
struct Query { id<MTLCounterSampleBuffer> buffer; uint32_t idx; };
tracy_force_inline Query NextQuery()
{
TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) );
auto id = m_queryCounter.fetch_add(2);
TracyMetalDebug_0b00010( ZoneValue(id) );
auto count = RingCount(m_previousCheckpoint, id);
if (count >= MaxQueries)
{
// TODO: return a proper (hidden) "sentinel" query
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
TracyMetalPanic(
return sentinel,
"NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)",
m_previousCheckpoint.load(), id, count
);
}
uint32_t buffer_idx = (id / MaxQueries) % 2;
TracyMetalDebug_0b00010( ZoneValue(buffer_idx) );
auto buffer = m_counterSampleBuffers[buffer_idx];
if (buffer == nil)
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
uint32_t idx = RingIndex(id);
TracyMetalDebug_0b00010( ZoneValue(idx) );
TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") );
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
return Query{ buffer, idx };
}
tracy_force_inline uint8_t GetContextId() const
{
return m_contextId;
}
static id<MTLCounterSampleBuffer> NewTimestampSampleBuffer(id<MTLDevice> device, size_t count)
{
ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer");
id<MTLCounterSet> timestampCounterSet = nil;
for (id<MTLCounterSet> counterSet in device.counterSets)
{
if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp])
{
timestampCounterSet = counterSet;
break;
}
}
if (timestampCounterSet == nil)
{
TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform.");
}
MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init];
sampleDescriptor.counterSet = timestampCounterSet;
sampleDescriptor.sampleCount = MaxQueries;
sampleDescriptor.storageMode = MTLStorageModeShared;
sampleDescriptor.label = @"TracyMetalTimestampPool";
NSError* error = nil;
id<MTLCounterSampleBuffer> counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error];
if (error != nil)
{
//NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason);
TracyMetalPanic({assert(false);} return nil,
"ERROR: unable to create sample buffer for timestamp counters : %s | %s",
[error.localizedDescription cString], [error.localizedFailureReason cString]);
}
return counterSampleBuffer;
}
uint8_t m_contextId = 255;
id<MTLDevice> m_device = nil;
id<MTLCounterSampleBuffer> m_counterSampleBuffers [2] = {};
using atomic_counter = std::atomic<uintptr_t>;
static_assert(atomic_counter::is_always_lock_free);
atomic_counter m_queryCounter = 0;
atomic_counter m_previousCheckpoint = 0;
MTLTimestamp m_mostRecentTimestamp = 0;
std::vector<std::chrono::high_resolution_clock::time_point> m_timestampRequestTime;
std::mutex m_collectionMutex;
};
class MetalZoneScope
{
public:
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if ( !m_active ) return;
if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil.");
m_ctx = ctx;
auto& query = m_query = ctx->NextQuery();
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0;
desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample;
desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample;
desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1;
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
}
/* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
m_cmdEncoder = cmdEncoder;
auto& query = m_query = ctx->NextQueryId();
[m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES];
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
}
*/
tracy_force_inline ~MetalZoneScope()
{
if( !m_active ) return;
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
}
TracyMetalDebugZoneScopeWireTap;
private:
const bool m_active;
MetalCtx* m_ctx;
/* TODO: declare it for "command-level" profiling
id<MTLComputeCommandEncoder> m_cmdEncoder;
*/
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
{
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
Profiler::QueueSerialFinish();
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
}
static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId)
{
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() );
Profiler::QueueSerialFinish();
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
}
MetalCtx::Query m_query = {};
};
}
using TracyMetalCtx = tracy::MetalCtx;
#define TracyMetalContext(device) tracy::MetalCtx::Create(device)
#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx)
#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size)
#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true )
#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true )
#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
#define TracyMetalCollect( ctx ) ctx->Collect();
#undef TracyMetalDebug_ZoneScopeWireTap
#undef TracyMetalDebug_0b00010
#undef TracyMetalDebug_0b10000
#undef TracyMetalDebugMasked
#undef TRACY_METAL_DEBUG_MASK
#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
#undef TracyMetalPanic
#undef TRACY_METAL_VA_ARGS
#endif
#endif//__TRACYMETAL_HMM__
+2 -2
View File
@@ -255,7 +255,7 @@ namespace tracy {
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active&& GetProfiler().IsConnected())
#else
@@ -304,7 +304,7 @@ namespace tracy {
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active)
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active && GetProfiler().IsConnected())
#else
+3 -3
View File
@@ -25,7 +25,7 @@ class GpuCtxScope
{
public:
GpuCtxScope( const SourceLocationData*, bool ) {}
GpuCtxScope( const SourceLocationData*, int, bool ) {}
GpuCtxScope( const SourceLocationData*, int32_t, bool ) {}
};
}
@@ -222,7 +222,7 @@ public:
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -271,7 +271,7 @@ public:
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
+33 -9
View File
@@ -16,6 +16,7 @@
#define TracyVkZoneC(c,x,y,z)
#define TracyVkZoneTransient(c,x,y,z,w)
#define TracyVkCollect(c,x)
#define TracyVkCollectHost(c)
#define TracyVkNamedZoneS(c,x,y,z,w,a)
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
@@ -256,7 +257,9 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
cmdbuf ?
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ) :
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, 0, m_queryCount ) );
m_tail = head;
m_oldCnt = 0;
int64_t tgpu;
@@ -265,7 +268,7 @@ public:
}
#endif
assert( head > m_tail );
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
unsigned int cnt;
@@ -325,7 +328,9 @@ public:
}
}
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) );
cmdbuf ?
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ) :
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, wrappedTail, cnt ) );
m_tail += cnt;
}
@@ -531,7 +536,7 @@ public:
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -545,8 +550,17 @@ public:
const auto queryId = ctx->NextQueryId();
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
QueueItem *item;
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
@@ -580,7 +594,7 @@ public:
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active )
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -595,8 +609,17 @@ public:
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
QueueItem *item;
if( depth > 0 && has_callstack() )
{
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
}
else
{
item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
}
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
@@ -703,6 +726,7 @@ using TracyVkCtx = tracy::VkCtx*;
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active );
#endif
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
#define TracyVkCollectHost( ctx ) ctx->Collect( VK_NULL_HANDLE );
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );