update tracy from 11.0 to 13.1 and fix build with tracy enabled

This commit is contained in:
Sven Balzer
2026-05-01 18:24:04 +02:00
parent 7fa5294e02
commit 2adf75973a
304 changed files with 20579 additions and 170182 deletions
+272 -206
View File
@@ -24,15 +24,33 @@
# pragma warning( disable : 4091 )
# endif
# include <dbghelp.h>
# pragma comment( lib, "dbghelp.lib" )
# ifdef _MSC_VER
# pragma warning( pop )
# endif
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#elif defined(TRACY_USE_LIBBACKTRACE)
# include "../libbacktrace/backtrace.hpp"
# include <algorithm>
# include <dlfcn.h>
# include <cxxabi.h>
# include <stdlib.h>
// Implementation files
# include "../libbacktrace/alloc.cpp"
# include "../libbacktrace/dwarf.cpp"
# include "../libbacktrace/fileline.cpp"
# include "../libbacktrace/mmapio.cpp"
# include "../libbacktrace/posix.cpp"
# include "../libbacktrace/sort.cpp"
# include "../libbacktrace/state.cpp"
# if TRACY_HAS_CALLSTACK == 4
# include "../libbacktrace/macho.cpp"
# else
# include "../libbacktrace/elf.cpp"
# endif
# include "../common/TracyStackFrames.cpp"
#elif TRACY_HAS_CALLSTACK == 5
# include <dlfcn.h>
# include <cxxabi.h>
@@ -53,7 +71,7 @@ extern "C"
};
#endif
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5 || TRACY_HAS_CALLSTACK == 6
#if defined(TRACY_USE_LIBBACKTRACE) || TRACY_HAS_CALLSTACK == 5
// If you want to use your own demangling functionality (e.g. for another language),
// define TRACY_DEMANGLE and provide your own implementation of the __tracy_demangle
// function. The input parameter is a function name. The demangle function must
@@ -91,94 +109,147 @@ extern "C" const char* ___tracy_demangle( const char* mangled )
#endif
#endif
#if TRACY_HAS_CALLSTACK == 3
# define TRACY_USE_IMAGE_CACHE
#if defined(TRACY_USE_LIBBACKTRACE) && TRACY_HAS_CALLSTACK != 4 // dl_iterate_phdr is required for the current image cache. Need to move it to libbacktrace?
# define TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
# include <link.h>
#endif
namespace tracy
{
#ifdef TRACY_USE_IMAGE_CACHE
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
// so we can quickly determine which image an address falls into.
// We refresh this cache only when we hit an address that doesn't fall into any known range.
static bool IsKernelAddress(uint64_t addr) {
return (addr >> 63) != 0;
}
void DestroyImageEntry( ImageEntry& entry )
{
tracy_free( entry.m_path );
tracy_free( entry.m_name );
}
class ImageCache
{
public:
struct ImageEntry
ImageCache( size_t imageCacheCapacity = 512 )
: m_images( imageCacheCapacity )
{
void* m_startAddress = nullptr;
void* m_endAddress = nullptr;
char* m_name = nullptr;
};
ImageCache()
: m_images( 512 )
{
Refresh();
}
~ImageCache()
{
Clear();
}
const ImageEntry* GetImageForAddress( void* address )
ImageEntry* AddEntry( const ImageEntry& entry )
{
const ImageEntry* entry = GetImageForAddressImpl( address );
if( m_sorted ) m_sorted = m_images.empty() || ( entry.m_startAddress < m_images.back().m_startAddress );
ImageEntry* newEntry = m_images.push_next();
*newEntry = entry;
return newEntry;
}
const ImageEntry* GetImageForAddress( uint64_t address )
{
Sort();
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
[]( const ImageEntry& lhs, const uint64_t rhs ) { return lhs.m_startAddress > rhs; } );
if( it != m_images.end() && address < it->m_endAddress )
{
return it;
}
return nullptr;
}
void Sort()
{
if( m_sorted ) return;
std::sort( m_images.begin(), m_images.end(),
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
m_sorted = true;
}
void Clear()
{
for( ImageEntry& entry : m_images )
{
DestroyImageEntry( entry );
}
m_sorted = true;
m_images.clear();
}
bool ContainsImage( uint64_t startAddress ) const
{
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
}
protected:
tracy::FastVector<ImageEntry> m_images;
bool m_sorted = true;
};
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
// so we can quickly determine which image an address falls into.
// We refresh this cache only when we hit an address that doesn't fall into any known range.
class ImageCacheDlIteratePhdr : public ImageCache
{
public:
ImageCacheDlIteratePhdr()
{
Refresh();
}
~ImageCacheDlIteratePhdr()
{
}
const ImageEntry* GetImageForAddress( uint64_t address )
{
const ImageEntry* entry = ImageCache::GetImageForAddress( address );
if( !entry )
{
Refresh();
return GetImageForAddressImpl( address );
return ImageCache::GetImageForAddress( address );
}
return entry;
}
private:
tracy::FastVector<ImageEntry> m_images;
bool m_updated = false;
bool m_haveMainImageName = false;
static int Callback( struct dl_phdr_info* info, size_t size, void* data )
{
ImageCache* cache = reinterpret_cast<ImageCache*>( data );
ImageCacheDlIteratePhdr* cache = reinterpret_cast<ImageCacheDlIteratePhdr*>( data );
const auto startAddress = reinterpret_cast<void*>( info->dlpi_addr );
if( cache->Contains( startAddress ) ) return 0;
const auto startAddress = static_cast<uint64_t>( info->dlpi_addr );
if( cache->ContainsImage( startAddress ) ) return 0;
const uint32_t headerCount = info->dlpi_phnum;
assert( headerCount > 0);
const auto endAddress = reinterpret_cast<void*>( info->dlpi_addr +
const auto endAddress = static_cast<uint64_t>( info->dlpi_addr +
info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz);
ImageEntry* image = cache->m_images.push_next();
image->m_startAddress = startAddress;
image->m_endAddress = endAddress;
ImageEntry image{};
image.m_startAddress = startAddress;
image.m_endAddress = endAddress;
// the base executable name isn't provided when iterating with dl_iterate_phdr,
// we will have to patch the executable image name outside this callback
if( info->dlpi_name && info->dlpi_name[0] != '\0' )
{
size_t sz = strlen( info->dlpi_name ) + 1;
image->m_name = (char*)tracy_malloc( sz );
memcpy( image->m_name, info->dlpi_name, sz );
}
else
{
image->m_name = nullptr;
}
image.m_name = info->dlpi_name && info->dlpi_name[0] != '\0' ? CopyStringFast( info->dlpi_name ) : nullptr;
cache->AddEntry( image );
cache->m_updated = true;
return 0;
}
bool Contains( void* startAddress ) const
{
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
}
void Refresh()
{
m_updated = false;
@@ -186,9 +257,7 @@ private:
if( m_updated )
{
std::sort( m_images.begin(), m_images.end(),
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
Sort();
// patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks
UpdateMainImageName();
}
@@ -223,31 +292,45 @@ private:
m_haveMainImageName = true;
}
const ImageEntry* GetImageForAddressImpl( void* address ) const
{
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
[]( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } );
if( it != m_images.end() && address < it->m_endAddress )
{
return it;
}
return nullptr;
}
void Clear()
{
for( ImageEntry& entry : m_images )
{
tracy_free( entry.m_name );
}
m_images.clear();
ImageCache::Clear();
m_haveMainImageName = false;
}
};
#endif //#ifdef TRACY_USE_IMAGE_CACHE
using UserlandImageCache = ImageCacheDlIteratePhdr;
#else
using UserlandImageCache = ImageCache;
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
static UserlandImageCache* s_imageCache;
static ImageCache* s_krnlCache;
void CreateImageCaches()
{
assert( s_imageCache == nullptr && s_krnlCache == nullptr );
s_imageCache = new ( tracy_malloc( sizeof( UserlandImageCache ) ) ) UserlandImageCache();
s_krnlCache = new ( tracy_malloc( sizeof( ImageCache ) ) ) ImageCache();
}
void DestroyImageCaches()
{
if( s_krnlCache != nullptr )
{
s_krnlCache->~ImageCache();
tracy_free( s_krnlCache );
s_krnlCache = nullptr;
}
if( s_imageCache != nullptr )
{
s_imageCache->~UserlandImageCache();
tracy_free( s_imageCache );
s_imageCache = nullptr;
}
}
// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime,
// simply resolve the offset and image name (which will be enough the resolving to be done offline)
@@ -282,32 +365,17 @@ extern "C"
t_SymFromInlineContext _SymFromInlineContext = 0;
t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0;
TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0;
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr;
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags)
{
return ___tracy_RtlWalkFrameChainPtr(callers, count, flags);
}
}
struct ModuleCache
{
uint64_t start;
uint64_t end;
char* name;
};
static FastVector<ModuleCache>* s_modCache;
struct KernelDriver
{
uint64_t addr;
const char* mod;
const char* path;
};
KernelDriver* s_krnlCache = nullptr;
size_t s_krnlCacheCnt;
void InitCallstackCritical()
{
___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
}
void DbgHelpInit()
@@ -338,75 +406,57 @@ DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll,
return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 );
}
ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
char* FormatImageName( const char* imageName, uint32_t imageNameLength )
{
DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize );
ModuleCache* cachedModule = s_modCache->push_next();
cachedModule->start = baseOfDll;
cachedModule->end = baseOfDll + dllSize;
// when doing offline symbol resolution, we must store the full path of the dll for the resolving to work
if( s_shouldResolveSymbolsOffline )
{
cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1);
memcpy(cachedModule->name, imageName, imageNameLength);
cachedModule->name[imageNameLength] = '\0';
return CopyStringFast( imageName, imageNameLength );
}
else
{
auto ptr = imageName + imageNameLength;
while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--;
if (ptr > imageName) ptr++;
const char* ptr = imageName + imageNameLength;
while( ptr > imageName && *ptr != '\\' && *ptr != '/' ) ptr--;
if( ptr > imageName ) ptr++;
const auto namelen = imageName + imageNameLength - ptr;
cachedModule->name = (char*)tracy_malloc_fast(namelen + 3);
cachedModule->name[0] = '[';
memcpy(cachedModule->name + 1, ptr, namelen);
cachedModule->name[namelen + 1] = ']';
cachedModule->name[namelen + 2] = '\0';
}
return cachedModule;
char* alloc = (char*)tracy_malloc_fast( namelen + 3 );
alloc[0] = '[';
memcpy( alloc + 1, ptr, namelen );
alloc[namelen + 1] = ']';
alloc[namelen + 2] = '\0';
return alloc;
}
}
void InitCallstack()
ImageEntry* CacheModuleInfo( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
{
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
if( s_shouldResolveSymbolsOffline )
{
TracyDebug("TRACY: enabling offline symbol resolving!\n");
}
ImageEntry moduleEntry = {};
moduleEntry.m_startAddress = baseOfDll;
moduleEntry.m_endAddress = baseOfDll + dllSize;
moduleEntry.m_path = CopyStringFast( imagePath, imageNameLength );
moduleEntry.m_name = FormatImageName( imagePath, imageNameLength );
DbgHelpInit();
return s_imageCache->AddEntry( moduleEntry );
}
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_LOCK;
#endif
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
// and process module symbol loading at startup time - they will be loaded on demand later
// Sometimes this process can take a very long time and prevent resolving callstack frames
// symbols during that time.
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
if ( !initTimeModuleLoad )
{
TracyDebug("TRACY: skipping init time dbghelper module load\n");
}
ImageEntry* LoadSymbolsForModuleAndCache( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
{
DbgHelpLoadSymbolsForModule( imagePath, baseOfDll, dllSize );
return CacheModuleInfo( imagePath, imageNameLength, baseOfDll, dllSize );
}
static void CacheProcessDrivers()
{
DWORD needed;
LPVOID dev[4096];
if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
{
char windir[MAX_PATH];
if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 );
const auto windirlen = strlen( windir );
const auto sz = needed / sizeof( LPVOID );
s_krnlCache = (KernelDriver*)tracy_malloc( sizeof(KernelDriver) * sz );
int cnt = 0;
for( size_t i=0; i<sz; i++ )
{
char fn[MAX_PATH];
@@ -417,7 +467,12 @@ void InitCallstack()
buf[0] = '<';
memcpy( buf+1, fn, len );
memcpy( buf+len+1, ">", 2 );
s_krnlCache[cnt] = KernelDriver { (uint64_t)dev[i], buf };
ImageEntry kernelDriver{};
kernelDriver.m_startAddress = (uint64_t)dev[i];
kernelDriver.m_endAddress = 0;
kernelDriver.m_name = buf;
kernelDriver.m_path = nullptr;
const auto len = GetDeviceDriverFileNameA( dev[i], fn, sizeof( fn ) );
if( len != 0 )
@@ -433,27 +488,23 @@ void InitCallstack()
}
DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 );
const auto psz = strlen( path );
auto pptr = (char*)tracy_malloc_fast( psz+1 );
memcpy( pptr, path, psz );
pptr[psz] = '\0';
s_krnlCache[cnt].path = pptr;
kernelDriver.m_path = CopyString( path );
}
cnt++;
s_krnlCache->AddEntry(kernelDriver);
}
}
s_krnlCacheCnt = cnt;
std::sort( s_krnlCache, s_krnlCache + s_krnlCacheCnt, []( const KernelDriver& lhs, const KernelDriver& rhs ) { return lhs.addr > rhs.addr; } );
s_krnlCache->Sort();
}
}
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
new(s_modCache) FastVector<ModuleCache>( 512 );
static void CacheProcessModules()
{
DWORD needed;
HANDLE proc = GetCurrentProcess();
HMODULE mod[1024];
if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
{
const auto sz = needed / sizeof( HMODULE );
for( size_t i=0; i<sz; i++ )
@@ -472,6 +523,41 @@ void InitCallstack()
}
}
}
}
void InitCallstack()
{
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
if( s_shouldResolveSymbolsOffline )
{
TracyDebug("TRACY: enabling offline symbol resolving!\n");
}
CreateImageCaches();
DbgHelpInit();
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_LOCK;
#endif
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
// and process module symbol loading at startup time - they will be loaded on demand later
// Sometimes this process can take a very long time and prevent resolving callstack frames
// symbols during that time.
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
if ( !initTimeModuleLoad )
{
TracyDebug("TRACY: skipping init time dbghelper module load\n");
}
else
{
CacheProcessDrivers();
CacheProcessModules();
}
#ifdef TRACY_DBGHELP_LOCK
DBGHELP_UNLOCK;
@@ -480,6 +566,7 @@ void InitCallstack()
void EndCallstack()
{
DestroyImageCaches();
}
const char* DecodeCallstackPtrFast( uint64_t ptr )
@@ -514,11 +601,11 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
const char* GetKernelModulePath( uint64_t addr )
{
assert( addr >> 63 != 0 );
assert( IsKernelAddress( addr ) );
if( !s_krnlCache ) return nullptr;
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
if( it == s_krnlCache + s_krnlCacheCnt ) return nullptr;
return it->path;
const ImageEntry* imageEntry = s_krnlCache->GetImageForAddress( addr );
if( imageEntry ) return imageEntry->m_path;
return nullptr;
}
struct ModuleNameAndBaseAddress
@@ -529,51 +616,38 @@ struct ModuleNameAndBaseAddress
ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr )
{
if( ( addr >> 63 ) != 0 )
if( IsKernelAddress( addr ) )
{
if( s_krnlCache )
{
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
if( it != s_krnlCache + s_krnlCacheCnt )
{
return ModuleNameAndBaseAddress{ it->mod, it->addr };
}
}
const ImageEntry* entry = s_krnlCache->GetImageForAddress( addr );
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
return ModuleNameAndBaseAddress{ "<kernel>", addr };
}
for( auto& v : *s_modCache )
{
if( addr >= v.start && addr < v.end )
{
return ModuleNameAndBaseAddress{ v.name, v.start };
}
}
const ImageEntry* entry = s_imageCache->GetImageForAddress( addr );
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
HMODULE mod[1024];
DWORD needed;
HANDLE proc = GetCurrentProcess();
// Do not use FreeLibrary because we set the flag GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT
// see https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulehandleexa to get more information
constexpr DWORD flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
HMODULE mod = NULL;
InitRpmalloc();
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
if( GetModuleHandleExA( flag, (char*)addr, &mod ) != 0 )
{
const auto sz = needed / sizeof( HMODULE );
for( size_t i=0; i<sz; i++ )
MODULEINFO info;
if( GetModuleInformation( proc, mod, &info, sizeof( info ) ) != 0 )
{
MODULEINFO info;
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
const auto base = uint64_t( info.lpBaseOfDll );
if( addr >= base && addr < ( base + info.SizeOfImage ) )
{
const auto base = uint64_t( info.lpBaseOfDll );
if( addr >= base && addr < base + info.SizeOfImage )
char name[1024];
const auto nameLength = GetModuleFileNameA( mod, name, sizeof( name ) );
if( nameLength > 0 )
{
char name[1024];
const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 );
if( nameLength > 0 )
{
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start };
}
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
ImageEntry* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
return ModuleNameAndBaseAddress{ cachedModule->m_name, cachedModule->m_startAddress };
}
}
}
@@ -753,7 +827,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name };
}
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#elif defined(TRACY_USE_LIBBACKTRACE)
enum { MaxCbTrace = 64 };
@@ -762,9 +836,6 @@ struct backtrace_state* cb_bts = nullptr;
int cb_num;
CallstackEntry cb_data[MaxCbTrace];
int cb_fixup;
#ifdef TRACY_USE_IMAGE_CACHE
static ImageCache* s_imageCache = nullptr;
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_DEBUGINFOD
debuginfod_client* s_debuginfod;
@@ -959,10 +1030,9 @@ void InitCallstack()
{
InitRpmalloc();
#ifdef TRACY_USE_IMAGE_CACHE
s_imageCache = (ImageCache*)tracy_malloc( sizeof( ImageCache ) );
new(s_imageCache) ImageCache();
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
CreateImageCaches();
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
@@ -1056,13 +1126,9 @@ debuginfod_client* GetDebuginfodClient()
void EndCallstack()
{
#ifdef TRACY_USE_IMAGE_CACHE
if( s_imageCache )
{
s_imageCache->~ImageCache();
tracy_free( s_imageCache );
}
#endif //#ifdef TRACY_USE_IMAGE_CACHE
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
DestroyImageCaches();
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
#ifndef TRACY_DEMANGLE
___tracy_free_demangle_buffer();
#endif
@@ -1252,17 +1318,17 @@ void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, Callst
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
{
InitRpmalloc();
if( ptr >> 63 == 0 )
if ( !IsKernelAddress( ptr ) )
{
const char* imageName = nullptr;
uint64_t imageBaseAddress = 0x0;
#ifdef TRACY_USE_IMAGE_CACHE
const auto* image = s_imageCache->GetImageForAddress((void*)ptr);
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
const auto* image = s_imageCache->GetImageForAddress( ptr );
if( image )
{
imageName = image->m_name;
imageBaseAddress = uint64_t(image->m_startAddress);
imageBaseAddress = uint64_t( image->m_startAddress );
}
#else
Dl_info dlinfo;
+6 -2
View File
@@ -8,8 +8,8 @@
# endif
# if defined _WIN32
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# include "../common/TracyWinFamily.hpp"
# if !defined TRACY_WIN32_NO_DESKTOP
# define TRACY_HAS_CALLSTACK 1
# endif
# elif defined __ANDROID__
@@ -30,6 +30,10 @@
# define TRACY_HAS_CALLSTACK 6
# endif
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#define TRACY_USE_LIBBACKTRACE
#endif
#endif
#endif
+23 -6
View File
@@ -1,15 +1,31 @@
#ifndef __TRACYCALLSTACK_HPP__
#define __TRACYCALLSTACK_HPP__
#include <stdint.h>
#include "../common/TracyApi.h"
#include "../common/TracyForceInline.hpp"
#include "TracyCallstack.h"
namespace tracy
{
struct ImageEntry
{
uint64_t m_startAddress = 0;
uint64_t m_endAddress = 0;
char* m_name = nullptr;
char* m_path = nullptr;
};
}
#ifndef TRACY_HAS_CALLSTACK
namespace tracy
{
static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
static constexpr bool has_callstack() { return false; }
static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; }
}
#else
@@ -38,6 +54,8 @@ static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
namespace tracy
{
static constexpr bool has_callstack() { return true; }
struct CallstackSymbolData
{
const char* file;
@@ -79,11 +97,10 @@ debuginfod_client* GetDebuginfodClient();
extern "C"
{
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long );
}
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 && depth < 63 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
@@ -112,7 +129,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v
return _URC_NO_REASON;
}
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 && depth < 63 );
@@ -127,7 +144,7 @@ static tracy_force_inline void* Callstack( int depth )
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
static tracy_force_inline void* Callstack( int depth )
static tracy_force_inline void* Callstack( int32_t depth )
{
assert( depth >= 1 );
+4 -2
View File
@@ -219,8 +219,9 @@ public:
m_ctx.CustomName( name, size );
}
private:
T m_lockable;
private:
LockableCtx m_ctx;
};
@@ -535,8 +536,9 @@ public:
m_ctx.CustomName( name, size );
}
private:
T m_lockable;
private:
SharedLockableCtx m_ctx;
};
File diff suppressed because it is too large Load Diff
+193 -102
View File
@@ -58,6 +58,9 @@ TRACY_API bool IsProfilerStarted();
# define TracyIsStarted true
#endif
TRACY_API bool BeginSamplingProfiling();
TRACY_API void EndSamplingProfiling();
class GpuCtx;
class Profiler;
class Socket;
@@ -114,11 +117,11 @@ struct LuaZoneState
#define TracyLfqPrepare( _type ) \
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
auto __token = GetToken(); \
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
auto __token = tracy::GetToken(); \
auto& __tail = __token->get_tail_index(); \
auto item = __token->enqueue_begin( __magic ); \
MemWrite( &item->hdr.type, _type );
tracy::MemWrite( &item->hdr.type, _type );
#define TracyLfqCommit \
__tail.store( __magic + 1, std::memory_order_release );
@@ -136,11 +139,11 @@ struct LuaZoneState
#ifdef TRACY_FIBERS
# define TracyQueuePrepare( _type ) \
auto item = Profiler::QueueSerial(); \
MemWrite( &item->hdr.type, _type );
auto item = tracy::Profiler::QueueSerial(); \
tracy::MemWrite( &item->hdr.type, _type );
# define TracyQueueCommit( _name ) \
MemWrite( &item->_name.thread, GetThreadHandle() ); \
Profiler::QueueSerialFinish();
tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \
tracy::Profiler::QueueSerialFinish();
# define TracyQueuePrepareC( _type ) \
auto item = tracy::Profiler::QueueSerial(); \
tracy::MemWrite( &item->hdr.type, _type );
@@ -252,6 +255,9 @@ public:
#endif
}
bool BeginSamplingProfiling();
void EndSamplingProfiling();
tracy_force_inline uint32_t GetNextZoneId()
{
return m_zoneId.fetch_add( 1, std::memory_order_relaxed );
@@ -387,58 +393,58 @@ public:
TracyLfqCommit;
}
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth )
{
assert( size < (std::numeric_limits<uint16_t>::max)() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack );
MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyQueueCommit( messageFatThread );
}
static tracy_force_inline void Message( const char* txt, int callstack )
static tracy_force_inline void Message( const char* txt, int32_t callstack_depth )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->messageLiteral.time, GetTime() );
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
TracyQueueCommit( messageLiteralThread );
}
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth )
{
assert( size < (std::numeric_limits<uint16_t>::max)() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
MemWrite( &item->messageColorFat.time, GetTime() );
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) );
@@ -448,17 +454,17 @@ public:
TracyQueueCommit( messageColorFatThread );
}
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
if( callstack != 0 )
if( callstack_depth != 0 && has_callstack() )
{
tracy::GetProfiler().SendCallstack( callstack );
tracy::GetProfiler().SendCallstack( callstack_depth );
}
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColorLiteral.time, GetTime() );
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) );
@@ -510,29 +516,31 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemAlloc( ptr, size, secure );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock();
}
else
{
MemAlloc( ptr, size, secure );
}
}
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
if( !ProfilerAllocatorAvailable() )
@@ -540,23 +548,25 @@ public:
MemFree( ptr, secure );
return;
}
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemFree( ptr, secure );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock();
}
else
{
MemFree( ptr, secure );
}
}
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
@@ -587,62 +597,101 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemAllocNamed( ptr, size, secure, name );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
profiler.m_serialLock.unlock();
}
else
{
MemAllocNamed( ptr, size, secure, name );
}
}
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
if( depth > 0 && has_callstack() )
{
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
if( !profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
profiler.m_serialLock.unlock();
#else
static_cast<void>(depth); // unused
MemFreeNamed( ptr, secure, name );
#endif
profiler.m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemName( name );
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
profiler.m_serialLock.unlock();
}
else
{
MemFreeNamed( ptr, secure, name );
}
}
static tracy_force_inline void SendCallstack( int depth )
static tracy_force_inline void MemDiscard( const char* name, bool secure )
{
#ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth );
TracyQueuePrepare( QueueType::Callstack );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyQueueCommit( callstackFatThread );
#else
static_cast<void>(depth); // unused
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
const auto thread = GetThreadHandle();
GetProfiler().m_serialLock.lock();
SendMemDiscard( QueueType::MemDiscard, thread, name );
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth )
{
if( secure && !ProfilerAvailable() ) return;
if( depth > 0 && has_callstack() )
{
# ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
auto callstack = Callstack( depth );
GetProfiler().m_serialLock.lock();
SendCallstackSerial( callstack );
SendMemDiscard( QueueType::MemDiscard, thread, name );
GetProfiler().m_serialLock.unlock();
}
else
{
MemDiscard( name, secure );
}
}
static tracy_force_inline void SendCallstack( int32_t depth )
{
if( depth > 0 && has_callstack() )
{
auto ptr = Callstack( depth );
TracyQueuePrepare( QueueType::Callstack );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyQueueCommit( callstackFatThread );
}
}
static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data )
@@ -677,6 +726,9 @@ public:
#ifdef TRACY_FIBERS
static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint )
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyQueuePrepare( QueueType::FiberEnter );
MemWrite( &item->fiberEnter.time, GetTime() );
MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber );
@@ -686,13 +738,16 @@ public:
static tracy_force_inline void LeaveFiber()
{
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyQueuePrepare( QueueType::FiberLeave );
MemWrite( &item->fiberLeave.time, GetTime() );
TracyQueueCommit( fiberLeave );
}
#endif
void SendCallstack( int depth, const char* skipBefore );
void SendCallstack( int32_t depth, const char* skipBefore );
static void CutCallstack( void* callstack, const char* skipBefore );
static bool ShouldExit();
@@ -800,7 +855,7 @@ private:
void InstallCrashHandler();
void RemoveCrashHandler();
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
void ClearSerial();
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
@@ -833,6 +888,21 @@ private:
m_bufferOffset += int( len );
}
char* SafeCopyProlog( const char* p, size_t size );
void SafeCopyEpilog( char* buf );
template<class Callable> // must be void( const char* buf, size_t size )
bool WithSafeCopy( const char* p, size_t size, Callable&& callable )
{
if( char* buf = SafeCopyProlog( p, size ) )
{
callable( buf, size );
SafeCopyEpilog( buf );
return true;
}
return false;
}
bool SendData( const char* data, size_t len );
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
void SendSourceLocation( uint64_t ptr );
@@ -862,14 +932,13 @@ private:
static tracy_force_inline void SendCallstackSerial( void* ptr )
{
#ifdef TRACY_HAS_CALLSTACK
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next();
#else
static_cast<void>(ptr); // unused
#endif
if( has_callstack() )
{
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next();
}
}
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
@@ -907,6 +976,18 @@ private:
GetProfiler().m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name )
{
assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack );
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
MemWrite( &item->memDiscard.time, GetTime() );
MemWrite( &item->memDiscard.thread, thread );
MemWrite( &item->memDiscard.name, (uint64_t)name );
GetProfiler().m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemName( const char* name )
{
assert( name );
@@ -922,7 +1003,6 @@ private:
double m_timerMul;
uint64_t m_resolution;
uint64_t m_delay;
std::atomic<int64_t> m_timeBegin;
uint32_t m_mainThread;
uint64_t m_epoch, m_exectime;
@@ -963,6 +1043,7 @@ private:
std::atomic<bool> m_isConnected;
#ifdef TRACY_ON_DEMAND
std::atomic<uint64_t> m_connectionId;
std::atomic<bool> m_symbolsBusy;
TracyMutex m_deferredLock;
FastVector<QueueItem> m_deferredQueue;
@@ -990,9 +1071,19 @@ private:
char* m_queryData;
char* m_queryDataPtr;
#if defined _WIN32
void* m_exceptionHandler;
#ifndef NDEBUG
// m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds.
std::atomic_bool m_inUse{ false };
#endif
char* m_safeSendBuffer;
#if defined _WIN32
void* m_prevHandler;
#else
int m_pipe[2];
int m_pipeBufSize;
#endif
#ifdef __linux__
struct {
struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt;
+556
View File
@@ -0,0 +1,556 @@
#include "../server/tracy_robin_hood.h"
#include "TracyProfiler.hpp"
#include "TracyThread.hpp"
#include "tracy/TracyC.h"
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <iostream>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <sstream>
#include <time.h>
#include <unordered_map>
#include <vector>
// Checks the status of a rocprofiler-sdk call. On failure, prints a diagnostic
// (call site, expression, and rocprofiler's own status string) to stderr and
// throws std::runtime_error with the same message, aborting tool setup.
// `result` is evaluated exactly once.
#define ROCPROFILER_CALL( result, msg )                                                                             \
    {                                                                                                               \
        rocprofiler_status_t CHECKSTATUS = result;                                                                  \
        if( CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS )                                                             \
        {                                                                                                           \
            std::string status_msg = rocprofiler_get_status_string( CHECKSTATUS );                                  \
            std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failed with error code " \
                      << CHECKSTATUS << ": " << status_msg << std::endl;                                            \
            std::stringstream errmsg{};                                                                             \
            errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" << status_msg     \
                   << ")";                                                                                          \
            throw std::runtime_error( errmsg.str() );                                                               \
        }                                                                                                           \
    }
namespace
{
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
// Per-dispatch bookkeeping kept between the HSA enqueue callbacks and the
// later dispatch-complete / counter callbacks that refer to the same
// dispatch id.
struct DispatchData
{
    int64_t launch_start;  // tracy CPU time at ENQUEUE enter phase
    int64_t launch_end;    // tracy CPU time at ENQUEUE exit phase
    uint32_t thread_id;    // tracy thread handle that emitted the GPU zone
    uint16_t query_id;     // tracy GPU query id assigned to the dispatch's zone
};
// State shared by all rocprofiler callbacks. A single static instance is
// created in rocprofiler_configure() and passed to every callback as the
// user/callback data pointer.
struct ToolData
{
    uint32_t version;               // rocprofiler version given at configure time
    const char* runtime_version;
    uint32_t priority;
    rocprofiler_client_id_t client_id;
    uint8_t context_id;             // tracy GPU context id from gpu_context_allocate()
    bool init;                      // set once the calibration thread finished setup; callbacks no-op until then
    uint64_t query_id;              // source of tracy GPU query ids (truncated to uint16_t on use)
    int64_t previous_cpu_time;      // last CPU timestamp sent with a GpuCalibration event
    tracy::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t> client_kernels;  // loaded kernel symbols by kernel id
    tracy::unordered_map<rocprofiler_dispatch_id_t, DispatchData> dispatch_data;         // in-flight dispatch bookkeeping
    // HW counters to collect per dispatch; overridable via TRACY_ROCPROF_COUNTERS.
    tracy::unordered_set<std::string> counter_names = { "SQ_WAVES", "GL2C_MISS", "GL2C_HIT" };
    std::unique_ptr<tracy::Thread> cal_thread;  // background setup/calibration thread
    std::mutex mut{};               // guards the maps and query_id above
};
using namespace tracy;
// Returns the process-wide rocprofiler context handle shared by all services
// this tool configures (function-local static avoids init-order issues).
rocprofiler_context_id_t& get_client_ctx()
{
    static rocprofiler_context_id_t ctx{ 0 };
    return ctx;
}
const char* CTX_NAME = "rocprofv3";
// Allocates a process-unique tracy GPU context id and announces it to the
// profiler: sends a GpuNewContext event pairing the current tracy CPU time
// with a CLOCK_BOOTTIME timestamp (the clock HSA interpolates device times
// to), then sends the context name. Also seeds data->previous_cpu_time for
// later GpuCalibration events. Returns the allocated 8-bit context id.
uint8_t gpu_context_allocate( ToolData* data )
{
    timespec ts;
    clock_gettime( CLOCK_BOOTTIME, &ts );
    uint64_t cpu_timestamp = Profiler::GetTime();
    uint64_t gpu_timestamp = ( (uint64_t)ts.tv_sec * 1000000000 ) + ts.tv_nsec;
    float timestamp_period = 1.0f;  // GPU timestamps are already in nanoseconds
    data->previous_cpu_time = cpu_timestamp;
    // Allocate the process-unique GPU context ID. There's a max of 255 available;
    // if we are recreating devices a lot we may exceed that. Don't do that, or
    // wrap around and get weird (but probably still usable) numbers.
    uint8_t context_id = tracy::GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed );
    if( context_id >= 255 )
    {
        context_id %= 255;
    }
    uint8_t context_flags = 0;
#ifdef TRACY_ROCPROF_CALIBRATION
    // Tell tracy we'll be passing calibrated timestamps and not to mess with
    // the times. We'll periodically send GpuCalibration events in case the
    // times drift.
    context_flags |= tracy::GpuContextCalibration;
#endif
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
        tracy::MemWrite( &item->gpuNewContext.cpuTime, cpu_timestamp );
        tracy::MemWrite( &item->gpuNewContext.gpuTime, gpu_timestamp );
        memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
        tracy::MemWrite( &item->gpuNewContext.period, timestamp_period );
        tracy::MemWrite( &item->gpuNewContext.context, context_id );
        tracy::MemWrite( &item->gpuNewContext.flags, context_flags );
        tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
        tracy::Profiler::QueueSerialFinish();
    }
    // Send the name of the context along.
    // NOTE: Tracy will unconditionally free the name so we must clone it here.
    // Since internally Tracy will use its own rpmalloc implementation we must
    // make sure we allocate from the same source.
    size_t name_length = strlen( CTX_NAME );
    char* cloned_name = (char*)tracy::tracy_malloc( name_length );
    memcpy( cloned_name, CTX_NAME, name_length );
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
        tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
        tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)cloned_name );
        tracy::MemWrite( &item->gpuContextNameFat.size, name_length );
        tracy::Profiler::QueueSerialFinish();
    }
    return context_id;
}
// Builds a tracy allocated-source-location payload named after the kernel with
// the given id, or returns 0 if the kernel symbol is unknown (e.g. already
// unloaded). Holds data->mut while consulting the symbol map.
// Fix: the original used count() followed by operator[], performing two hash
// lookups (and operator[] would default-insert on a miss); a single find()
// does the same job safely.
uint64_t kernel_src_loc( ToolData* data, uint64_t kernel_id )
{
    auto _lk = std::unique_lock{ data->mut };
    rocprofiler_kernel_id_t kid = kernel_id;
    auto it = data->client_kernels.find( kid );
    if( it == data->client_kernels.end() ) return 0;
    const char* name = it->second.kernel_name;
    size_t name_len = strlen( name );
    uint32_t line = 0;  // kernels have no meaningful source line
    return tracy::Profiler::AllocSourceLocation( line, NULL, 0, name, name_len, NULL, 0 );
}
// Emits a complete tracy GPU zone on context data->context_id for the device
// interval [start_timestamp, end_timestamp]:
//   GpuZoneBegin(AllocSrcLoc)Serial + GpuTime, then GpuZoneEndSerial + GpuTime.
// src_loc is a Profiler::AllocSourceLocation() payload; 0 selects an empty
// static source location. dispatch_id ties the zone to a kernel dispatch so
// its CPU-side enqueue times and later counter annotations can be attached;
// pass UINT64_MAX for intervals without a dispatch (memory copies), which use
// the current time for both CPU timestamps instead.
void record_interval( ToolData* data, rocprofiler_timestamp_t start_timestamp, rocprofiler_timestamp_t end_timestamp,
                      uint64_t src_loc, rocprofiler_dispatch_id_t dispatch_id )
{
    uint16_t query_id = 0;
    uint8_t context_id = data->context_id;
    {
        // Take the next query id; the 64-bit counter intentionally truncates
        // into tracy's 16-bit query id space (wraps around).
        auto _lk = std::unique_lock{ data->mut };
        query_id = data->query_id;
        data->query_id++;
        if( dispatch_id != UINT64_MAX )
        {
            // Remember which query/thread this dispatch used so the counter
            // callback can attach GpuZoneAnnotation records to the same zone.
            DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
            dispatch_data.query_id = query_id;
            dispatch_data.thread_id = tracy::GetThreadHandle();
        }
    }
    uint64_t cpu_start_time = 0, cpu_end_time = 0;
    if( dispatch_id == UINT64_MAX )
    {
        // No dispatch context: anchor the zone at "now" on the CPU timeline.
        cpu_start_time = tracy::Profiler::GetTime();
        cpu_end_time = tracy::Profiler::GetTime();
    }
    else
    {
        // Use the CPU-side enqueue times captured by the ENQUEUE callbacks.
        auto _lk = std::unique_lock{ data->mut };
        DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
        cpu_start_time = dispatch_data.launch_start;
        cpu_end_time = dispatch_data.launch_end;
    }
    if( src_loc != 0 )
    {
        {
            auto* item = tracy::Profiler::QueueSerial();
            tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
            tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
            tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)src_loc );
            tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
            tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
            tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
            tracy::Profiler::QueueSerialFinish();
        }
    }
    else
    {
        // Unnamed zone: fall back to a shared empty static source location.
        static const ___tracy_source_location_data src_loc = { NULL, NULL, NULL, 0, 0 };
        {
            auto* item = tracy::Profiler::QueueSerial();
            tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial );
            tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
            tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)&src_loc );
            tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
            tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
            tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
            tracy::Profiler::QueueSerialFinish();
        }
    }
    // GPU-side begin timestamp for the query issued above.
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
        tracy::MemWrite( &item->gpuTime.gpuTime, start_timestamp );
        tracy::MemWrite( &item->gpuTime.queryId, query_id );
        tracy::MemWrite( &item->gpuTime.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    }
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
        tracy::MemWrite( &item->gpuZoneEnd.cpuTime, cpu_end_time );
        tracy::MemWrite( &item->gpuZoneEnd.thread, tracy::GetThreadHandle() );
        tracy::MemWrite( &item->gpuZoneEnd.queryId, query_id );
        tracy::MemWrite( &item->gpuZoneEnd.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    }
    // GPU-side end timestamp for the same query.
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
        tracy::MemWrite( &item->gpuTime.gpuTime, end_timestamp );
        tracy::MemWrite( &item->gpuTime.queryId, query_id );
        tracy::MemWrite( &item->gpuTime.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    }
}
// Dispatch counting service callback: receives the HW counter samples for a
// completed dispatch. Sums per-instance values by counter id and emits one
// GpuZoneAnnotation per counter, attached (via query id and thread) to the
// zone record_interval() created for the same dispatch.
void record_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
                      rocprofiler_record_counter_t* record_data, size_t record_count,
                      rocprofiler_user_data_t /*user_data*/, void* callback_data )
{
    assert( callback_data != nullptr );
    ToolData* data = static_cast<ToolData*>( callback_data );
    if( !data->init ) return;  // profiler context not ready yet
    // Aggregate the per-instance samples into one value per counter.
    std::unordered_map<rocprofiler_counter_instance_id_t, double> sums;
    for( size_t i = 0; i < record_count; ++i )
    {
        auto _counter_id = rocprofiler_counter_id_t{};
        ROCPROFILER_CALL( rocprofiler_query_record_counter_id( record_data[i].id, &_counter_id ),
                          "query record counter id" );
        sums[_counter_id.handle] += record_data[i].counter_value;
    }
    uint16_t query_id = 0;
    uint32_t thread_id = 0;
    {
        auto _lk = std::unique_lock{ data->mut };
        // An assumption is made here that the counter values are supplied after the dispatch
        // complete callback.
        assert( data->dispatch_data.count( dispatch_data.dispatch_info.dispatch_id ) );
        DispatchData& ddata = data->dispatch_data[dispatch_data.dispatch_info.dispatch_id];
        query_id = ddata.query_id;
        thread_id = ddata.thread_id;
    }
    for( auto& p : sums )
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneAnnotation );
        tracy::MemWrite( &item->zoneAnnotation.noteId, p.first );
        tracy::MemWrite( &item->zoneAnnotation.queryId, query_id );
        tracy::MemWrite( &item->zoneAnnotation.thread, thread_id );
        tracy::MemWrite( &item->zoneAnnotation.value, p.second );
        tracy::MemWrite( &item->zoneAnnotation.context, data->context_id );
        tracy::Profiler::QueueSerialFinish();
    }
}
/**
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
* rocprofiler_counter_config_id_t* is a return to specify what counters to collect
* for this dispatch (dispatch_packet).
*/
// Callback from rocprofiler when a kernel dispatch is enqueued into the HSA
// queue. Writes into *config the profile describing which HW counters to
// collect for this dispatch. Also announces each selected counter's name to
// tracy (GpuAnnotationName) the first time a profile is built for an agent.
void dispatch_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
                        rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/,
                        void* callback_data )
{
    assert( callback_data != nullptr );
    ToolData* data = static_cast<ToolData*>( callback_data );
    if( !data->init ) return;  // profiler context not ready yet
    /**
     * This simple example uses the same profile counter set for all agents.
     * We store this in a cache to prevent constructing many identical profile counter
     * sets. We first check the cache to see if we have already constructed a counter
     * set for the agent. If we have, return it. Otherwise, construct a new profile counter
     * set.
     */
    static std::shared_mutex m_mutex = {};
    static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
    auto search_cache = [&]()
    {
        if( auto pos = profile_cache.find( dispatch_data.dispatch_info.agent_id.handle ); pos != profile_cache.end() )
        {
            *config = pos->second;
            return true;
        }
        return false;
    };
    // Fast path: read lock only.
    {
        auto rlock = std::shared_lock{ m_mutex };
        if( search_cache() ) return;
    }
    // Slow path: take the write lock and re-check before building the profile.
    auto wlock = std::unique_lock{ m_mutex };
    if( search_cache() ) return;
    // GPU Counter IDs
    std::vector<rocprofiler_counter_id_t> gpu_counters;
    // Iterate through the agents and get the counters available on that agent
    ROCPROFILER_CALL(
        rocprofiler_iterate_agent_supported_counters(
            dispatch_data.dispatch_info.agent_id,
            []( rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data )
            {
                std::vector<rocprofiler_counter_id_t>* vec =
                    static_cast<std::vector<rocprofiler_counter_id_t>*>( user_data );
                for( size_t i = 0; i < num_counters; i++ )
                {
                    vec->push_back( counters[i] );
                }
                return ROCPROFILER_STATUS_SUCCESS;
            },
            static_cast<void*>( &gpu_counters ) ),
        "Could not fetch supported counters" );
    std::vector<rocprofiler_counter_id_t> collect_counters;
    collect_counters.reserve( data->counter_names.size() );
    // Look for the counters contained in counters_to_collect in gpu_counters
    for( auto& counter : gpu_counters )
    {
        rocprofiler_counter_info_v0_t info;
        ROCPROFILER_CALL(
            rocprofiler_query_counter_info( counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>( &info ) ),
            "Could not query info" );
        if( data->counter_names.count( std::string( info.name ) ) > 0 )
        {
            collect_counters.push_back( counter );
            // Announce the counter's display name to tracy. The name must be
            // cloned via tracy_malloc because tracy frees it unconditionally.
            size_t name_length = strlen( info.name );
            char* cloned_name = (char*)tracy::tracy_malloc( name_length );
            memcpy( cloned_name, info.name, name_length );
            {
                auto* item = tracy::Profiler::QueueSerial();
                tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuAnnotationName );
                tracy::MemWrite( &item->gpuAnnotationNameFat.context, data->context_id );
                tracy::MemWrite( &item->gpuAnnotationNameFat.noteId, counter.handle );
                tracy::MemWrite( &item->gpuAnnotationNameFat.ptr, (uint64_t)cloned_name );
                tracy::MemWrite( &item->gpuAnnotationNameFat.size, name_length );
                tracy::Profiler::QueueSerialFinish();
            }
        }
    }
    // Create a collection profile for the counters
    rocprofiler_profile_config_id_t profile = { .handle = 0 };
    ROCPROFILER_CALL( rocprofiler_create_profile_config( dispatch_data.dispatch_info.agent_id, collect_counters.data(),
                                                         collect_counters.size(), &profile ),
                      "Could not construct profile cfg" );
    profile_cache.emplace( dispatch_data.dispatch_info.agent_id.handle, profile );
    // Return the profile to collect those counters for this dispatch
    *config = profile;
}
// Callback tracing entry point, wired to three services in tool_init():
//  - CODE_OBJECT: tracks kernel symbol load/unload in data->client_kernels.
//  - KERNEL_DISPATCH: captures CPU-side enqueue times (ENQUEUE enter/exit)
//    and emits the GPU zone once the dispatch completes.
//  - MEMORY_COPY: emits a GPU zone named after the copy direction.
// Fix: the memory-copy switch previously had no default case, so an operation
// outside the four handled kinds (the guard only excludes _NONE) left `name`
// nullptr and crashed in strlen(); unknown kinds now get a generic name.
void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data,
                                     void* callback_data )
{
    assert( callback_data != nullptr );
    ToolData* data = static_cast<ToolData*>( callback_data );
    if( !data->init ) return;  // profiler context not ready yet
    if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
        record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
    {
        auto* sym_data = static_cast<kernel_symbol_data_t*>( record.payload );
        if( record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD )
        {
            auto _lk = std::unique_lock{ data->mut };
            data->client_kernels.emplace( sym_data->kernel_id, *sym_data );
        }
        else if( record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD )
        {
            auto _lk = std::unique_lock{ data->mut };
            data->client_kernels.erase( sym_data->kernel_id );
        }
    }
    else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
    {
        auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
        if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
        {
            // Record the CPU timeline of the enqueue; record_interval() later
            // uses these as the zone's CPU begin/end anchors.
            if( record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER )
            {
                auto _lk = std::unique_lock{ data->mut };
                data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_start = tracy::Profiler::GetTime();
            }
            else if( record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
            {
                auto _lk = std::unique_lock{ data->mut };
                data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_end = tracy::Profiler::GetTime();
            }
        }
        else if( record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE )
        {
            uint64_t src_loc = kernel_src_loc( data, rdata->dispatch_info.kernel_id );
            record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc,
                             rdata->dispatch_info.dispatch_id );
        }
    }
    else if( record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY &&
             record.operation != ROCPROFILER_MEMORY_COPY_NONE && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
    {
        auto* rdata = static_cast<rocprofiler_callback_tracing_memory_copy_data_t*>( record.payload );
        const char* name = nullptr;
        switch( record.operation )
        {
        case ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE:
            name = "DeviceToDeviceCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST:
            name = "DeviceToHostCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE:
            name = "HostToDeviceCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_HOST_TO_HOST:
            name = "HostToHostCopy";
            break;
        default:
            // Future SDK versions may add copy kinds; keep a valid name so the
            // strlen() below cannot dereference nullptr.
            name = "MemoryCopy";
            break;
        }
        size_t name_len = strlen( name );
        uint64_t src_loc = tracy::Profiler::AllocSourceLocation( 0, NULL, 0, name, name_len, NULL, 0 );
        record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, UINT64_MAX );
    }
}
// Entry point of the background thread started in tool_init(). Waits for the
// tracy client to start, allocates the GPU context, applies the optional
// TRACY_ROCPROF_COUNTERS override (comma-separated HW counter names), then
// flags the tool as initialized so the rocprofiler callbacks start emitting.
// With TRACY_ROCPROF_CALIBRATION defined it keeps sending GpuCalibration
// events once a second until tool_fini() clears data->init.
// Fix: the GPU timestamp was computed as `ts.tv_nsec + ts.tv_sec * 1e9L`
// (long double arithmetic) — on targets where long double is 64 bits this
// loses nanosecond precision at large uptimes and is inconsistent with the
// integer math in gpu_context_allocate(); use integer arithmetic instead.
void calibration_thread( void* ptr )
{
    // Busy-wait until the tracy client is up; nothing may touch the profiler
    // queues before that.
    while( !TracyIsStarted )
        ;
    ToolData* data = static_cast<ToolData*>( ptr );
    data->context_id = gpu_context_allocate( data );
    const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" );
    if( user_counters )
    {
        data->counter_names.clear();
        std::stringstream ss( user_counters );
        std::string counter;
        while( std::getline( ss, counter, ',' ) ) data->counter_names.insert( counter );
    }
    data->init = true;
#ifdef TRACY_ROCPROF_CALIBRATION
    while( data->init )
    {
        sleep( 1 );
        timespec ts;
        // HSA performs a linear interpolation of GPU time to CLOCK_BOOTTIME. However, this is
        // subject to network time updates and can drift relative to tracy's clock.
        clock_gettime( CLOCK_BOOTTIME, &ts );
        int64_t cpu_timestamp = Profiler::GetTime();
        int64_t gpu_timestamp = (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
        if( cpu_timestamp > data->previous_cpu_time )
        {
            auto* item = tracy::Profiler::QueueSerial();
            tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
            tracy::MemWrite( &item->gpuCalibration.gpuTime, gpu_timestamp );
            tracy::MemWrite( &item->gpuCalibration.cpuTime, cpu_timestamp );
            tracy::MemWrite( &item->gpuCalibration.cpuDelta, cpu_timestamp - data->previous_cpu_time );
            tracy::MemWrite( &item->gpuCalibration.context, data->context_id );
            tracy::Profiler::QueueSerialFinish();
            data->previous_cpu_time = cpu_timestamp;
        }
    }
#endif
}
// rocprofiler tool initialization callback (registered via
// rocprofiler_configure). Spawns the calibration/setup thread, creates the
// tool context, and configures the dispatch counting service plus callback
// tracing for code objects, kernel dispatches, and memory copies before
// starting the context. Returns 0 on success; ROCPROFILER_CALL throws on any
// SDK failure. fini_func is unused (tool_fini is registered separately).
int tool_init( rocprofiler_client_finalize_t fini_func, void* user_data )
{
    ToolData* data = static_cast<ToolData*>( user_data );
    data->cal_thread = std::make_unique<tracy::Thread>( calibration_thread, data );
    ROCPROFILER_CALL( rocprofiler_create_context( &get_client_ctx() ), "context creation failed" );
    // Per-dispatch HW counter collection (dispatch_callback picks counters,
    // record_callback receives the sampled values).
    ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_counting_service( get_client_ctx(), dispatch_callback,
                                                                                user_data, record_callback, user_data ),
                      "Could not setup counting service" );
    // Kernel symbol registration, needed to name dispatch zones.
    rocprofiler_tracing_operation_t ops[] = { ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER };
    ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
                                                                      ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ops, 1,
                                                                      tool_callback_tracing_callback, user_data ),
                      "callback tracing service failed to configure" );
    // Dispatch enqueue (CPU-side times) and completion (GPU zone emission).
    rocprofiler_tracing_operation_t ops2[] = { ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
                                               ROCPROFILER_KERNEL_DISPATCH_ENQUEUE };
    ROCPROFILER_CALL(
        rocprofiler_configure_callback_tracing_service( get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,
                                                        ops2, 2, tool_callback_tracing_callback, user_data ),
        "callback tracing service failed to configure" );
    // All memory copy operations (nullptr/0 = no operation filter).
    ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
                                                                      ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, nullptr,
                                                                      0, tool_callback_tracing_callback, user_data ),
                      "callback tracing service failed to configure" );
    ROCPROFILER_CALL( rocprofiler_start_context( get_client_ctx() ), "start context" );
    return 0;
}
// rocprofiler tool finalization callback: stops the context, clears the init
// flag (which also terminates the calibration loop), and destroys the thread
// object (presumably joining the thread in ~Thread — see tracy::Thread).
void tool_fini( void* tool_data_v )
{
    rocprofiler_stop_context( get_client_ctx() );
    ToolData* data = static_cast<ToolData*>( tool_data_v );
    data->init = false;
    data->cal_thread.reset();
}
}
extern "C"
{
// Entry point scanned by the rocprofiler runtime at process start. Declines
// registration unless this is the first (priority 0) tool; otherwise wires up
// tool_init/tool_fini with a static ToolData instance as the user data.
rocprofiler_tool_configure_result_t* rocprofiler_configure( uint32_t version, const char* runtime_version,
                                                            uint32_t priority, rocprofiler_client_id_t* client_id )
{
    // If not the first tool to register, indicate that the tool doesn't want to do anything
    if( priority > 0 ) return nullptr;
    // (optional) Provide a name for this tool to rocprofiler
    client_id->name = "Tracy";
    // (optional) create configure data
    static ToolData data = ToolData{ version, runtime_version, priority, *client_id, 0, false, 0, 0 };
    // construct configure result
    static auto cfg = rocprofiler_tool_configure_result_t{ sizeof( rocprofiler_tool_configure_result_t ),
                                                           &tool_init, &tool_fini, static_cast<void*>( &data ) };
    return &cfg;
}
}
+28 -49
View File
@@ -10,7 +10,14 @@
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "TracyProfiler.hpp"
#include "TracyCallstack.hpp"
#if (defined(__GNUC__) || defined(__clang__))
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx) \
__attribute__((format(printf, fmt_idx, arg_idx)))
#else
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx)
#endif
namespace tracy
{
@@ -22,7 +29,7 @@ public:
ScopedZone& operator=( const ScopedZone& ) = delete;
ScopedZone& operator=( ScopedZone&& ) = delete;
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -33,13 +40,19 @@ public:
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyQueuePrepare( QueueType::ZoneBegin );
auto zoneQueue = QueueType::ZoneBegin;
if( depth > 0 && has_callstack() )
{
GetProfiler().SendCallstack( depth );
zoneQueue = QueueType::ZoneBeginCallstack;
}
TracyQueuePrepare( zoneQueue );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
@@ -50,55 +63,21 @@ public:
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc;
if( depth > 0 && has_callstack() )
{
GetProfiler().SendCallstack( depth );
zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack;
}
TracyQueuePrepare( zoneQueue );
const auto srcloc =
Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast<uint32_t>(0), is_active ) {}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
tracy_force_inline ~ScopedZone()
{
@@ -126,7 +105,7 @@ public:
TracyQueueCommit( zoneTextFatThread );
}
void TextFmt( const char* fmt, ... )
void TextFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
@@ -165,7 +144,7 @@ public:
TracyQueueCommit( zoneTextFatThread );
}
void NameFmt( const char* fmt, ... )
void NameFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
+1 -1
View File
@@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent )
FILE* f = fopen( tmp, "r" );
if( f )
{
fscanf( f, "%" PRIu64, &maxRange );
(void)fscanf( f, "%" PRIu64, &maxRange );
fclose( f );
}
}
+13 -1
View File
@@ -4,6 +4,7 @@
# if defined _WIN32
# include <windows.h>
# include "../common/TracyWinFamily.hpp"
# elif defined __linux__
# include <stdio.h>
# include <inttypes.h>
@@ -27,13 +28,24 @@ static inline uint64_t ConvertTime( const FILETIME& t )
void SysTime::ReadTimes()
{
FILETIME idleTime;
FILETIME kernelTime;
FILETIME userTime;
# if defined TRACY_GDK
FILETIME creationTime;
FILETIME exitTime;
GetProcessTimes( GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime );
idle = 0;
# else
FILETIME idleTime;
GetSystemTimes( &idleTime, &kernelTime, &userTime );
idle = ConvertTime( idleTime );
# endif
const auto kernel = ConvertTime( kernelTime );
const auto user = ConvertTime( userTime );
used = kernel + user;
+139 -59
View File
@@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 50 )
@@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->threadWakeup.thread, rt->threadId );
MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
@@ -232,6 +238,10 @@ void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
#endif
const auto& hdr = record->EventHeader;
// Check for Lost_Event (6a399ae0-4bc6-4de9-870b-3657f8947e7e)
if( hdr.ProviderId.Data1 == 0x6A399AE0 ) return;
assert( hdr.ProviderId.Data1 == 0x802EC45A );
assert( hdr.EventDescriptor.Id == 0x0011 );
@@ -498,11 +508,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
if( _GetThreadDescription )
{
PWSTR tmp;
_GetThreadDescription( hnd, &tmp );
char buf[256];
if( tmp )
if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
{
char buf[256];
auto ret = wcstombs( buf, tmp, 256 );
LocalFree(tmp);
if( ret != 0 )
{
threadName = CopyString( buf, ret );
@@ -521,25 +531,23 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
if( phnd != INVALID_HANDLE_VALUE )
{
HMODULE modules[1024];
DWORD needed;
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
MEMORY_BASIC_INFORMATION vmeminfo;
SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
if( infosize == sizeof( vmeminfo ) )
{
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
for( DWORD i=0; i<sz; i++ )
if (vmeminfo.Type == MEM_IMAGE)
{
// for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE
HMODULE mod = (HMODULE)vmeminfo.AllocationBase;
MODULEINFO info;
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 )
{
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 );
if( modlen != 0 )
{
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{
threadName = CopyString( buf2, modlen );
threadSent = true;
}
threadName = CopyString( buf2, modlen );
threadSent = true;
}
}
}
@@ -606,6 +614,7 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
# include <fcntl.h>
# include <inttypes.h>
# include <limits>
# include <mntent.h>
# include <poll.h>
# include <stdio.h>
# include <stdlib.h>
@@ -678,7 +687,7 @@ enum TraceEventId
EventBranchMiss,
EventVsync,
EventContextSwitch,
EventWakeup,
EventWaking,
};
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
@@ -753,6 +762,42 @@ static const char* ReadFile( const char* path )
return tmp;
}
static const char* ReadFile( const char* base, const char* path )
{
const auto blen = strlen( base );
const auto plen = strlen( path );
auto tmp = (char*)tracy_malloc( blen + plen + 1 );
memcpy( tmp, base, blen );
memcpy( tmp + blen, path, plen );
tmp[blen+plen] = '\0';
auto res = ReadFile( tmp );
tracy_free( tmp );
return res;
}
static char* GetTraceFsPath()
{
auto f = setmntent( "/proc/mounts", "r" );
if( !f ) return nullptr;
char* ret = nullptr;
while( auto ent = getmntent( f ) )
{
if( strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
{
auto len = strlen( ent->mnt_dir );
ret = (char*)tracy_malloc( len + 1 );
memcpy( ret, ent->mnt_dir, len );
ret[len] = '\0';
break;
}
}
endmntent( f );
return ret;
}
bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
@@ -767,16 +812,22 @@ bool SysTraceStart( int64_t& samplingPeriod )
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
#endif
int switchId = -1, wakeupId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
auto traceFsPath = GetTraceFsPath();
if( !traceFsPath ) return false;
TracyDebug( "tracefs path: %s\n", traceFsPath );
int switchId = -1, wakingId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
if( switchIdStr ) switchId = atoi( switchIdStr );
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
if( wakingIdStr ) wakingId = atoi( wakingIdStr );
const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
tracy_free( traceFsPath );
TracyDebug( "sched_switch id: %i\n", switchId );
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
TracyDebug( "sched_waking id: %i\n", wakingId );
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
#ifdef TRACY_NO_SAMPLING
@@ -831,7 +882,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
2 + // CPU cycles + instructions retired
2 + // cache reference + miss
2 + // branch retired + miss
2 + // context switches + wakeups
2 + // context switches + waking ups
1 // vsync
);
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
@@ -1076,18 +1127,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
}
}
if( wakeupId != -1 )
if( wakingId != -1 )
{
pe.config = wakeupId;
pe.config &= ~PERF_SAMPLE_CALLCHAIN;
pe = {};
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof( perf_event_attr );
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
// Coult ask for callstack here
//pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
pe.disabled = 1;
pe.inherit = 1;
pe.config = wakingId;
pe.read_format = 0;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup wakeup capture\n" );
TracyDebug( "Setup waking up capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWaking, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
@@ -1332,6 +1396,7 @@ void SysTraceWorker( void* ptr )
hadData = true;
while( activeNum > 0 )
{
// Find the earliest event from the active buffers
int sel = -1;
int selPos;
int64_t t0 = std::numeric_limits<int64_t>::max();
@@ -1369,6 +1434,7 @@ void SysTraceWorker( void* ptr )
}
}
}
// Found any event
if( sel >= 0 )
{
auto& ring = ringArray[ctxBufferIdx + sel];
@@ -1384,10 +1450,10 @@ void SysTraceWorker( void* ptr )
const auto rid = ring.GetId();
if( rid == EventContextSwitch )
{
// Layout:
// u64 time
// u64 cnt
// u64 ip[cnt]
// Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
// u64 time // PERF_SAMPLE_TIME
// u64 cnt // PERF_SAMPLE_CALLCHAIN
// u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN
// u32 size
// u8 data[size]
// Data (not ABI stable, but has not changed since it was added, in 2009):
@@ -1408,35 +1474,43 @@ void SysTraceWorker( void* ptr )
const auto traceOffset = offset;
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
uint32_t prev_pid, next_pid;
uint32_t prev_pid, prev_prio;
uint32_t next_pid, next_prio;
long prev_state;
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t ) + sizeof( uint32_t );
offset += sizeof( uint32_t );
ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &prev_state, offset, sizeof( long ) );
offset += sizeof( long ) + 16;
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &next_prio, offset, sizeof( uint32_t ) );
uint8_t reason = 100;
uint8_t state;
uint8_t oldThreadWaitReason = 100;
uint8_t oldThreadState;
if( prev_state & 0x0001 ) state = 104;
else if( prev_state & 0x0002 ) state = 101;
else if( prev_state & 0x0004 ) state = 105;
else if( prev_state & 0x0008 ) state = 106;
else if( prev_state & 0x0010 ) state = 108;
else if( prev_state & 0x0020 ) state = 109;
else if( prev_state & 0x0040 ) state = 110;
else if( prev_state & 0x0080 ) state = 102;
else state = 103;
if( prev_state & 0x0001 ) oldThreadState = 104;
else if( prev_state & 0x0002 ) oldThreadState = 101;
else if( prev_state & 0x0004 ) oldThreadState = 105;
else if( prev_state & 0x0008 ) oldThreadState = 106;
else if( prev_state & 0x0010 ) oldThreadState = 108;
else if( prev_state & 0x0020 ) oldThreadState = 109;
else if( prev_state & 0x0040 ) oldThreadState = 110;
else if( prev_state & 0x0080 ) oldThreadState = 102;
else oldThreadState = 103;
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, t0 );
MemWrite( &item->contextSwitch.oldThread, prev_pid );
MemWrite( &item->contextSwitch.newThread, next_pid );
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
MemWrite( &item->contextSwitch.reason, reason );
MemWrite( &item->contextSwitch.state, state );
MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
TracyLfqCommit;
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
@@ -1450,27 +1524,33 @@ void SysTraceWorker( void* ptr )
TracyLfqCommit;
}
}
else if( rid == EventWakeup )
else if( rid == EventWaking)
{
// See /sys/kernel/debug/tracing/events/sched/sched_waking/format
// Layout:
// u64 time
// u64 time // PERF_SAMPLE_TIME
// u32 size
// u8 data[size]
// Data:
// u8 hdr[8]
// u8 comm[16]
// u32 pid
// u32 prio
// u64 target_cpu
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
// i32 prio
// i32 target_cpu
const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
offset += dataOffset + 8 + 16;
uint32_t pid;
ring.Read( &pid, offset, sizeof( uint32_t ) );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, t0 );
MemWrite( &item->threadWakeup.thread, pid );
MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
int8_t adjustReason = -1; // Does not exist on Linux
int8_t adjustIncrement = 0; // Should perhaps store the new prio?
MemWrite( &item->threadWakeup.adjustReason, adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement );
TracyLfqCommit;
}
else
+2 -2
View File
@@ -2,8 +2,8 @@
#define __TRACYSYSTRACE_HPP__
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# include "../common/TracyWinFamily.hpp"
# if !defined TRACY_WIN32_NO_DESKTOP
# define TRACY_HAS_SYSTEM_TRACING
# endif
#endif
+4 -2
View File
@@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap;
# define _Thread_local __declspec(thread)
# define TLS_MODEL
# else
# ifndef __HAIKU__
# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26
# define TLS_MODEL __attribute__((tls_model("local-dynamic")))
# elif !defined(__HAIKU__)
# define TLS_MODEL __attribute__((tls_model("initial-exec")))
# else
# define TLS_MODEL
@@ -2778,7 +2780,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) {
_memory_huge_pages = 1;
}
#if PLATFORM_WINDOWS
#if PLATFORM_WINDOWS && !defined TRACY_GDK
if (_memory_config.enable_huge_pages) {
HANDLE token = 0;
size_t large_page_minimum = GetLargePageMinimum();