update tracy from 11.0 to 13.1 and fix build with tracy enabled
This commit is contained in:
@@ -24,15 +24,33 @@
|
||||
# pragma warning( disable : 4091 )
|
||||
# endif
|
||||
# include <dbghelp.h>
|
||||
# pragma comment( lib, "dbghelp.lib" )
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( pop )
|
||||
# endif
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#elif defined(TRACY_USE_LIBBACKTRACE)
|
||||
|
||||
# include "../libbacktrace/backtrace.hpp"
|
||||
# include <algorithm>
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
# include <stdlib.h>
|
||||
|
||||
// Implementation files
|
||||
# include "../libbacktrace/alloc.cpp"
|
||||
# include "../libbacktrace/dwarf.cpp"
|
||||
# include "../libbacktrace/fileline.cpp"
|
||||
# include "../libbacktrace/mmapio.cpp"
|
||||
# include "../libbacktrace/posix.cpp"
|
||||
# include "../libbacktrace/sort.cpp"
|
||||
# include "../libbacktrace/state.cpp"
|
||||
# if TRACY_HAS_CALLSTACK == 4
|
||||
# include "../libbacktrace/macho.cpp"
|
||||
# else
|
||||
# include "../libbacktrace/elf.cpp"
|
||||
# endif
|
||||
# include "../common/TracyStackFrames.cpp"
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
@@ -53,7 +71,7 @@ extern "C"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5 || TRACY_HAS_CALLSTACK == 6
|
||||
#if defined(TRACY_USE_LIBBACKTRACE) || TRACY_HAS_CALLSTACK == 5
|
||||
// If you want to use your own demangling functionality (e.g. for another language),
|
||||
// define TRACY_DEMANGLE and provide your own implementation of the __tracy_demangle
|
||||
// function. The input parameter is a function name. The demangle function must
|
||||
@@ -91,94 +109,147 @@ extern "C" const char* ___tracy_demangle( const char* mangled )
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 3
|
||||
# define TRACY_USE_IMAGE_CACHE
|
||||
#if defined(TRACY_USE_LIBBACKTRACE) && TRACY_HAS_CALLSTACK != 4 // dl_iterate_phdr is required for the current image cache. Need to move it to libbacktrace?
|
||||
# define TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
# include <link.h>
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
|
||||
// so we can quickly determine which image an address falls into.
|
||||
// We refresh this cache only when we hit an address that doesn't fall into any known range.
|
||||
// Reports whether the most significant bit (bit 63) of a 64-bit address is set.
// Other code in this file uses that condition to route a lookup to the kernel-driver cache
// instead of the user-land image cache.
static bool IsKernelAddress(uint64_t addr) {
|
||||
return (addr >> 63) != 0;
|
||||
}
|
||||
|
||||
// Frees the two strings referenced by an image entry (m_path and m_name) with tracy_free.
// The pointers are not reset here, so the entry must not be read again unless they are reassigned.
void DestroyImageEntry( ImageEntry& entry )
|
||||
{
|
||||
tracy_free( entry.m_path );
|
||||
tracy_free( entry.m_name );
|
||||
}
|
||||
|
||||
class ImageCache
|
||||
{
|
||||
public:
|
||||
struct ImageEntry
|
||||
|
||||
ImageCache( size_t imageCacheCapacity = 512 )
|
||||
: m_images( imageCacheCapacity )
|
||||
{
|
||||
void* m_startAddress = nullptr;
|
||||
void* m_endAddress = nullptr;
|
||||
char* m_name = nullptr;
|
||||
};
|
||||
|
||||
ImageCache()
|
||||
: m_images( 512 )
|
||||
{
|
||||
Refresh();
|
||||
}
|
||||
|
||||
~ImageCache()
|
||||
{
|
||||
Clear();
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddress( void* address )
|
||||
|
||||
ImageEntry* AddEntry( const ImageEntry& entry )
|
||||
{
|
||||
const ImageEntry* entry = GetImageForAddressImpl( address );
|
||||
if( m_sorted ) m_sorted = m_images.empty() || ( entry.m_startAddress < m_images.back().m_startAddress );
|
||||
ImageEntry* newEntry = m_images.push_next();
|
||||
*newEntry = entry;
|
||||
return newEntry;
|
||||
}
|
||||
|
||||
// Looks up the cached image whose address range contains `address`.
//
// Entries are kept ordered by descending start address, so the first entry whose start is
// not greater than `address` is the only possible match.
//
// An entry with m_endAddress == 0 has no known extent (kernel drivers are registered that
// way, as only their load address is available). Such an entry is treated as open-ended;
// without this, the upper-bound test could never succeed for it and every kernel lookup
// would come back empty.
//
// Returns a pointer to the matching entry inside the cache, or nullptr when nothing matches.
const ImageEntry* GetImageForAddress( uint64_t address )
{
    // No-op when the entries are already in order.
    Sort();

    auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
        []( const ImageEntry& lhs, const uint64_t rhs ) { return lhs.m_startAddress > rhs; } );
    if( it == m_images.end() ) return nullptr;

    // Only enforce the exclusive upper bound when the entry actually carries one.
    const bool hasKnownEnd = it->m_endAddress != 0;
    if( hasKnownEnd && address >= it->m_endAddress ) return nullptr;

    return it;
}
|
||||
|
||||
// Orders m_images by descending m_startAddress, which is the order the lower_bound-based
// lookup in this class relies on. Does nothing when m_sorted already reports the list as ordered.
void Sort()
|
||||
{
|
||||
if( m_sorted ) return;
|
||||
|
||||
std::sort( m_images.begin(), m_images.end(),
|
||||
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
|
||||
m_sorted = true;
|
||||
}
|
||||
|
||||
// Releases the strings of every cached entry via DestroyImageEntry, then empties the list.
void Clear()
|
||||
{
|
||||
for( ImageEntry& entry : m_images )
|
||||
{
|
||||
DestroyImageEntry( entry );
|
||||
}
|
||||
|
||||
// an empty list is trivially in order
m_sorted = true;
|
||||
m_images.clear();
|
||||
}
|
||||
|
||||
// Returns true when some cached entry starts exactly at startAddress.
// This is a linear scan over all entries and does not depend on the list being sorted.
bool ContainsImage( uint64_t startAddress ) const
|
||||
{
|
||||
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
|
||||
}
|
||||
protected:
|
||||
tracy::FastVector<ImageEntry> m_images;
|
||||
bool m_sorted = true;
|
||||
};
|
||||
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
|
||||
// so we can quickly determine which image an address falls into.
|
||||
// We refresh this cache only when we hit an address that doesn't fall into any known range.
|
||||
class ImageCacheDlIteratePhdr : public ImageCache
|
||||
{
|
||||
public:
|
||||
|
||||
ImageCacheDlIteratePhdr()
|
||||
{
|
||||
Refresh();
|
||||
}
|
||||
|
||||
~ImageCacheDlIteratePhdr()
|
||||
{
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddress( uint64_t address )
|
||||
{
|
||||
const ImageEntry* entry = ImageCache::GetImageForAddress( address );
|
||||
if( !entry )
|
||||
{
|
||||
Refresh();
|
||||
return GetImageForAddressImpl( address );
|
||||
return ImageCache::GetImageForAddress( address );
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy::FastVector<ImageEntry> m_images;
|
||||
bool m_updated = false;
|
||||
bool m_haveMainImageName = false;
|
||||
|
||||
static int Callback( struct dl_phdr_info* info, size_t size, void* data )
|
||||
{
|
||||
ImageCache* cache = reinterpret_cast<ImageCache*>( data );
|
||||
ImageCacheDlIteratePhdr* cache = reinterpret_cast<ImageCacheDlIteratePhdr*>( data );
|
||||
|
||||
const auto startAddress = reinterpret_cast<void*>( info->dlpi_addr );
|
||||
if( cache->Contains( startAddress ) ) return 0;
|
||||
const auto startAddress = static_cast<uint64_t>( info->dlpi_addr );
|
||||
if( cache->ContainsImage( startAddress ) ) return 0;
|
||||
|
||||
const uint32_t headerCount = info->dlpi_phnum;
|
||||
assert( headerCount > 0);
|
||||
const auto endAddress = reinterpret_cast<void*>( info->dlpi_addr +
|
||||
const auto endAddress = static_cast<uint64_t>( info->dlpi_addr +
|
||||
info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz);
|
||||
|
||||
ImageEntry* image = cache->m_images.push_next();
|
||||
image->m_startAddress = startAddress;
|
||||
image->m_endAddress = endAddress;
|
||||
ImageEntry image{};
|
||||
image.m_startAddress = startAddress;
|
||||
image.m_endAddress = endAddress;
|
||||
|
||||
// the base executable name isn't provided when iterating with dl_iterate_phdr,
|
||||
// we will have to patch the executable image name outside this callback
|
||||
if( info->dlpi_name && info->dlpi_name[0] != '\0' )
|
||||
{
|
||||
size_t sz = strlen( info->dlpi_name ) + 1;
|
||||
image->m_name = (char*)tracy_malloc( sz );
|
||||
memcpy( image->m_name, info->dlpi_name, sz );
|
||||
}
|
||||
else
|
||||
{
|
||||
image->m_name = nullptr;
|
||||
}
|
||||
image.m_name = info->dlpi_name && info->dlpi_name[0] != '\0' ? CopyStringFast( info->dlpi_name ) : nullptr;
|
||||
|
||||
cache->AddEntry( image );
|
||||
cache->m_updated = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Contains( void* startAddress ) const
|
||||
{
|
||||
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
|
||||
}
|
||||
|
||||
void Refresh()
|
||||
{
|
||||
m_updated = false;
|
||||
@@ -186,9 +257,7 @@ private:
|
||||
|
||||
if( m_updated )
|
||||
{
|
||||
std::sort( m_images.begin(), m_images.end(),
|
||||
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
|
||||
|
||||
Sort();
|
||||
// patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks
|
||||
UpdateMainImageName();
|
||||
}
|
||||
@@ -223,31 +292,45 @@ private:
|
||||
|
||||
m_haveMainImageName = true;
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddressImpl( void* address ) const
|
||||
{
|
||||
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
|
||||
[]( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } );
|
||||
|
||||
if( it != m_images.end() && address < it->m_endAddress )
|
||||
{
|
||||
return it;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
for( ImageEntry& entry : m_images )
|
||||
{
|
||||
tracy_free( entry.m_name );
|
||||
}
|
||||
|
||||
m_images.clear();
|
||||
ImageCache::Clear();
|
||||
m_haveMainImageName = false;
|
||||
}
|
||||
};
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
using UserlandImageCache = ImageCacheDlIteratePhdr;
|
||||
#else
|
||||
using UserlandImageCache = ImageCache;
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
|
||||
static UserlandImageCache* s_imageCache;
|
||||
static ImageCache* s_krnlCache;
|
||||
|
||||
// Creates the two global caches: s_imageCache (UserlandImageCache) and s_krnlCache (ImageCache).
// Storage comes from tracy_malloc and the objects are constructed in place with placement new.
// Asserts that neither cache exists yet.
void CreateImageCaches()
|
||||
{
|
||||
assert( s_imageCache == nullptr && s_krnlCache == nullptr );
|
||||
s_imageCache = new ( tracy_malloc( sizeof( UserlandImageCache ) ) ) UserlandImageCache();
|
||||
s_krnlCache = new ( tracy_malloc( sizeof( ImageCache ) ) ) ImageCache();
|
||||
}
|
||||
|
||||
// Tears down both global caches if they exist and resets their pointers to nullptr,
// so calling this when a cache was never created (or was already destroyed) is harmless.
// Each cache is destroyed with an explicit destructor call followed by tracy_free.
void DestroyImageCaches()
|
||||
{
|
||||
if( s_krnlCache != nullptr )
|
||||
{
|
||||
s_krnlCache->~ImageCache();
|
||||
tracy_free( s_krnlCache );
|
||||
s_krnlCache = nullptr;
|
||||
}
|
||||
|
||||
if( s_imageCache != nullptr )
|
||||
{
|
||||
s_imageCache->~UserlandImageCache();
|
||||
tracy_free( s_imageCache );
|
||||
s_imageCache = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime,
|
||||
// simply resolve the offset and image name (which will be enough for the resolving to be done offline)
|
||||
@@ -282,32 +365,17 @@ extern "C"
|
||||
t_SymFromInlineContext _SymFromInlineContext = 0;
|
||||
t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0;
|
||||
|
||||
TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0;
|
||||
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr;
|
||||
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags)
|
||||
{
|
||||
return ___tracy_RtlWalkFrameChainPtr(callers, count, flags);
|
||||
}
|
||||
}
|
||||
|
||||
struct ModuleCache
|
||||
{
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
char* name;
|
||||
};
|
||||
|
||||
static FastVector<ModuleCache>* s_modCache;
|
||||
|
||||
|
||||
struct KernelDriver
|
||||
{
|
||||
uint64_t addr;
|
||||
const char* mod;
|
||||
const char* path;
|
||||
};
|
||||
|
||||
KernelDriver* s_krnlCache = nullptr;
|
||||
size_t s_krnlCacheCnt;
|
||||
|
||||
void InitCallstackCritical()
|
||||
{
|
||||
___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
}
|
||||
|
||||
void DbgHelpInit()
|
||||
@@ -338,75 +406,57 @@ DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll,
|
||||
return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 );
|
||||
}
|
||||
|
||||
ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
char* FormatImageName( const char* imageName, uint32_t imageNameLength )
|
||||
{
|
||||
DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize );
|
||||
|
||||
ModuleCache* cachedModule = s_modCache->push_next();
|
||||
cachedModule->start = baseOfDll;
|
||||
cachedModule->end = baseOfDll + dllSize;
|
||||
|
||||
// when doing offline symbol resolution, we must store the full path of the dll for the resolving to work
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1);
|
||||
memcpy(cachedModule->name, imageName, imageNameLength);
|
||||
cachedModule->name[imageNameLength] = '\0';
|
||||
return CopyStringFast( imageName, imageNameLength );
|
||||
}
|
||||
else
|
||||
{
|
||||
auto ptr = imageName + imageNameLength;
|
||||
while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--;
|
||||
if (ptr > imageName) ptr++;
|
||||
const char* ptr = imageName + imageNameLength;
|
||||
while( ptr > imageName && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > imageName ) ptr++;
|
||||
const auto namelen = imageName + imageNameLength - ptr;
|
||||
cachedModule->name = (char*)tracy_malloc_fast(namelen + 3);
|
||||
cachedModule->name[0] = '[';
|
||||
memcpy(cachedModule->name + 1, ptr, namelen);
|
||||
cachedModule->name[namelen + 1] = ']';
|
||||
cachedModule->name[namelen + 2] = '\0';
|
||||
}
|
||||
|
||||
return cachedModule;
|
||||
char* alloc = (char*)tracy_malloc_fast( namelen + 3 );
|
||||
alloc[0] = '[';
|
||||
memcpy( alloc + 1, ptr, namelen );
|
||||
alloc[namelen + 1] = ']';
|
||||
alloc[namelen + 2] = '\0';
|
||||
return alloc;
|
||||
}
|
||||
}
|
||||
|
||||
void InitCallstack()
|
||||
ImageEntry* CacheModuleInfo( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
{
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
TracyDebug("TRACY: enabling offline symbol resolving!\n");
|
||||
}
|
||||
ImageEntry moduleEntry = {};
|
||||
moduleEntry.m_startAddress = baseOfDll;
|
||||
moduleEntry.m_endAddress = baseOfDll + dllSize;
|
||||
moduleEntry.m_path = CopyStringFast( imagePath, imageNameLength );
|
||||
moduleEntry.m_name = FormatImageName( imagePath, imageNameLength );
|
||||
|
||||
DbgHelpInit();
|
||||
return s_imageCache->AddEntry( moduleEntry );
|
||||
}
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
|
||||
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
|
||||
// and process module symbol loading at startup time - they will be loaded on demand later
|
||||
// Sometimes this process can take a very long time and prevent resolving callstack frames
|
||||
// symbols during that time.
|
||||
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
|
||||
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
|
||||
if ( !initTimeModuleLoad )
|
||||
{
|
||||
TracyDebug("TRACY: skipping init time dbghelper module load\n");
|
||||
}
|
||||
// Loads symbols for the module at [baseOfDll, baseOfDll + dllSize) through
// DbgHelpLoadSymbolsForModule, then records the module with CacheModuleInfo.
// The result of the symbol load is not inspected; the module is cached either way.
// Returns whatever CacheModuleInfo returns for the new entry.
ImageEntry* LoadSymbolsForModuleAndCache( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
{
|
||||
DbgHelpLoadSymbolsForModule( imagePath, baseOfDll, dllSize );
|
||||
return CacheModuleInfo( imagePath, imageNameLength, baseOfDll, dllSize );
|
||||
}
|
||||
|
||||
static void CacheProcessDrivers()
|
||||
{
|
||||
DWORD needed;
|
||||
LPVOID dev[4096];
|
||||
if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
|
||||
if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
|
||||
{
|
||||
char windir[MAX_PATH];
|
||||
if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 );
|
||||
const auto windirlen = strlen( windir );
|
||||
|
||||
const auto sz = needed / sizeof( LPVOID );
|
||||
s_krnlCache = (KernelDriver*)tracy_malloc( sizeof(KernelDriver) * sz );
|
||||
int cnt = 0;
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
char fn[MAX_PATH];
|
||||
@@ -417,7 +467,12 @@ void InitCallstack()
|
||||
buf[0] = '<';
|
||||
memcpy( buf+1, fn, len );
|
||||
memcpy( buf+len+1, ">", 2 );
|
||||
s_krnlCache[cnt] = KernelDriver { (uint64_t)dev[i], buf };
|
||||
|
||||
ImageEntry kernelDriver{};
|
||||
kernelDriver.m_startAddress = (uint64_t)dev[i];
|
||||
kernelDriver.m_endAddress = 0;
|
||||
kernelDriver.m_name = buf;
|
||||
kernelDriver.m_path = nullptr;
|
||||
|
||||
const auto len = GetDeviceDriverFileNameA( dev[i], fn, sizeof( fn ) );
|
||||
if( len != 0 )
|
||||
@@ -433,27 +488,23 @@ void InitCallstack()
|
||||
}
|
||||
|
||||
DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 );
|
||||
|
||||
const auto psz = strlen( path );
|
||||
auto pptr = (char*)tracy_malloc_fast( psz+1 );
|
||||
memcpy( pptr, path, psz );
|
||||
pptr[psz] = '\0';
|
||||
s_krnlCache[cnt].path = pptr;
|
||||
|
||||
kernelDriver.m_path = CopyString( path );
|
||||
}
|
||||
|
||||
cnt++;
|
||||
s_krnlCache->AddEntry(kernelDriver);
|
||||
}
|
||||
}
|
||||
s_krnlCacheCnt = cnt;
|
||||
std::sort( s_krnlCache, s_krnlCache + s_krnlCacheCnt, []( const KernelDriver& lhs, const KernelDriver& rhs ) { return lhs.addr > rhs.addr; } );
|
||||
s_krnlCache->Sort();
|
||||
}
|
||||
}
|
||||
|
||||
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
|
||||
new(s_modCache) FastVector<ModuleCache>( 512 );
|
||||
|
||||
static void CacheProcessModules()
|
||||
{
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
HMODULE mod[1024];
|
||||
if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
@@ -472,6 +523,41 @@ void InitCallstack()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
TracyDebug("TRACY: enabling offline symbol resolving!\n");
|
||||
}
|
||||
|
||||
CreateImageCaches();
|
||||
|
||||
DbgHelpInit();
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
|
||||
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
|
||||
// and process module symbol loading at startup time - they will be loaded on demand later
|
||||
// Sometimes this process can take a very long time and prevent resolving callstack frames
|
||||
// symbols during that time.
|
||||
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
|
||||
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
|
||||
if ( !initTimeModuleLoad )
|
||||
{
|
||||
TracyDebug("TRACY: skipping init time dbghelper module load\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
CacheProcessDrivers();
|
||||
CacheProcessModules();
|
||||
}
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
@@ -480,6 +566,7 @@ void InitCallstack()
|
||||
|
||||
void EndCallstack()
|
||||
{
|
||||
DestroyImageCaches();
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
@@ -514,11 +601,11 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
|
||||
const char* GetKernelModulePath( uint64_t addr )
|
||||
{
|
||||
assert( addr >> 63 != 0 );
|
||||
assert( IsKernelAddress( addr ) );
|
||||
if( !s_krnlCache ) return nullptr;
|
||||
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
|
||||
if( it == s_krnlCache + s_krnlCacheCnt ) return nullptr;
|
||||
return it->path;
|
||||
const ImageEntry* imageEntry = s_krnlCache->GetImageForAddress( addr );
|
||||
if( imageEntry ) return imageEntry->m_path;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct ModuleNameAndBaseAddress
|
||||
@@ -529,51 +616,38 @@ struct ModuleNameAndBaseAddress
|
||||
|
||||
ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr )
|
||||
{
|
||||
if( ( addr >> 63 ) != 0 )
|
||||
if( IsKernelAddress( addr ) )
|
||||
{
|
||||
if( s_krnlCache )
|
||||
{
|
||||
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
|
||||
if( it != s_krnlCache + s_krnlCacheCnt )
|
||||
{
|
||||
return ModuleNameAndBaseAddress{ it->mod, it->addr };
|
||||
}
|
||||
}
|
||||
const ImageEntry* entry = s_krnlCache->GetImageForAddress( addr );
|
||||
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
|
||||
return ModuleNameAndBaseAddress{ "<kernel>", addr };
|
||||
}
|
||||
|
||||
for( auto& v : *s_modCache )
|
||||
{
|
||||
if( addr >= v.start && addr < v.end )
|
||||
{
|
||||
return ModuleNameAndBaseAddress{ v.name, v.start };
|
||||
}
|
||||
}
|
||||
const ImageEntry* entry = s_imageCache->GetImageForAddress( addr );
|
||||
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
|
||||
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
// Do not use FreeLibrary because we set the flag GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT
|
||||
// see https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulehandleexa to get more information
|
||||
constexpr DWORD flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
|
||||
HMODULE mod = NULL;
|
||||
|
||||
InitRpmalloc();
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
if( GetModuleHandleExA( flag, (char*)addr, &mod ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod, &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < ( base + info.SizeOfImage ) )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < base + info.SizeOfImage )
|
||||
char name[1024];
|
||||
const auto nameLength = GetModuleFileNameA( mod, name, sizeof( name ) );
|
||||
if( nameLength > 0 )
|
||||
{
|
||||
char name[1024];
|
||||
const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( nameLength > 0 )
|
||||
{
|
||||
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
|
||||
ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
|
||||
return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start };
|
||||
}
|
||||
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
|
||||
ImageEntry* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
|
||||
return ModuleNameAndBaseAddress{ cachedModule->m_name, cachedModule->m_startAddress };
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -753,7 +827,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#elif defined(TRACY_USE_LIBBACKTRACE)
|
||||
|
||||
enum { MaxCbTrace = 64 };
|
||||
|
||||
@@ -762,9 +836,6 @@ struct backtrace_state* cb_bts = nullptr;
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
int cb_fixup;
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
static ImageCache* s_imageCache = nullptr;
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
debuginfod_client* s_debuginfod;
|
||||
@@ -959,10 +1030,9 @@ void InitCallstack()
|
||||
{
|
||||
InitRpmalloc();
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
s_imageCache = (ImageCache*)tracy_malloc( sizeof( ImageCache ) );
|
||||
new(s_imageCache) ImageCache();
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
CreateImageCaches();
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
@@ -1056,13 +1126,9 @@ debuginfod_client* GetDebuginfodClient()
|
||||
|
||||
void EndCallstack()
|
||||
{
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
if( s_imageCache )
|
||||
{
|
||||
s_imageCache->~ImageCache();
|
||||
tracy_free( s_imageCache );
|
||||
}
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
DestroyImageCaches();
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
#ifndef TRACY_DEMANGLE
|
||||
___tracy_free_demangle_buffer();
|
||||
#endif
|
||||
@@ -1252,17 +1318,17 @@ void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, Callst
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
InitRpmalloc();
|
||||
if( ptr >> 63 == 0 )
|
||||
if ( !IsKernelAddress( ptr ) )
|
||||
{
|
||||
const char* imageName = nullptr;
|
||||
uint64_t imageBaseAddress = 0x0;
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
const auto* image = s_imageCache->GetImageForAddress((void*)ptr);
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
const auto* image = s_imageCache->GetImageForAddress( ptr );
|
||||
if( image )
|
||||
{
|
||||
imageName = image->m_name;
|
||||
imageBaseAddress = uint64_t(image->m_startAddress);
|
||||
imageBaseAddress = uint64_t( image->m_startAddress );
|
||||
}
|
||||
#else
|
||||
Dl_info dlinfo;
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
# endif
|
||||
|
||||
# if defined _WIN32
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# if !defined TRACY_WIN32_NO_DESKTOP
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
# endif
|
||||
# elif defined __ANDROID__
|
||||
@@ -30,6 +30,10 @@
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
# endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#define TRACY_USE_LIBBACKTRACE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,15 +1,31 @@
|
||||
#ifndef __TRACYCALLSTACK_HPP__
|
||||
#define __TRACYCALLSTACK_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
#include "TracyCallstack.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Describes one loaded image (module, shared object or driver) as an address range
// plus two descriptive strings. All fields default to zero / nullptr.
struct ImageEntry
|
||||
{
|
||||
// First address belonging to the image.
uint64_t m_startAddress = 0;
|
||||
// Address just past the image; lookups compare it as an exclusive upper bound.
uint64_t m_endAddress = 0;
|
||||
// Display name of the image; may be nullptr.
char* m_name = nullptr;
|
||||
// Path of the image; may be nullptr.
char* m_path = nullptr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#ifndef TRACY_HAS_CALLSTACK
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
|
||||
static constexpr bool has_callstack() { return false; }
|
||||
static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; }
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -38,6 +54,8 @@ static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static constexpr bool has_callstack() { return true; }
|
||||
|
||||
struct CallstackSymbolData
|
||||
{
|
||||
const char* file;
|
||||
@@ -79,11 +97,10 @@ debuginfod_client* GetDebuginfodClient();
|
||||
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
|
||||
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long );
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
@@ -112,7 +129,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v
|
||||
return _URC_NO_REASON;
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
|
||||
@@ -127,7 +144,7 @@ static tracy_force_inline void* Callstack( int depth )
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 );
|
||||
|
||||
|
||||
@@ -219,8 +219,9 @@ public:
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
|
||||
private:
|
||||
LockableCtx m_ctx;
|
||||
};
|
||||
|
||||
@@ -535,8 +536,9 @@ public:
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
|
||||
private:
|
||||
SharedLockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -58,6 +58,9 @@ TRACY_API bool IsProfilerStarted();
|
||||
# define TracyIsStarted true
|
||||
#endif
|
||||
|
||||
TRACY_API bool BeginSamplingProfiling();
|
||||
TRACY_API void EndSamplingProfiling();
|
||||
|
||||
class GpuCtx;
|
||||
class Profiler;
|
||||
class Socket;
|
||||
@@ -114,11 +117,11 @@ struct LuaZoneState
|
||||
|
||||
|
||||
#define TracyLfqPrepare( _type ) \
|
||||
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = GetToken(); \
|
||||
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = tracy::GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommit \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
@@ -136,11 +139,11 @@ struct LuaZoneState
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyQueuePrepare( _type ) \
|
||||
auto item = Profiler::QueueSerial(); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
auto item = tracy::Profiler::QueueSerial(); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
# define TracyQueueCommit( _name ) \
|
||||
MemWrite( &item->_name.thread, GetThreadHandle() ); \
|
||||
Profiler::QueueSerialFinish();
|
||||
tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
# define TracyQueuePrepareC( _type ) \
|
||||
auto item = tracy::Profiler::QueueSerial(); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
@@ -252,6 +255,9 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
bool BeginSamplingProfiling();
|
||||
void EndSamplingProfiling();
|
||||
|
||||
tracy_force_inline uint32_t GetNextZoneId()
|
||||
{
|
||||
return m_zoneId.fetch_add( 1, std::memory_order_relaxed );
|
||||
@@ -387,58 +393,58 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth )
|
||||
{
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageFatThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, int callstack )
|
||||
static tracy_force_inline void Message( const char* txt, int32_t callstack_depth )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->messageLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
|
||||
TracyQueueCommit( messageLiteralThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth )
|
||||
{
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
MemWrite( &item->messageColorFat.time, GetTime() );
|
||||
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) );
|
||||
@@ -448,17 +454,17 @@ public:
|
||||
TracyQueueCommit( messageColorFatThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColorLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) );
|
||||
@@ -510,29 +516,31 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemAlloc( ptr, size, secure );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemAlloc( ptr, size, secure );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
if( !ProfilerAllocatorAvailable() )
|
||||
@@ -540,23 +548,25 @@ public:
|
||||
MemFree( ptr, secure );
|
||||
return;
|
||||
}
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemFree( ptr, secure );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
|
||||
@@ -587,62 +597,101 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
|
||||
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemAllocNamed( ptr, size, secure, name );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemAllocNamed( ptr, size, secure, name );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemFreeNamed( ptr, secure, name );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemFreeNamed( ptr, secure, name );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendCallstack( int depth )
|
||||
static tracy_force_inline void MemDiscard( const char* name, bool secure )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto ptr = Callstack( depth );
|
||||
TracyQueuePrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyQueueCommit( callstackFatThread );
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemDiscard( QueueType::MemDiscard, thread, name );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
// Queues a memory-discard event for `name`, preceded by the call stack returned by
// Callstack( depth ).
//
// - Returns immediately when `secure` is set and ProfilerAvailable() is false.
// - When `depth` is not positive or has_callstack() is false, defers to
//   MemDiscard( name, secure ), i.e. the event is sent without a call stack.
// - With TRACY_ON_DEMAND, nothing is queued while no client is connected.
static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth )
{
    if( secure && !ProfilerAvailable() ) return;
    if( depth > 0 && has_callstack() )
    {
#  ifdef TRACY_ON_DEMAND
        if( !GetProfiler().IsConnected() ) return;
#  endif
        const auto thread = GetThreadHandle();

        // Capture the stack before taking the serial lock, as the other *Callstack senders do.
        auto callstack = Callstack( depth );

        GetProfiler().m_serialLock.lock();
        SendCallstackSerial( callstack );
        // The event follows a CallstackSerial item, so it must use the callstack queue
        // type, matching MemAllocCallstack / MemFreeCallstack. SendMemDiscard accepts
        // exactly MemDiscard and MemDiscardCallstack.
        SendMemDiscard( QueueType::MemDiscardCallstack, thread, name );
        GetProfiler().m_serialLock.unlock();
    }
    else
    {
        MemDiscard( name, secure );
    }
}
|
||||
|
||||
// Queues a QueueType::Callstack event carrying the result of Callstack( depth ).
// Does nothing when `depth` is not positive or has_callstack() is false.
static tracy_force_inline void SendCallstack( int32_t depth )
{
    if( depth <= 0 || !has_callstack() ) return;

    const auto stack = Callstack( depth );
    TracyQueuePrepare( QueueType::Callstack );
    // The pointer itself is transmitted as a 64-bit value.
    MemWrite( &item->callstackFat.ptr, (uint64_t)stack );
    TracyQueueCommit( callstackFatThread );
}
|
||||
|
||||
static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data )
|
||||
@@ -677,6 +726,9 @@ public:
|
||||
#ifdef TRACY_FIBERS
|
||||
static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::FiberEnter );
|
||||
MemWrite( &item->fiberEnter.time, GetTime() );
|
||||
MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber );
|
||||
@@ -686,13 +738,16 @@ public:
|
||||
|
||||
// Queues a QueueType::FiberLeave event stamped with GetTime().
// With TRACY_ON_DEMAND defined, nothing is queued while no client is connected.
static tracy_force_inline void LeaveFiber()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::FiberLeave );
|
||||
MemWrite( &item->fiberLeave.time, GetTime() );
|
||||
TracyQueueCommit( fiberLeave );
|
||||
}
|
||||
#endif
|
||||
|
||||
void SendCallstack( int depth, const char* skipBefore );
|
||||
void SendCallstack( int32_t depth, const char* skipBefore );
|
||||
static void CutCallstack( void* callstack, const char* skipBefore );
|
||||
|
||||
static bool ShouldExit();
|
||||
@@ -800,7 +855,7 @@ private:
|
||||
|
||||
void InstallCrashHandler();
|
||||
void RemoveCrashHandler();
|
||||
|
||||
|
||||
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
|
||||
void ClearSerial();
|
||||
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
|
||||
@@ -833,6 +888,21 @@ private:
|
||||
m_bufferOffset += int( len );
|
||||
}
|
||||
|
||||
char* SafeCopyProlog( const char* p, size_t size );
|
||||
void SafeCopyEpilog( char* buf );
|
||||
|
||||
template<class Callable> // must be void( const char* buf, size_t size )
|
||||
bool WithSafeCopy( const char* p, size_t size, Callable&& callable )
|
||||
{
|
||||
if( char* buf = SafeCopyProlog( p, size ) )
|
||||
{
|
||||
callable( buf, size );
|
||||
SafeCopyEpilog( buf );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SendData( const char* data, size_t len );
|
||||
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
@@ -862,14 +932,13 @@ private:
|
||||
|
||||
static tracy_force_inline void SendCallstackSerial( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
#else
|
||||
static_cast<void>(ptr); // unused
|
||||
#endif
|
||||
if( has_callstack() )
|
||||
{
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
|
||||
@@ -907,6 +976,18 @@ private:
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
// Appends a memory-discard item to the profiler's serial queue.
//   type   - must be QueueType::MemDiscard or QueueType::MemDiscardCallstack (asserted)
//   thread - thread handle stored in the event
//   name   - stored by address (as a 64-bit value), not copied
// The event time is taken from GetTime(). The callers visible in this file hold
// m_serialLock around the call.
static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name )
{
    assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack );

    auto& serialQueue = GetProfiler().m_serialQueue;
    auto entry = serialQueue.prepare_next();
    MemWrite( &entry->hdr.type, type );
    MemWrite( &entry->memDiscard.time, GetTime() );
    MemWrite( &entry->memDiscard.thread, thread );
    MemWrite( &entry->memDiscard.name, (uint64_t)name );
    serialQueue.commit_next();
}
|
||||
|
||||
static tracy_force_inline void SendMemName( const char* name )
|
||||
{
|
||||
assert( name );
|
||||
@@ -922,7 +1003,6 @@ private:
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
std::atomic<int64_t> m_timeBegin;
|
||||
uint32_t m_mainThread;
|
||||
uint64_t m_epoch, m_exectime;
|
||||
@@ -963,6 +1043,7 @@ private:
|
||||
std::atomic<bool> m_isConnected;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint64_t> m_connectionId;
|
||||
std::atomic<bool> m_symbolsBusy;
|
||||
|
||||
TracyMutex m_deferredLock;
|
||||
FastVector<QueueItem> m_deferredQueue;
|
||||
@@ -990,9 +1071,19 @@ private:
|
||||
char* m_queryData;
|
||||
char* m_queryDataPtr;
|
||||
|
||||
#if defined _WIN32
|
||||
void* m_exceptionHandler;
|
||||
#ifndef NDEBUG
|
||||
// m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds.
|
||||
std::atomic_bool m_inUse{ false };
|
||||
#endif
|
||||
char* m_safeSendBuffer;
|
||||
|
||||
#if defined _WIN32
|
||||
void* m_prevHandler;
|
||||
#else
|
||||
int m_pipe[2];
|
||||
int m_pipeBufSize;
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
struct {
|
||||
struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt;
|
||||
|
||||
@@ -0,0 +1,556 @@
|
||||
#include "../server/tracy_robin_hood.h"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "TracyThread.hpp"
|
||||
#include "tracy/TracyC.h"
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <time.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
// Evaluates the rocprofiler call `result` once. If its status is not
// ROCPROFILER_STATUS_SUCCESS, the failure (call text, file, line, `msg`, status code and
// status string) is written to std::cerr and a std::runtime_error is thrown.
// `msg` is streamed with operator<< in both messages, so it may be any streamable
// expression and not only a string literal.
#define ROCPROFILER_CALL( result, msg ) \
    { \
        rocprofiler_status_t CHECKSTATUS = result; \
        if( CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS ) \
        { \
            std::string status_msg = rocprofiler_get_status_string( CHECKSTATUS ); \
            std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failed with error code " \
                      << CHECKSTATUS << ": " << status_msg << std::endl; \
            std::stringstream errmsg{}; \
            errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failure (" << status_msg \
                   << ")"; \
            throw std::runtime_error( errmsg.str() ); \
        } \
    }
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
|
||||
// Per-dispatch bookkeeping, stored in ToolData::dispatch_data keyed by dispatch id.
struct DispatchData
|
||||
{
|
||||
// Tracy CPU time recorded when the dispatch enqueue callback is entered.
int64_t launch_start;
|
||||
// CPU time used as the zone end in record_interval.
// NOTE(review): presumably recorded when the enqueue callback exits - the writer is not visible here.
int64_t launch_end;
|
||||
// Thread handle recorded by record_interval; reused for the counter annotations.
uint32_t thread_id;
|
||||
// Tracy GPU query id assigned to this dispatch by record_interval.
uint16_t query_id;
|
||||
};
|
||||
|
||||
// State shared by the rocprofiler callbacks; each callback receives it as `callback_data`.
struct ToolData
|
||||
{
|
||||
// NOTE(review): version / runtime_version / priority / client_id are not read in the
// visible code; presumably they mirror the rocprofiler tool registration arguments.
uint32_t version;
|
||||
const char* runtime_version;
|
||||
uint32_t priority;
|
||||
rocprofiler_client_id_t client_id;
|
||||
// Tracy GPU context id written into every GPU event emitted by this tool.
uint8_t context_id;
|
||||
// Callbacks return immediately while this is false.
bool init;
|
||||
// Next query id to hand out; read and incremented under `mut` in record_interval.
uint64_t query_id;
|
||||
// Set by gpu_context_allocate to the CPU timestamp taken for the new context.
int64_t previous_cpu_time;
|
||||
// Kernel id -> symbol data; filled on code-object LOAD and erased on UNLOAD.
tracy::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t> client_kernels;
|
||||
// Dispatch id -> launch times, query id and thread of that dispatch.
tracy::unordered_map<rocprofiler_dispatch_id_t, DispatchData> dispatch_data;
|
||||
// Names of the hardware counters that dispatch_callback selects for collection.
tracy::unordered_set<std::string> counter_names = { "SQ_WAVES", "GL2C_MISS", "GL2C_HIT" };
|
||||
// NOTE(review): presumably the calibration thread - not used in the visible code.
std::unique_ptr<tracy::Thread> cal_thread;
|
||||
// Guards query_id, client_kernels and dispatch_data in the visible callbacks.
std::mutex mut{};
|
||||
};
|
||||
|
||||
using namespace tracy;
|
||||
|
||||
// Returns a reference to the single rocprofiler context id owned by this tool.
// The id is a function-local static that starts out as 0; callers assign through the
// returned reference.
rocprofiler_context_id_t& get_client_ctx()
|
||||
{
|
||||
static rocprofiler_context_id_t ctx{ 0 };
|
||||
return ctx;
|
||||
}
|
||||
|
||||
const char* CTX_NAME = "rocprofv3";
|
||||
|
||||
// Registers a new Tracy GPU context for this tool and returns its context id.
//
// Emits two serial-queue events:
//   1. GpuNewContext, pairing the current Tracy CPU time with a CLOCK_BOOTTIME reading
//      (in nanoseconds) that is used as the GPU timestamp, with a period of 1.0.
//   2. GpuContextName, carrying a tracy_malloc'ed copy of CTX_NAME.
// As a side effect the CPU timestamp is stored in data->previous_cpu_time.
uint8_t gpu_context_allocate( ToolData* data )
{
    timespec ts;
    clock_gettime( CLOCK_BOOTTIME, &ts );
    uint64_t cpu_timestamp = Profiler::GetTime();
    uint64_t gpu_timestamp = ( (uint64_t)ts.tv_sec * 1000000000 ) + ts.tv_nsec;
    float timestamp_period = 1.0f;
    data->previous_cpu_time = cpu_timestamp;

    // Allocate the process-unique GPU context ID. There's a max of 255 available;
    // if we are recreating devices a lot we may exceed that. Don't do that, or
    // wrap around and get weird (but probably still usable) numbers.
    uint8_t context_id = tracy::GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed );
    if( context_id >= 255 )
    {
        context_id %= 255;
    }

    uint8_t context_flags = 0;
#ifdef TRACY_ROCPROF_CALIBRATION
    // Tell tracy we'll be passing calibrated timestamps and not to mess with
    // the times. We'll periodically send GpuCalibration events in case the
    // times drift.
    context_flags |= tracy::GpuContextCalibration;
#endif
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
        tracy::MemWrite( &item->gpuNewContext.cpuTime, cpu_timestamp );
        tracy::MemWrite( &item->gpuNewContext.gpuTime, gpu_timestamp );
        memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
        tracy::MemWrite( &item->gpuNewContext.period, timestamp_period );
        tracy::MemWrite( &item->gpuNewContext.context, context_id );
        tracy::MemWrite( &item->gpuNewContext.flags, context_flags );
        tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
        tracy::Profiler::QueueSerialFinish();
    }

    // Send the name of the context along.
    // NOTE: Tracy will unconditionally free the name so we must clone it here.
    // Since internally Tracy will use its own rpmalloc implementation we must
    // make sure we allocate from the same source.
    size_t name_length = strlen( CTX_NAME );
    char* cloned_name = (char*)tracy::tracy_malloc( name_length );
    memcpy( cloned_name, CTX_NAME, name_length );
    {
        // MemWrite stores exactly sizeof( value ) bytes, so the length is narrowed to the
        // 16-bit width of the queue item's size field before it is written (the same
        // narrowing the Message senders apply to their size field).
        const auto name_size = static_cast<uint16_t>( name_length );

        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
        tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
        tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)cloned_name );
        tracy::MemWrite( &item->gpuContextNameFat.size, name_size );
        tracy::Profiler::QueueSerialFinish();
    }

    return context_id;
}
|
||||
|
||||
// Returns a Tracy-allocated source location whose function name is the name of the
// kernel registered under `kernel_id`, or 0 when no such kernel is known.
// The lookup and the allocation both happen while data->mut is held.
uint64_t kernel_src_loc( ToolData* data, uint64_t kernel_id )
{
    auto _lk = std::unique_lock{ data->mut };

    const rocprofiler_kernel_id_t kid = kernel_id;
    // Single lookup instead of count() followed by operator[].
    const auto pos = data->client_kernels.find( kid );
    if( pos == data->client_kernels.end() ) return 0;

    const char* name = pos->second.kernel_name;
    const size_t name_len = strlen( name );
    const uint32_t line = 0;   // no line information is available for a kernel symbol
    return tracy::Profiler::AllocSourceLocation( line, nullptr, 0, name, name_len, nullptr, 0 );
}
|
||||
|
||||
// Emits one complete GPU zone to Tracy's serial queue: zone begin, GPU start time,
// zone end, GPU end time, all tagged with a freshly assigned query id.
//
//   data            - tool state; supplies the context id and the next query id
//   start_timestamp - GPU time sent for the zone begin
//   end_timestamp   - GPU time sent for the zone end
//   src_loc         - allocated source location, or 0 to use an empty static one
//   dispatch_id     - dispatch the interval belongs to, or UINT64_MAX for none
//
// For a real dispatch the CPU begin/end times come from the launch times stored in
// data->dispatch_data, and the assigned query id and the current thread handle are
// recorded there. Without a dispatch the current Tracy time is used for both.
void record_interval( ToolData* data, rocprofiler_timestamp_t start_timestamp, rocprofiler_timestamp_t end_timestamp,
                      uint64_t src_loc, rocprofiler_dispatch_id_t dispatch_id )
{
    // Placeholder used when the caller has no allocated source location.
    static const ___tracy_source_location_data empty_src_loc = { nullptr, nullptr, nullptr, 0, 0 };

    const bool has_dispatch = dispatch_id != UINT64_MAX;
    const uint8_t context_id = data->context_id;

    uint16_t query_id = 0;
    uint64_t cpu_start_time = 0, cpu_end_time = 0;
    {
        // One critical section covers both the query id allocation and the dispatch
        // record update/read.
        auto _lk = std::unique_lock{ data->mut };
        query_id = data->query_id;
        data->query_id++;
        if( has_dispatch )
        {
            DispatchData& dispatch = data->dispatch_data[dispatch_id];
            dispatch.query_id = query_id;
            dispatch.thread_id = tracy::GetThreadHandle();
            cpu_start_time = dispatch.launch_start;
            cpu_end_time = dispatch.launch_end;
        }
    }
    if( !has_dispatch )
    {
        cpu_start_time = tracy::Profiler::GetTime();
        cpu_end_time = tracy::Profiler::GetTime();
    }

    // Sends the GPU timestamp belonging to the most recently queued begin/end event.
    const auto send_gpu_time = [&]( rocprofiler_timestamp_t gpu_time )
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
        tracy::MemWrite( &item->gpuTime.gpuTime, gpu_time );
        tracy::MemWrite( &item->gpuTime.queryId, query_id );
        tracy::MemWrite( &item->gpuTime.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    };

    {
        // An allocated source location uses the AllocSrcLoc event type; otherwise the
        // address of the static placeholder is sent with the plain event type.
        const bool allocated = src_loc != 0;
        const auto begin_type =
            allocated ? tracy::QueueType::GpuZoneBeginAllocSrcLocSerial : tracy::QueueType::GpuZoneBeginSerial;
        const uint64_t srcloc_value = allocated ? src_loc : (uint64_t)&empty_src_loc;

        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, begin_type );
        tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
        tracy::MemWrite( &item->gpuZoneBegin.srcloc, srcloc_value );
        tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
        tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
        tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    }
    send_gpu_time( start_timestamp );

    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
        tracy::MemWrite( &item->gpuZoneEnd.cpuTime, cpu_end_time );
        tracy::MemWrite( &item->gpuZoneEnd.thread, tracy::GetThreadHandle() );
        tracy::MemWrite( &item->gpuZoneEnd.queryId, query_id );
        tracy::MemWrite( &item->gpuZoneEnd.context, context_id );
        tracy::Profiler::QueueSerialFinish();
    }
    send_gpu_time( end_timestamp );
}
|
||||
|
||||
// rocprofiler callback delivering the counter records collected for one dispatch.
// Sums the record values per counter handle and emits one GpuZoneAnnotation event per
// counter, tagged with the query id and thread stored for the dispatch.
// Does nothing until data->init is set. Throws (via ROCPROFILER_CALL) if a record's
// counter id cannot be queried.
void record_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
                      rocprofiler_record_counter_t* record_data, size_t record_count,
                      rocprofiler_user_data_t /*user_data*/, void* callback_data )
{
    assert( callback_data != nullptr );
    auto* const tool = static_cast<ToolData*>( callback_data );
    if( !tool->init ) return;

    // Total value per counter handle.
    std::unordered_map<rocprofiler_counter_instance_id_t, double> totals;
    for( size_t idx = 0; idx < record_count; ++idx )
    {
        const auto& rec = record_data[idx];
        auto counter = rocprofiler_counter_id_t{};
        ROCPROFILER_CALL( rocprofiler_query_record_counter_id( rec.id, &counter ), "query record counter id" );
        totals[counter.handle] += rec.counter_value;
    }

    const auto dispatch_id = dispatch_data.dispatch_info.dispatch_id;
    uint16_t query_id = 0;
    uint32_t thread_id = 0;
    {
        std::unique_lock guard{ tool->mut };
        // An assumption is made here that the counter values are supplied after the dispatch
        // complete callback.
        assert( tool->dispatch_data.count( dispatch_id ) );
        const DispatchData& entry = tool->dispatch_data[dispatch_id];
        query_id = entry.query_id;
        thread_id = entry.thread_id;
    }

    for( const auto& [note_id, total] : totals )
    {
        auto* item = tracy::Profiler::QueueSerial();
        tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneAnnotation );
        tracy::MemWrite( &item->zoneAnnotation.noteId, note_id );
        tracy::MemWrite( &item->zoneAnnotation.queryId, query_id );
        tracy::MemWrite( &item->zoneAnnotation.thread, thread_id );
        tracy::MemWrite( &item->zoneAnnotation.value, total );
        tracy::MemWrite( &item->zoneAnnotation.context, tool->context_id );
        tracy::Profiler::QueueSerialFinish();
    }
}
|
||||
|
||||
/**
|
||||
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
|
||||
* rocprofiler_counter_config_id_t* is a return to specify what counters to collect
|
||||
* for this dispatch (dispatch_packet).
|
||||
*/
|
||||
void dispatch_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
|
||||
rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/,
|
||||
void* callback_data )
|
||||
{
|
||||
assert( callback_data != nullptr );
|
||||
ToolData* data = static_cast<ToolData*>( callback_data );
|
||||
if( !data->init ) return;
|
||||
|
||||
/**
|
||||
* This simple example uses the same profile counter set for all agents.
|
||||
* We store this in a cache to prevent constructing many identical profile counter
|
||||
* sets. We first check the cache to see if we have already constructed a counter"
|
||||
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
|
||||
* set.
|
||||
*/
|
||||
static std::shared_mutex m_mutex = {};
|
||||
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
|
||||
|
||||
auto search_cache = [&]()
|
||||
{
|
||||
if( auto pos = profile_cache.find( dispatch_data.dispatch_info.agent_id.handle ); pos != profile_cache.end() )
|
||||
{
|
||||
*config = pos->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
{
|
||||
auto rlock = std::shared_lock{ m_mutex };
|
||||
if( search_cache() ) return;
|
||||
}
|
||||
|
||||
auto wlock = std::unique_lock{ m_mutex };
|
||||
if( search_cache() ) return;
|
||||
|
||||
// GPU Counter IDs
|
||||
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
||||
|
||||
// Iterate through the agents and get the counters available on that agent
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_agent_supported_counters(
|
||||
dispatch_data.dispatch_info.agent_id,
|
||||
[]( rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data )
|
||||
{
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>( user_data );
|
||||
for( size_t i = 0; i < num_counters; i++ )
|
||||
{
|
||||
vec->push_back( counters[i] );
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>( &gpu_counters ) ),
|
||||
"Could not fetch supported counters" );
|
||||
|
||||
std::vector<rocprofiler_counter_id_t> collect_counters;
|
||||
collect_counters.reserve( data->counter_names.size() );
|
||||
// Look for the counters contained in counters_to_collect in gpu_counters
|
||||
for( auto& counter : gpu_counters )
|
||||
{
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_counter_info( counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>( &info ) ),
|
||||
"Could not query info" );
|
||||
if( data->counter_names.count( std::string( info.name ) ) > 0 )
|
||||
{
|
||||
collect_counters.push_back( counter );
|
||||
|
||||
size_t name_length = strlen( info.name );
|
||||
char* cloned_name = (char*)tracy::tracy_malloc( name_length );
|
||||
memcpy( cloned_name, info.name, name_length );
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuAnnotationName );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.context, data->context_id );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.noteId, counter.handle );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.ptr, (uint64_t)cloned_name );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.size, name_length );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a collection profile for the counters
|
||||
rocprofiler_profile_config_id_t profile = { .handle = 0 };
|
||||
ROCPROFILER_CALL( rocprofiler_create_profile_config( dispatch_data.dispatch_info.agent_id, collect_counters.data(),
|
||||
collect_counters.size(), &profile ),
|
||||
"Could not construct profile cfg" );
|
||||
|
||||
profile_cache.emplace( dispatch_data.dispatch_info.agent_id.handle, profile );
|
||||
// Return the profile to collect those counters for this dispatch
|
||||
*config = profile;
|
||||
}
|
||||
|
||||
// Callback-tracing entry point registered with rocprofiler. It handles three
// kinds of records:
//   * code-object kernel-symbol registration: keeps data->client_kernels in
//     sync with load/unload events,
//   * kernel dispatch: stores enqueue enter/exit CPU timestamps per dispatch id
//     and emits an interval when the dispatch completes,
//   * memory copy (exit phase only): emits an interval named after the copy
//     direction.
//
// record        - the tracing record delivered by rocprofiler.
// user_data     - unused here.
// callback_data - must point to the ToolData instance passed at registration.
//
// Records are ignored until data->init has been set.
void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data,
                                     void* callback_data )
{
    assert( callback_data != nullptr );
    ToolData* data = static_cast<ToolData*>( callback_data );
    if( !data->init ) return;

    if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
        record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
    {
        auto* sym_data = static_cast<kernel_symbol_data_t*>( record.payload );

        if( record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD )
        {
            auto _lk = std::unique_lock{ data->mut };
            data->client_kernels.emplace( sym_data->kernel_id, *sym_data );
        }
        else if( record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD )
        {
            auto _lk = std::unique_lock{ data->mut };
            data->client_kernels.erase( sym_data->kernel_id );
        }
    }
    else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
    {
        auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
        if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
        {
            // Bracket the enqueue call with CPU timestamps, keyed by dispatch id.
            if( record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER )
            {
                auto _lk = std::unique_lock{ data->mut };
                data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_start = tracy::Profiler::GetTime();
            }
            else if( record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
            {
                auto _lk = std::unique_lock{ data->mut };
                data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_end = tracy::Profiler::GetTime();
            }
        }
        else if( record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE )
        {
            uint64_t src_loc = kernel_src_loc( data, rdata->dispatch_info.kernel_id );
            record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc,
                             rdata->dispatch_info.dispatch_id );
        }
    }
    else if( record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY &&
             record.operation != ROCPROFILER_MEMORY_COPY_NONE && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
    {
        auto* rdata = static_cast<rocprofiler_callback_tracing_memory_copy_data_t*>( record.payload );
        const char* name = nullptr;
        switch( record.operation )
        {
        case ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE:
            name = "DeviceToDeviceCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST:
            name = "DeviceToHostCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE:
            name = "HostToDeviceCopy";
            break;
        case ROCPROFILER_MEMORY_COPY_HOST_TO_HOST:
            name = "HostToHostCopy";
            break;
        default:
            // Any operation value not listed above would otherwise leave
            // `name` null and crash in strlen(); report it under a generic
            // label instead of dropping the interval.
            name = "MemoryCopy";
            break;
        }
        size_t name_len = strlen( name );
        uint64_t src_loc = tracy::Profiler::AllocSourceLocation( 0, nullptr, 0, name, name_len, nullptr, 0 );
        // UINT64_MAX is passed where the kernel path passes a dispatch id.
        record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, UINT64_MAX );
    }
}
|
||||
|
||||
void calibration_thread( void* ptr )
|
||||
{
|
||||
while( !TracyIsStarted )
|
||||
;
|
||||
ToolData* data = static_cast<ToolData*>( ptr );
|
||||
data->context_id = gpu_context_allocate( data );
|
||||
const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" );
|
||||
if( user_counters )
|
||||
{
|
||||
data->counter_names.clear();
|
||||
std::stringstream ss( user_counters );
|
||||
std::string counter;
|
||||
while( std::getline( ss, counter, ',' ) ) data->counter_names.insert( counter );
|
||||
}
|
||||
data->init = true;
|
||||
|
||||
#ifdef TRACY_ROCPROF_CALIBRATION
|
||||
while( data->init )
|
||||
{
|
||||
sleep( 1 );
|
||||
|
||||
timespec ts;
|
||||
// HSA performs a linear interpolation of GPU time to CLOCK_BOOTTIME. However, this is
|
||||
// subject to network time updates and can drift relative to tracy's clock.
|
||||
clock_gettime( CLOCK_BOOTTIME, &ts );
|
||||
int64_t cpu_timestamp = Profiler::GetTime();
|
||||
int64_t gpu_timestamp = ts.tv_nsec + ts.tv_sec * 1e9L;
|
||||
|
||||
if( cpu_timestamp > data->previous_cpu_time )
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
|
||||
tracy::MemWrite( &item->gpuCalibration.gpuTime, gpu_timestamp );
|
||||
tracy::MemWrite( &item->gpuCalibration.cpuTime, cpu_timestamp );
|
||||
tracy::MemWrite( &item->gpuCalibration.cpuDelta, cpu_timestamp - data->previous_cpu_time );
|
||||
tracy::MemWrite( &item->gpuCalibration.context, data->context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
data->previous_cpu_time = cpu_timestamp;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Initialization hook handed to rocprofiler. Starts the calibration thread,
// creates the client context, registers the dispatch counting service and the
// three callback-tracing services (code object, kernel dispatch, memory copy),
// then starts the context.
//
// fini_func - unused here.
// user_data - must point to the ToolData instance; it is forwarded to every
//             registered callback.
//
// Always returns 0; failures are handled by ROCPROFILER_CALL.
int tool_init( rocprofiler_client_finalize_t fini_func, void* user_data )
{
    auto* tool = static_cast<ToolData*>( user_data );
    tool->cal_thread = std::make_unique<tracy::Thread>( calibration_thread, tool );

    ROCPROFILER_CALL( rocprofiler_create_context( &get_client_ctx() ), "context creation failed" );

    ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_counting_service( get_client_ctx(), dispatch_callback,
                                                                                user_data, record_callback, user_data ),
                      "Could not setup counting service" );

    // Code-object tracing: only kernel symbol registration is of interest.
    rocprofiler_tracing_operation_t code_object_ops[] = { ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER };
    ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(
                          get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, code_object_ops,
                          sizeof( code_object_ops ) / sizeof( code_object_ops[0] ), tool_callback_tracing_callback,
                          user_data ),
                      "callback tracing service failed to configure" );

    // Kernel-dispatch tracing: completion and enqueue operations.
    rocprofiler_tracing_operation_t dispatch_ops[] = { ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
                                                       ROCPROFILER_KERNEL_DISPATCH_ENQUEUE };
    ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service(
                          get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, dispatch_ops,
                          sizeof( dispatch_ops ) / sizeof( dispatch_ops[0] ), tool_callback_tracing_callback,
                          user_data ),
                      "callback tracing service failed to configure" );

    // Memory-copy tracing: registered with a null operation list and a count of zero.
    ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
                                                                      ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, nullptr,
                                                                      0, tool_callback_tracing_callback, user_data ),
                      "callback tracing service failed to configure" );

    ROCPROFILER_CALL( rocprofiler_start_context( get_client_ctx() ), "start context" );
    return 0;
}
|
||||
|
||||
void tool_fini( void* tool_data_v )
|
||||
{
|
||||
rocprofiler_stop_context( get_client_ctx() );
|
||||
|
||||
ToolData* data = static_cast<ToolData*>( tool_data_v );
|
||||
data->init = false;
|
||||
data->cal_thread.reset();
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
rocprofiler_tool_configure_result_t* rocprofiler_configure( uint32_t version, const char* runtime_version,
|
||||
uint32_t priority, rocprofiler_client_id_t* client_id )
|
||||
{
|
||||
// If not the first tool to register, indicate that the tool doesn't want to do anything
|
||||
if( priority > 0 ) return nullptr;
|
||||
|
||||
// (optional) Provide a name for this tool to rocprofiler
|
||||
client_id->name = "Tracy";
|
||||
|
||||
// (optional) create configure data
|
||||
static ToolData data = ToolData{ version, runtime_version, priority, *client_id, 0, false, 0, 0 };
|
||||
|
||||
// construct configure result
|
||||
static auto cfg = rocprofiler_tool_configure_result_t{ sizeof( rocprofiler_tool_configure_result_t ),
|
||||
&tool_init, &tool_fini, static_cast<void*>( &data ) };
|
||||
|
||||
return &cfg;
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,14 @@
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "TracyCallstack.hpp"
|
||||
|
||||
#if (defined(__GNUC__) || defined(__clang__))
|
||||
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx) \
|
||||
__attribute__((format(printf, fmt_idx, arg_idx)))
|
||||
#else
|
||||
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx)
|
||||
#endif
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
@@ -22,7 +29,7 @@ public:
|
||||
ScopedZone& operator=( const ScopedZone& ) = delete;
|
||||
ScopedZone& operator=( ScopedZone&& ) = delete;
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -33,13 +40,19 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneBegin );
|
||||
auto zoneQueue = QueueType::ZoneBegin;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
GetProfiler().SendCallstack( depth );
|
||||
zoneQueue = QueueType::ZoneBeginCallstack;
|
||||
}
|
||||
TracyQueuePrepare( zoneQueue );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -50,55 +63,21 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
GetProfiler().SendCallstack( depth );
|
||||
zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack;
|
||||
}
|
||||
TracyQueuePrepare( zoneQueue );
|
||||
const auto srcloc =
|
||||
Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast<uint32_t>(0), is_active ) {}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
|
||||
|
||||
tracy_force_inline ~ScopedZone()
|
||||
{
|
||||
@@ -126,7 +105,7 @@ public:
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
}
|
||||
|
||||
void TextFmt( const char* fmt, ... )
|
||||
void TextFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
@@ -165,7 +144,7 @@ public:
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
}
|
||||
|
||||
void NameFmt( const char* fmt, ... )
|
||||
void NameFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
||||
@@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent )
|
||||
FILE* f = fopen( tmp, "r" );
|
||||
if( f )
|
||||
{
|
||||
fscanf( f, "%" PRIu64, &maxRange );
|
||||
(void)fscanf( f, "%" PRIu64, &maxRange );
|
||||
fclose( f );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
# if defined _WIN32
|
||||
# include <windows.h>
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# elif defined __linux__
|
||||
# include <stdio.h>
|
||||
# include <inttypes.h>
|
||||
@@ -27,13 +28,24 @@ static inline uint64_t ConvertTime( const FILETIME& t )
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
FILETIME idleTime;
|
||||
FILETIME kernelTime;
|
||||
FILETIME userTime;
|
||||
|
||||
# if defined TRACY_GDK
|
||||
FILETIME creationTime;
|
||||
FILETIME exitTime;
|
||||
|
||||
GetProcessTimes( GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime );
|
||||
|
||||
idle = 0;
|
||||
# else
|
||||
FILETIME idleTime;
|
||||
|
||||
GetSystemTimes( &idleTime, &kernelTime, &userTime );
|
||||
|
||||
idle = ConvertTime( idleTime );
|
||||
# endif
|
||||
|
||||
const auto kernel = ConvertTime( kernelTime );
|
||||
const auto user = ConvertTime( userTime );
|
||||
used = kernel + user;
|
||||
|
||||
@@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
|
||||
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
|
||||
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
|
||||
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
|
||||
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
|
||||
MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
|
||||
MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
|
||||
MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
|
||||
MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else if( hdr.EventDescriptor.Opcode == 50 )
|
||||
@@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
|
||||
|
||||
TracyLfqPrepare( QueueType::ThreadWakeup );
|
||||
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
|
||||
MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
|
||||
MemWrite( &item->threadWakeup.thread, rt->threadId );
|
||||
MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
|
||||
MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
|
||||
@@ -232,6 +238,10 @@ void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
|
||||
#endif
|
||||
|
||||
const auto& hdr = record->EventHeader;
|
||||
|
||||
// Check for Lost_Event (6a399ae0-4bc6-4de9-870b-3657f8947e7e)
|
||||
if( hdr.ProviderId.Data1 == 0x6A399AE0 ) return;
|
||||
|
||||
assert( hdr.ProviderId.Data1 == 0x802EC45A );
|
||||
assert( hdr.EventDescriptor.Id == 0x0011 );
|
||||
|
||||
@@ -498,11 +508,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
char buf[256];
|
||||
if( tmp )
|
||||
if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
|
||||
{
|
||||
char buf[256];
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
LocalFree(tmp);
|
||||
if( ret != 0 )
|
||||
{
|
||||
threadName = CopyString( buf, ret );
|
||||
@@ -521,25 +531,23 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
|
||||
if( phnd != INVALID_HANDLE_VALUE )
|
||||
{
|
||||
HMODULE modules[1024];
|
||||
DWORD needed;
|
||||
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
|
||||
MEMORY_BASIC_INFORMATION vmeminfo;
|
||||
SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
|
||||
if( infosize == sizeof( vmeminfo ) )
|
||||
{
|
||||
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
|
||||
for( DWORD i=0; i<sz; i++ )
|
||||
if (vmeminfo.Type == MEM_IMAGE)
|
||||
{
|
||||
// for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE
|
||||
HMODULE mod = (HMODULE)vmeminfo.AllocationBase;
|
||||
MODULEINFO info;
|
||||
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
|
||||
if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
|
||||
char buf2[1024];
|
||||
const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 );
|
||||
if( modlen != 0 )
|
||||
{
|
||||
char buf2[1024];
|
||||
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
|
||||
if( modlen != 0 )
|
||||
{
|
||||
threadName = CopyString( buf2, modlen );
|
||||
threadSent = true;
|
||||
}
|
||||
threadName = CopyString( buf2, modlen );
|
||||
threadSent = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -606,6 +614,7 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
# include <fcntl.h>
|
||||
# include <inttypes.h>
|
||||
# include <limits>
|
||||
# include <mntent.h>
|
||||
# include <poll.h>
|
||||
# include <stdio.h>
|
||||
# include <stdlib.h>
|
||||
@@ -678,7 +687,7 @@ enum TraceEventId
|
||||
EventBranchMiss,
|
||||
EventVsync,
|
||||
EventContextSwitch,
|
||||
EventWakeup,
|
||||
EventWaking,
|
||||
};
|
||||
|
||||
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
|
||||
@@ -753,6 +762,42 @@ static const char* ReadFile( const char* path )
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static const char* ReadFile( const char* base, const char* path )
|
||||
{
|
||||
const auto blen = strlen( base );
|
||||
const auto plen = strlen( path );
|
||||
|
||||
auto tmp = (char*)tracy_malloc( blen + plen + 1 );
|
||||
memcpy( tmp, base, blen );
|
||||
memcpy( tmp + blen, path, plen );
|
||||
tmp[blen+plen] = '\0';
|
||||
|
||||
auto res = ReadFile( tmp );
|
||||
tracy_free( tmp );
|
||||
return res;
|
||||
}
|
||||
|
||||
static char* GetTraceFsPath()
|
||||
{
|
||||
auto f = setmntent( "/proc/mounts", "r" );
|
||||
if( !f ) return nullptr;
|
||||
|
||||
char* ret = nullptr;
|
||||
while( auto ent = getmntent( f ) )
|
||||
{
|
||||
if( strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
|
||||
{
|
||||
auto len = strlen( ent->mnt_dir );
|
||||
ret = (char*)tracy_malloc( len + 1 );
|
||||
memcpy( ret, ent->mnt_dir, len );
|
||||
ret[len] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
endmntent( f );
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool SysTraceStart( int64_t& samplingPeriod )
|
||||
{
|
||||
#ifndef CLOCK_MONOTONIC_RAW
|
||||
@@ -767,16 +812,22 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
|
||||
#endif
|
||||
|
||||
int switchId = -1, wakeupId = -1, vsyncId = -1;
|
||||
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
|
||||
auto traceFsPath = GetTraceFsPath();
|
||||
if( !traceFsPath ) return false;
|
||||
TracyDebug( "tracefs path: %s\n", traceFsPath );
|
||||
|
||||
int switchId = -1, wakingId = -1, vsyncId = -1;
|
||||
const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
|
||||
if( switchIdStr ) switchId = atoi( switchIdStr );
|
||||
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
|
||||
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
|
||||
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
|
||||
const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
|
||||
if( wakingIdStr ) wakingId = atoi( wakingIdStr );
|
||||
const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
|
||||
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
|
||||
|
||||
tracy_free( traceFsPath );
|
||||
|
||||
TracyDebug( "sched_switch id: %i\n", switchId );
|
||||
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
|
||||
TracyDebug( "sched_waking id: %i\n", wakingId );
|
||||
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
|
||||
|
||||
#ifdef TRACY_NO_SAMPLING
|
||||
@@ -831,7 +882,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
2 + // CPU cycles + instructions retired
|
||||
2 + // cache reference + miss
|
||||
2 + // branch retired + miss
|
||||
2 + // context switches + wakeups
|
||||
2 + // context switches + waking ups
|
||||
1 // vsync
|
||||
);
|
||||
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
|
||||
@@ -1076,18 +1127,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
}
|
||||
}
|
||||
|
||||
if( wakeupId != -1 )
|
||||
if( wakingId != -1 )
|
||||
{
|
||||
pe.config = wakeupId;
|
||||
pe.config &= ~PERF_SAMPLE_CALLCHAIN;
|
||||
pe = {};
|
||||
pe.type = PERF_TYPE_TRACEPOINT;
|
||||
pe.size = sizeof( perf_event_attr );
|
||||
pe.sample_period = 1;
|
||||
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
|
||||
// Could ask for callstack here
|
||||
//pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
|
||||
pe.disabled = 1;
|
||||
pe.inherit = 1;
|
||||
pe.config = wakingId;
|
||||
pe.read_format = 0;
|
||||
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
|
||||
pe.use_clockid = 1;
|
||||
pe.clockid = CLOCK_MONOTONIC_RAW;
|
||||
#endif
|
||||
|
||||
TracyDebug( "Setup wakeup capture\n" );
|
||||
TracyDebug( "Setup waking up capture\n" );
|
||||
for( int i=0; i<s_numCpus; i++ )
|
||||
{
|
||||
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
if( fd != -1 )
|
||||
{
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWaking, i );
|
||||
if( s_ring[s_numBuffers].IsValid() )
|
||||
{
|
||||
s_numBuffers++;
|
||||
@@ -1332,6 +1396,7 @@ void SysTraceWorker( void* ptr )
|
||||
hadData = true;
|
||||
while( activeNum > 0 )
|
||||
{
|
||||
// Find the earliest event from the active buffers
|
||||
int sel = -1;
|
||||
int selPos;
|
||||
int64_t t0 = std::numeric_limits<int64_t>::max();
|
||||
@@ -1369,6 +1434,7 @@ void SysTraceWorker( void* ptr )
|
||||
}
|
||||
}
|
||||
}
|
||||
// Found any event
|
||||
if( sel >= 0 )
|
||||
{
|
||||
auto& ring = ringArray[ctxBufferIdx + sel];
|
||||
@@ -1384,10 +1450,10 @@ void SysTraceWorker( void* ptr )
|
||||
const auto rid = ring.GetId();
|
||||
if( rid == EventContextSwitch )
|
||||
{
|
||||
// Layout:
|
||||
// u64 time
|
||||
// u64 cnt
|
||||
// u64 ip[cnt]
|
||||
// Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
|
||||
// u64 time // PERF_SAMPLE_TIME
|
||||
// u64 cnt // PERF_SAMPLE_CALLCHAIN
|
||||
// u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN
|
||||
// u32 size
|
||||
// u8 data[size]
|
||||
// Data (not ABI stable, but has not changed since it was added, in 2009):
|
||||
@@ -1408,35 +1474,43 @@ void SysTraceWorker( void* ptr )
|
||||
const auto traceOffset = offset;
|
||||
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
|
||||
|
||||
uint32_t prev_pid, next_pid;
|
||||
uint32_t prev_pid, prev_prio;
|
||||
uint32_t next_pid, next_prio;
|
||||
long prev_state;
|
||||
|
||||
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t ) + sizeof( uint32_t );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &prev_state, offset, sizeof( long ) );
|
||||
offset += sizeof( long ) + 16;
|
||||
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &next_prio, offset, sizeof( uint32_t ) );
|
||||
|
||||
uint8_t reason = 100;
|
||||
uint8_t state;
|
||||
uint8_t oldThreadWaitReason = 100;
|
||||
uint8_t oldThreadState;
|
||||
|
||||
if( prev_state & 0x0001 ) state = 104;
|
||||
else if( prev_state & 0x0002 ) state = 101;
|
||||
else if( prev_state & 0x0004 ) state = 105;
|
||||
else if( prev_state & 0x0008 ) state = 106;
|
||||
else if( prev_state & 0x0010 ) state = 108;
|
||||
else if( prev_state & 0x0020 ) state = 109;
|
||||
else if( prev_state & 0x0040 ) state = 110;
|
||||
else if( prev_state & 0x0080 ) state = 102;
|
||||
else state = 103;
|
||||
if( prev_state & 0x0001 ) oldThreadState = 104;
|
||||
else if( prev_state & 0x0002 ) oldThreadState = 101;
|
||||
else if( prev_state & 0x0004 ) oldThreadState = 105;
|
||||
else if( prev_state & 0x0008 ) oldThreadState = 106;
|
||||
else if( prev_state & 0x0010 ) oldThreadState = 108;
|
||||
else if( prev_state & 0x0020 ) oldThreadState = 109;
|
||||
else if( prev_state & 0x0040 ) oldThreadState = 110;
|
||||
else if( prev_state & 0x0080 ) oldThreadState = 102;
|
||||
else oldThreadState = 103;
|
||||
|
||||
TracyLfqPrepare( QueueType::ContextSwitch );
|
||||
MemWrite( &item->contextSwitch.time, t0 );
|
||||
MemWrite( &item->contextSwitch.oldThread, prev_pid );
|
||||
MemWrite( &item->contextSwitch.newThread, next_pid );
|
||||
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
|
||||
MemWrite( &item->contextSwitch.reason, reason );
|
||||
MemWrite( &item->contextSwitch.state, state );
|
||||
MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
|
||||
MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
|
||||
MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
|
||||
MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
|
||||
@@ -1450,27 +1524,33 @@ void SysTraceWorker( void* ptr )
|
||||
TracyLfqCommit;
|
||||
}
|
||||
}
|
||||
else if( rid == EventWakeup )
|
||||
else if( rid == EventWaking)
|
||||
{
|
||||
// See /sys/kernel/debug/tracing/events/sched/sched_waking/format
|
||||
// Layout:
|
||||
// u64 time
|
||||
// u64 time // PERF_SAMPLE_TIME
|
||||
// u32 size
|
||||
// u8 data[size]
|
||||
// Data:
|
||||
// u8 hdr[8]
|
||||
// u8 comm[16]
|
||||
// u32 pid
|
||||
// u32 prio
|
||||
// u64 target_cpu
|
||||
|
||||
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
|
||||
|
||||
// i32 prio
|
||||
// i32 target_cpu
|
||||
const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
|
||||
offset += dataOffset + 8 + 16;
|
||||
uint32_t pid;
|
||||
ring.Read( &pid, offset, sizeof( uint32_t ) );
|
||||
|
||||
|
||||
TracyLfqPrepare( QueueType::ThreadWakeup );
|
||||
MemWrite( &item->threadWakeup.time, t0 );
|
||||
MemWrite( &item->threadWakeup.thread, pid );
|
||||
MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
|
||||
|
||||
int8_t adjustReason = -1; // Does not exist on Linux
|
||||
int8_t adjustIncrement = 0; // Should perhaps store the new prio?
|
||||
MemWrite( &item->threadWakeup.adjustReason, adjustReason );
|
||||
MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#define __TRACYSYSTRACE_HPP__
|
||||
|
||||
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# if !defined TRACY_WIN32_NO_DESKTOP
|
||||
# define TRACY_HAS_SYSTEM_TRACING
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap;
|
||||
# define _Thread_local __declspec(thread)
|
||||
# define TLS_MODEL
|
||||
# else
|
||||
# ifndef __HAIKU__
|
||||
# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26
|
||||
# define TLS_MODEL __attribute__((tls_model("local-dynamic")))
|
||||
# elif !defined(__HAIKU__)
|
||||
# define TLS_MODEL __attribute__((tls_model("initial-exec")))
|
||||
# else
|
||||
# define TLS_MODEL
|
||||
@@ -2778,7 +2780,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) {
|
||||
_memory_huge_pages = 1;
|
||||
}
|
||||
|
||||
#if PLATFORM_WINDOWS
|
||||
#if PLATFORM_WINDOWS && !defined TRACY_GDK
|
||||
if (_memory_config.enable_huge_pages) {
|
||||
HANDLE token = 0;
|
||||
size_t large_page_minimum = GetLargePageMinimum();
|
||||
|
||||
Reference in New Issue
Block a user