update tracy from 11.0 to 13.1 and fix build with tracy enabled
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -32,27 +32,11 @@
|
||||
#include "client/TracyOverride.cpp"
|
||||
#include "client/TracyKCore.cpp"
|
||||
|
||||
#if defined(TRACY_HAS_CALLSTACK)
|
||||
# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "libbacktrace/alloc.cpp"
|
||||
# include "libbacktrace/dwarf.cpp"
|
||||
# include "libbacktrace/fileline.cpp"
|
||||
# include "libbacktrace/mmapio.cpp"
|
||||
# include "libbacktrace/posix.cpp"
|
||||
# include "libbacktrace/sort.cpp"
|
||||
# include "libbacktrace/state.cpp"
|
||||
# if TRACY_HAS_CALLSTACK == 4
|
||||
# include "libbacktrace/macho.cpp"
|
||||
# else
|
||||
# include "libbacktrace/elf.cpp"
|
||||
# endif
|
||||
# include "common/TracyStackFrames.cpp"
|
||||
# endif
|
||||
#ifdef TRACY_ROCPROF
|
||||
# include "client/TracyRocprof.cpp"
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma comment(lib, "ws2_32.lib")
|
||||
# pragma comment(lib, "dbghelp.lib")
|
||||
# pragma comment(lib, "advapi32.lib")
|
||||
# pragma comment(lib, "user32.lib")
|
||||
# pragma warning(pop)
|
||||
|
||||
@@ -24,15 +24,33 @@
|
||||
# pragma warning( disable : 4091 )
|
||||
# endif
|
||||
# include <dbghelp.h>
|
||||
# pragma comment( lib, "dbghelp.lib" )
|
||||
# ifdef _MSC_VER
|
||||
# pragma warning( pop )
|
||||
# endif
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#elif defined(TRACY_USE_LIBBACKTRACE)
|
||||
|
||||
# include "../libbacktrace/backtrace.hpp"
|
||||
# include <algorithm>
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
# include <stdlib.h>
|
||||
|
||||
// Implementation files
|
||||
# include "../libbacktrace/alloc.cpp"
|
||||
# include "../libbacktrace/dwarf.cpp"
|
||||
# include "../libbacktrace/fileline.cpp"
|
||||
# include "../libbacktrace/mmapio.cpp"
|
||||
# include "../libbacktrace/posix.cpp"
|
||||
# include "../libbacktrace/sort.cpp"
|
||||
# include "../libbacktrace/state.cpp"
|
||||
# if TRACY_HAS_CALLSTACK == 4
|
||||
# include "../libbacktrace/macho.cpp"
|
||||
# else
|
||||
# include "../libbacktrace/elf.cpp"
|
||||
# endif
|
||||
# include "../common/TracyStackFrames.cpp"
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 5
|
||||
# include <dlfcn.h>
|
||||
# include <cxxabi.h>
|
||||
@@ -53,7 +71,7 @@ extern "C"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 5 || TRACY_HAS_CALLSTACK == 6
|
||||
#if defined(TRACY_USE_LIBBACKTRACE) || TRACY_HAS_CALLSTACK == 5
|
||||
// If you want to use your own demangling functionality (e.g. for another language),
|
||||
// define TRACY_DEMANGLE and provide your own implementation of the __tracy_demangle
|
||||
// function. The input parameter is a function name. The demangle function must
|
||||
@@ -91,94 +109,147 @@ extern "C" const char* ___tracy_demangle( const char* mangled )
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 3
|
||||
# define TRACY_USE_IMAGE_CACHE
|
||||
#if defined(TRACY_USE_LIBBACKTRACE) && TRACY_HAS_CALLSTACK != 4 // dl_iterate_phdr is required for the current image cache. Need to move it to libbacktrace?
|
||||
# define TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
# include <link.h>
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
|
||||
// so we can quickly determine which image an address falls into.
|
||||
// We refresh this cache only when we hit an address that doesn't fall into any known range.
|
||||
static bool IsKernelAddress(uint64_t addr) {
|
||||
return (addr >> 63) != 0;
|
||||
}
|
||||
|
||||
void DestroyImageEntry( ImageEntry& entry )
|
||||
{
|
||||
tracy_free( entry.m_path );
|
||||
tracy_free( entry.m_name );
|
||||
}
|
||||
|
||||
class ImageCache
|
||||
{
|
||||
public:
|
||||
struct ImageEntry
|
||||
|
||||
ImageCache( size_t imageCacheCapacity = 512 )
|
||||
: m_images( imageCacheCapacity )
|
||||
{
|
||||
void* m_startAddress = nullptr;
|
||||
void* m_endAddress = nullptr;
|
||||
char* m_name = nullptr;
|
||||
};
|
||||
|
||||
ImageCache()
|
||||
: m_images( 512 )
|
||||
{
|
||||
Refresh();
|
||||
}
|
||||
|
||||
~ImageCache()
|
||||
{
|
||||
Clear();
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddress( void* address )
|
||||
|
||||
ImageEntry* AddEntry( const ImageEntry& entry )
|
||||
{
|
||||
const ImageEntry* entry = GetImageForAddressImpl( address );
|
||||
if( m_sorted ) m_sorted = m_images.empty() || ( entry.m_startAddress < m_images.back().m_startAddress );
|
||||
ImageEntry* newEntry = m_images.push_next();
|
||||
*newEntry = entry;
|
||||
return newEntry;
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddress( uint64_t address )
|
||||
{
|
||||
Sort();
|
||||
|
||||
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
|
||||
[]( const ImageEntry& lhs, const uint64_t rhs ) { return lhs.m_startAddress > rhs; } );
|
||||
|
||||
if( it != m_images.end() && address < it->m_endAddress )
|
||||
{
|
||||
return it;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Sort()
|
||||
{
|
||||
if( m_sorted ) return;
|
||||
|
||||
std::sort( m_images.begin(), m_images.end(),
|
||||
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
|
||||
m_sorted = true;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
for( ImageEntry& entry : m_images )
|
||||
{
|
||||
DestroyImageEntry( entry );
|
||||
}
|
||||
|
||||
m_sorted = true;
|
||||
m_images.clear();
|
||||
}
|
||||
|
||||
bool ContainsImage( uint64_t startAddress ) const
|
||||
{
|
||||
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
|
||||
}
|
||||
protected:
|
||||
tracy::FastVector<ImageEntry> m_images;
|
||||
bool m_sorted = true;
|
||||
};
|
||||
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths
|
||||
// so we can quickly determine which image an address falls into.
|
||||
// We refresh this cache only when we hit an address that doesn't fall into any known range.
|
||||
class ImageCacheDlIteratePhdr : public ImageCache
|
||||
{
|
||||
public:
|
||||
|
||||
ImageCacheDlIteratePhdr()
|
||||
{
|
||||
Refresh();
|
||||
}
|
||||
|
||||
~ImageCacheDlIteratePhdr()
|
||||
{
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddress( uint64_t address )
|
||||
{
|
||||
const ImageEntry* entry = ImageCache::GetImageForAddress( address );
|
||||
if( !entry )
|
||||
{
|
||||
Refresh();
|
||||
return GetImageForAddressImpl( address );
|
||||
return ImageCache::GetImageForAddress( address );
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy::FastVector<ImageEntry> m_images;
|
||||
bool m_updated = false;
|
||||
bool m_haveMainImageName = false;
|
||||
|
||||
static int Callback( struct dl_phdr_info* info, size_t size, void* data )
|
||||
{
|
||||
ImageCache* cache = reinterpret_cast<ImageCache*>( data );
|
||||
ImageCacheDlIteratePhdr* cache = reinterpret_cast<ImageCacheDlIteratePhdr*>( data );
|
||||
|
||||
const auto startAddress = reinterpret_cast<void*>( info->dlpi_addr );
|
||||
if( cache->Contains( startAddress ) ) return 0;
|
||||
const auto startAddress = static_cast<uint64_t>( info->dlpi_addr );
|
||||
if( cache->ContainsImage( startAddress ) ) return 0;
|
||||
|
||||
const uint32_t headerCount = info->dlpi_phnum;
|
||||
assert( headerCount > 0);
|
||||
const auto endAddress = reinterpret_cast<void*>( info->dlpi_addr +
|
||||
const auto endAddress = static_cast<uint64_t>( info->dlpi_addr +
|
||||
info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz);
|
||||
|
||||
ImageEntry* image = cache->m_images.push_next();
|
||||
image->m_startAddress = startAddress;
|
||||
image->m_endAddress = endAddress;
|
||||
ImageEntry image{};
|
||||
image.m_startAddress = startAddress;
|
||||
image.m_endAddress = endAddress;
|
||||
|
||||
// the base executable name isn't provided when iterating with dl_iterate_phdr,
|
||||
// we will have to patch the executable image name outside this callback
|
||||
if( info->dlpi_name && info->dlpi_name[0] != '\0' )
|
||||
{
|
||||
size_t sz = strlen( info->dlpi_name ) + 1;
|
||||
image->m_name = (char*)tracy_malloc( sz );
|
||||
memcpy( image->m_name, info->dlpi_name, sz );
|
||||
}
|
||||
else
|
||||
{
|
||||
image->m_name = nullptr;
|
||||
}
|
||||
image.m_name = info->dlpi_name && info->dlpi_name[0] != '\0' ? CopyStringFast( info->dlpi_name ) : nullptr;
|
||||
|
||||
cache->AddEntry( image );
|
||||
cache->m_updated = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Contains( void* startAddress ) const
|
||||
{
|
||||
return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
|
||||
}
|
||||
|
||||
void Refresh()
|
||||
{
|
||||
m_updated = false;
|
||||
@@ -186,9 +257,7 @@ private:
|
||||
|
||||
if( m_updated )
|
||||
{
|
||||
std::sort( m_images.begin(), m_images.end(),
|
||||
[]( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
|
||||
|
||||
Sort();
|
||||
// patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks
|
||||
UpdateMainImageName();
|
||||
}
|
||||
@@ -223,31 +292,45 @@ private:
|
||||
|
||||
m_haveMainImageName = true;
|
||||
}
|
||||
|
||||
const ImageEntry* GetImageForAddressImpl( void* address ) const
|
||||
{
|
||||
auto it = std::lower_bound( m_images.begin(), m_images.end(), address,
|
||||
[]( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } );
|
||||
|
||||
if( it != m_images.end() && address < it->m_endAddress )
|
||||
{
|
||||
return it;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
for( ImageEntry& entry : m_images )
|
||||
{
|
||||
tracy_free( entry.m_name );
|
||||
}
|
||||
|
||||
m_images.clear();
|
||||
ImageCache::Clear();
|
||||
m_haveMainImageName = false;
|
||||
}
|
||||
};
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
using UserlandImageCache = ImageCacheDlIteratePhdr;
|
||||
#else
|
||||
using UserlandImageCache = ImageCache;
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
|
||||
static UserlandImageCache* s_imageCache;
|
||||
static ImageCache* s_krnlCache;
|
||||
|
||||
void CreateImageCaches()
|
||||
{
|
||||
assert( s_imageCache == nullptr && s_krnlCache == nullptr );
|
||||
s_imageCache = new ( tracy_malloc( sizeof( UserlandImageCache ) ) ) UserlandImageCache();
|
||||
s_krnlCache = new ( tracy_malloc( sizeof( ImageCache ) ) ) ImageCache();
|
||||
}
|
||||
|
||||
void DestroyImageCaches()
|
||||
{
|
||||
if( s_krnlCache != nullptr )
|
||||
{
|
||||
s_krnlCache->~ImageCache();
|
||||
tracy_free( s_krnlCache );
|
||||
s_krnlCache = nullptr;
|
||||
}
|
||||
|
||||
if( s_imageCache != nullptr )
|
||||
{
|
||||
s_imageCache->~UserlandImageCache();
|
||||
tracy_free( s_imageCache );
|
||||
s_imageCache = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime,
|
||||
// simply resolve the offset and image name (which will be enough the resolving to be done offline)
|
||||
@@ -282,32 +365,17 @@ extern "C"
|
||||
t_SymFromInlineContext _SymFromInlineContext = 0;
|
||||
t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0;
|
||||
|
||||
TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0;
|
||||
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr;
|
||||
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags)
|
||||
{
|
||||
return ___tracy_RtlWalkFrameChainPtr(callers, count, flags);
|
||||
}
|
||||
}
|
||||
|
||||
struct ModuleCache
|
||||
{
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
char* name;
|
||||
};
|
||||
|
||||
static FastVector<ModuleCache>* s_modCache;
|
||||
|
||||
|
||||
struct KernelDriver
|
||||
{
|
||||
uint64_t addr;
|
||||
const char* mod;
|
||||
const char* path;
|
||||
};
|
||||
|
||||
KernelDriver* s_krnlCache = nullptr;
|
||||
size_t s_krnlCacheCnt;
|
||||
|
||||
void InitCallstackCritical()
|
||||
{
|
||||
___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" );
|
||||
}
|
||||
|
||||
void DbgHelpInit()
|
||||
@@ -338,75 +406,57 @@ DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll,
|
||||
return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 );
|
||||
}
|
||||
|
||||
ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
char* FormatImageName( const char* imageName, uint32_t imageNameLength )
|
||||
{
|
||||
DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize );
|
||||
|
||||
ModuleCache* cachedModule = s_modCache->push_next();
|
||||
cachedModule->start = baseOfDll;
|
||||
cachedModule->end = baseOfDll + dllSize;
|
||||
|
||||
// when doing offline symbol resolution, we must store the full path of the dll for the resolving to work
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1);
|
||||
memcpy(cachedModule->name, imageName, imageNameLength);
|
||||
cachedModule->name[imageNameLength] = '\0';
|
||||
return CopyStringFast( imageName, imageNameLength );
|
||||
}
|
||||
else
|
||||
{
|
||||
auto ptr = imageName + imageNameLength;
|
||||
while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--;
|
||||
if (ptr > imageName) ptr++;
|
||||
const char* ptr = imageName + imageNameLength;
|
||||
while( ptr > imageName && *ptr != '\\' && *ptr != '/' ) ptr--;
|
||||
if( ptr > imageName ) ptr++;
|
||||
const auto namelen = imageName + imageNameLength - ptr;
|
||||
cachedModule->name = (char*)tracy_malloc_fast(namelen + 3);
|
||||
cachedModule->name[0] = '[';
|
||||
memcpy(cachedModule->name + 1, ptr, namelen);
|
||||
cachedModule->name[namelen + 1] = ']';
|
||||
cachedModule->name[namelen + 2] = '\0';
|
||||
}
|
||||
|
||||
return cachedModule;
|
||||
char* alloc = (char*)tracy_malloc_fast( namelen + 3 );
|
||||
alloc[0] = '[';
|
||||
memcpy( alloc + 1, ptr, namelen );
|
||||
alloc[namelen + 1] = ']';
|
||||
alloc[namelen + 2] = '\0';
|
||||
return alloc;
|
||||
}
|
||||
}
|
||||
|
||||
void InitCallstack()
|
||||
ImageEntry* CacheModuleInfo( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
{
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
TracyDebug("TRACY: enabling offline symbol resolving!\n");
|
||||
}
|
||||
ImageEntry moduleEntry = {};
|
||||
moduleEntry.m_startAddress = baseOfDll;
|
||||
moduleEntry.m_endAddress = baseOfDll + dllSize;
|
||||
moduleEntry.m_path = CopyStringFast( imagePath, imageNameLength );
|
||||
moduleEntry.m_name = FormatImageName( imagePath, imageNameLength );
|
||||
|
||||
DbgHelpInit();
|
||||
return s_imageCache->AddEntry( moduleEntry );
|
||||
}
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
|
||||
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
|
||||
// and process module symbol loading at startup time - they will be loaded on demand later
|
||||
// Sometimes this process can take a very long time and prevent resolving callstack frames
|
||||
// symbols during that time.
|
||||
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
|
||||
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
|
||||
if ( !initTimeModuleLoad )
|
||||
{
|
||||
TracyDebug("TRACY: skipping init time dbghelper module load\n");
|
||||
}
|
||||
ImageEntry* LoadSymbolsForModuleAndCache( const char* imagePath, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize )
|
||||
{
|
||||
DbgHelpLoadSymbolsForModule( imagePath, baseOfDll, dllSize );
|
||||
return CacheModuleInfo( imagePath, imageNameLength, baseOfDll, dllSize );
|
||||
}
|
||||
|
||||
static void CacheProcessDrivers()
|
||||
{
|
||||
DWORD needed;
|
||||
LPVOID dev[4096];
|
||||
if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
|
||||
if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 )
|
||||
{
|
||||
char windir[MAX_PATH];
|
||||
if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 );
|
||||
const auto windirlen = strlen( windir );
|
||||
|
||||
const auto sz = needed / sizeof( LPVOID );
|
||||
s_krnlCache = (KernelDriver*)tracy_malloc( sizeof(KernelDriver) * sz );
|
||||
int cnt = 0;
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
{
|
||||
char fn[MAX_PATH];
|
||||
@@ -417,7 +467,12 @@ void InitCallstack()
|
||||
buf[0] = '<';
|
||||
memcpy( buf+1, fn, len );
|
||||
memcpy( buf+len+1, ">", 2 );
|
||||
s_krnlCache[cnt] = KernelDriver { (uint64_t)dev[i], buf };
|
||||
|
||||
ImageEntry kernelDriver{};
|
||||
kernelDriver.m_startAddress = (uint64_t)dev[i];
|
||||
kernelDriver.m_endAddress = 0;
|
||||
kernelDriver.m_name = buf;
|
||||
kernelDriver.m_path = nullptr;
|
||||
|
||||
const auto len = GetDeviceDriverFileNameA( dev[i], fn, sizeof( fn ) );
|
||||
if( len != 0 )
|
||||
@@ -433,27 +488,23 @@ void InitCallstack()
|
||||
}
|
||||
|
||||
DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 );
|
||||
|
||||
const auto psz = strlen( path );
|
||||
auto pptr = (char*)tracy_malloc_fast( psz+1 );
|
||||
memcpy( pptr, path, psz );
|
||||
pptr[psz] = '\0';
|
||||
s_krnlCache[cnt].path = pptr;
|
||||
|
||||
kernelDriver.m_path = CopyString( path );
|
||||
}
|
||||
|
||||
cnt++;
|
||||
s_krnlCache->AddEntry(kernelDriver);
|
||||
}
|
||||
}
|
||||
s_krnlCacheCnt = cnt;
|
||||
std::sort( s_krnlCache, s_krnlCache + s_krnlCacheCnt, []( const KernelDriver& lhs, const KernelDriver& rhs ) { return lhs.addr > rhs.addr; } );
|
||||
s_krnlCache->Sort();
|
||||
}
|
||||
}
|
||||
|
||||
s_modCache = (FastVector<ModuleCache>*)tracy_malloc( sizeof( FastVector<ModuleCache> ) );
|
||||
new(s_modCache) FastVector<ModuleCache>( 512 );
|
||||
|
||||
static void CacheProcessModules()
|
||||
{
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
HMODULE mod[1024];
|
||||
if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
@@ -472,6 +523,41 @@ void InitCallstack()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InitCallstack()
|
||||
{
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
if( s_shouldResolveSymbolsOffline )
|
||||
{
|
||||
TracyDebug("TRACY: enabling offline symbol resolving!\n");
|
||||
}
|
||||
|
||||
CreateImageCaches();
|
||||
|
||||
DbgHelpInit();
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_LOCK;
|
||||
#endif
|
||||
|
||||
// use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver
|
||||
// and process module symbol loading at startup time - they will be loaded on demand later
|
||||
// Sometimes this process can take a very long time and prevent resolving callstack frames
|
||||
// symbols during that time.
|
||||
const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" );
|
||||
const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' );
|
||||
if ( !initTimeModuleLoad )
|
||||
{
|
||||
TracyDebug("TRACY: skipping init time dbghelper module load\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
CacheProcessDrivers();
|
||||
CacheProcessModules();
|
||||
}
|
||||
|
||||
#ifdef TRACY_DBGHELP_LOCK
|
||||
DBGHELP_UNLOCK;
|
||||
@@ -480,6 +566,7 @@ void InitCallstack()
|
||||
|
||||
void EndCallstack()
|
||||
{
|
||||
DestroyImageCaches();
|
||||
}
|
||||
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
@@ -514,11 +601,11 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
|
||||
|
||||
const char* GetKernelModulePath( uint64_t addr )
|
||||
{
|
||||
assert( addr >> 63 != 0 );
|
||||
assert( IsKernelAddress( addr ) );
|
||||
if( !s_krnlCache ) return nullptr;
|
||||
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
|
||||
if( it == s_krnlCache + s_krnlCacheCnt ) return nullptr;
|
||||
return it->path;
|
||||
const ImageEntry* imageEntry = s_krnlCache->GetImageForAddress( addr );
|
||||
if( imageEntry ) return imageEntry->m_path;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct ModuleNameAndBaseAddress
|
||||
@@ -529,51 +616,38 @@ struct ModuleNameAndBaseAddress
|
||||
|
||||
ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr )
|
||||
{
|
||||
if( ( addr >> 63 ) != 0 )
|
||||
if( IsKernelAddress( addr ) )
|
||||
{
|
||||
if( s_krnlCache )
|
||||
{
|
||||
auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } );
|
||||
if( it != s_krnlCache + s_krnlCacheCnt )
|
||||
{
|
||||
return ModuleNameAndBaseAddress{ it->mod, it->addr };
|
||||
}
|
||||
}
|
||||
const ImageEntry* entry = s_krnlCache->GetImageForAddress( addr );
|
||||
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
|
||||
return ModuleNameAndBaseAddress{ "<kernel>", addr };
|
||||
}
|
||||
|
||||
for( auto& v : *s_modCache )
|
||||
{
|
||||
if( addr >= v.start && addr < v.end )
|
||||
{
|
||||
return ModuleNameAndBaseAddress{ v.name, v.start };
|
||||
}
|
||||
}
|
||||
const ImageEntry* entry = s_imageCache->GetImageForAddress( addr );
|
||||
if( entry != nullptr ) return ModuleNameAndBaseAddress{ entry->m_name, entry->m_startAddress };
|
||||
|
||||
HMODULE mod[1024];
|
||||
DWORD needed;
|
||||
HANDLE proc = GetCurrentProcess();
|
||||
// Do not use FreeLibrary because we set the flag GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT
|
||||
// see https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulehandleexa to get more information
|
||||
constexpr DWORD flag = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
|
||||
HMODULE mod = NULL;
|
||||
|
||||
InitRpmalloc();
|
||||
if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 )
|
||||
if( GetModuleHandleExA( flag, (char*)addr, &mod ) != 0 )
|
||||
{
|
||||
const auto sz = needed / sizeof( HMODULE );
|
||||
for( size_t i=0; i<sz; i++ )
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod, &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
MODULEINFO info;
|
||||
if( GetModuleInformation( proc, mod[i], &info, sizeof( info ) ) != 0 )
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < ( base + info.SizeOfImage ) )
|
||||
{
|
||||
const auto base = uint64_t( info.lpBaseOfDll );
|
||||
if( addr >= base && addr < base + info.SizeOfImage )
|
||||
char name[1024];
|
||||
const auto nameLength = GetModuleFileNameA( mod, name, sizeof( name ) );
|
||||
if( nameLength > 0 )
|
||||
{
|
||||
char name[1024];
|
||||
const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 );
|
||||
if( nameLength > 0 )
|
||||
{
|
||||
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
|
||||
ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
|
||||
return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start };
|
||||
}
|
||||
// since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize)
|
||||
ImageEntry* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage );
|
||||
return ModuleNameAndBaseAddress{ cachedModule->m_name, cachedModule->m_startAddress };
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -753,7 +827,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name };
|
||||
}
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#elif defined(TRACY_USE_LIBBACKTRACE)
|
||||
|
||||
enum { MaxCbTrace = 64 };
|
||||
|
||||
@@ -762,9 +836,6 @@ struct backtrace_state* cb_bts = nullptr;
|
||||
int cb_num;
|
||||
CallstackEntry cb_data[MaxCbTrace];
|
||||
int cb_fixup;
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
static ImageCache* s_imageCache = nullptr;
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
debuginfod_client* s_debuginfod;
|
||||
@@ -959,10 +1030,9 @@ void InitCallstack()
|
||||
{
|
||||
InitRpmalloc();
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
s_imageCache = (ImageCache*)tracy_malloc( sizeof( ImageCache ) );
|
||||
new(s_imageCache) ImageCache();
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
CreateImageCaches();
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
|
||||
#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE
|
||||
s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline();
|
||||
@@ -1056,13 +1126,9 @@ debuginfod_client* GetDebuginfodClient()
|
||||
|
||||
void EndCallstack()
|
||||
{
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
if( s_imageCache )
|
||||
{
|
||||
s_imageCache->~ImageCache();
|
||||
tracy_free( s_imageCache );
|
||||
}
|
||||
#endif //#ifdef TRACY_USE_IMAGE_CACHE
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
DestroyImageCaches();
|
||||
#endif //#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
#ifndef TRACY_DEMANGLE
|
||||
___tracy_free_demangle_buffer();
|
||||
#endif
|
||||
@@ -1252,17 +1318,17 @@ void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, Callst
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
|
||||
{
|
||||
InitRpmalloc();
|
||||
if( ptr >> 63 == 0 )
|
||||
if ( !IsKernelAddress( ptr ) )
|
||||
{
|
||||
const char* imageName = nullptr;
|
||||
uint64_t imageBaseAddress = 0x0;
|
||||
|
||||
#ifdef TRACY_USE_IMAGE_CACHE
|
||||
const auto* image = s_imageCache->GetImageForAddress((void*)ptr);
|
||||
#ifdef TRACY_HAS_DL_ITERATE_PHDR_TO_REFRESH_IMAGE_CACHE
|
||||
const auto* image = s_imageCache->GetImageForAddress( ptr );
|
||||
if( image )
|
||||
{
|
||||
imageName = image->m_name;
|
||||
imageBaseAddress = uint64_t(image->m_startAddress);
|
||||
imageBaseAddress = uint64_t( image->m_startAddress );
|
||||
}
|
||||
#else
|
||||
Dl_info dlinfo;
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
# endif
|
||||
|
||||
# if defined _WIN32
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# if !defined TRACY_WIN32_NO_DESKTOP
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
# endif
|
||||
# elif defined __ANDROID__
|
||||
@@ -30,6 +30,10 @@
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
# endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
#define TRACY_USE_LIBBACKTRACE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,15 +1,31 @@
|
||||
#ifndef __TRACYCALLSTACK_HPP__
|
||||
#define __TRACYCALLSTACK_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
#include "TracyCallstack.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct ImageEntry
|
||||
{
|
||||
uint64_t m_startAddress = 0;
|
||||
uint64_t m_endAddress = 0;
|
||||
char* m_name = nullptr;
|
||||
char* m_path = nullptr;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#ifndef TRACY_HAS_CALLSTACK
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
|
||||
static constexpr bool has_callstack() { return false; }
|
||||
static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; }
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -38,6 +54,8 @@ static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; }
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static constexpr bool has_callstack() { return true; }
|
||||
|
||||
struct CallstackSymbolData
|
||||
{
|
||||
const char* file;
|
||||
@@ -79,11 +97,10 @@ debuginfod_client* GetDebuginfodClient();
|
||||
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
|
||||
TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long );
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
@@ -112,7 +129,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v
|
||||
return _URC_NO_REASON;
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
|
||||
@@ -127,7 +144,7 @@ static tracy_force_inline void* Callstack( int depth )
|
||||
|
||||
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
static tracy_force_inline void* Callstack( int32_t depth )
|
||||
{
|
||||
assert( depth >= 1 );
|
||||
|
||||
|
||||
@@ -219,8 +219,9 @@ public:
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
|
||||
private:
|
||||
LockableCtx m_ctx;
|
||||
};
|
||||
|
||||
@@ -535,8 +536,9 @@ public:
|
||||
m_ctx.CustomName( name, size );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
|
||||
private:
|
||||
SharedLockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -58,6 +58,9 @@ TRACY_API bool IsProfilerStarted();
|
||||
# define TracyIsStarted true
|
||||
#endif
|
||||
|
||||
TRACY_API bool BeginSamplingProfiling();
|
||||
TRACY_API void EndSamplingProfiling();
|
||||
|
||||
class GpuCtx;
|
||||
class Profiler;
|
||||
class Socket;
|
||||
@@ -114,11 +117,11 @@ struct LuaZoneState
|
||||
|
||||
|
||||
#define TracyLfqPrepare( _type ) \
|
||||
moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = GetToken(); \
|
||||
tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
|
||||
auto __token = tracy::GetToken(); \
|
||||
auto& __tail = __token->get_tail_index(); \
|
||||
auto item = __token->enqueue_begin( __magic ); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
|
||||
#define TracyLfqCommit \
|
||||
__tail.store( __magic + 1, std::memory_order_release );
|
||||
@@ -136,11 +139,11 @@ struct LuaZoneState
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyQueuePrepare( _type ) \
|
||||
auto item = Profiler::QueueSerial(); \
|
||||
MemWrite( &item->hdr.type, _type );
|
||||
auto item = tracy::Profiler::QueueSerial(); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
# define TracyQueueCommit( _name ) \
|
||||
MemWrite( &item->_name.thread, GetThreadHandle() ); \
|
||||
Profiler::QueueSerialFinish();
|
||||
tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
# define TracyQueuePrepareC( _type ) \
|
||||
auto item = tracy::Profiler::QueueSerial(); \
|
||||
tracy::MemWrite( &item->hdr.type, _type );
|
||||
@@ -252,6 +255,9 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
bool BeginSamplingProfiling();
|
||||
void EndSamplingProfiling();
|
||||
|
||||
tracy_force_inline uint32_t GetNextZoneId()
|
||||
{
|
||||
return m_zoneId.fetch_add( 1, std::memory_order_relaxed );
|
||||
@@ -387,58 +393,58 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth )
|
||||
{
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageFatThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, int callstack )
|
||||
static tracy_force_inline void Message( const char* txt, int32_t callstack_depth )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->messageLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
|
||||
TracyQueueCommit( messageLiteralThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth )
|
||||
{
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
MemWrite( &item->messageColorFat.time, GetTime() );
|
||||
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) );
|
||||
@@ -448,17 +454,17 @@ public:
|
||||
TracyQueueCommit( messageColorFatThread );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
if( callstack_depth != 0 && has_callstack() )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
tracy::GetProfiler().SendCallstack( callstack_depth );
|
||||
}
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColorLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) );
|
||||
@@ -510,29 +516,31 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemAlloc( ptr, size, secure );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemAlloc( ptr, size, secure );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
if( !ProfilerAllocatorAvailable() )
|
||||
@@ -540,23 +548,25 @@ public:
|
||||
MemFree( ptr, secure );
|
||||
return;
|
||||
}
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemFree( ptr, secure );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
|
||||
@@ -587,62 +597,101 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
|
||||
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemAllocNamed( ptr, size, secure, name );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemAllocNamed( ptr, size, secure, name );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemFreeNamed( ptr, secure, name );
|
||||
#endif
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemFreeNamed( ptr, secure, name );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendCallstack( int depth )
|
||||
static tracy_force_inline void MemDiscard( const char* name, bool secure )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto ptr = Callstack( depth );
|
||||
TracyQueuePrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyQueueCommit( callstackFatThread );
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendMemDiscard( QueueType::MemDiscard, thread, name );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
GetProfiler().m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemDiscard( QueueType::MemDiscard, thread, name );
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
MemDiscard( name, secure );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendCallstack( int32_t depth )
|
||||
{
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto ptr = Callstack( depth );
|
||||
TracyQueuePrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyQueueCommit( callstackFatThread );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data )
|
||||
@@ -677,6 +726,9 @@ public:
|
||||
#ifdef TRACY_FIBERS
|
||||
static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::FiberEnter );
|
||||
MemWrite( &item->fiberEnter.time, GetTime() );
|
||||
MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber );
|
||||
@@ -686,13 +738,16 @@ public:
|
||||
|
||||
static tracy_force_inline void LeaveFiber()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::FiberLeave );
|
||||
MemWrite( &item->fiberLeave.time, GetTime() );
|
||||
TracyQueueCommit( fiberLeave );
|
||||
}
|
||||
#endif
|
||||
|
||||
void SendCallstack( int depth, const char* skipBefore );
|
||||
void SendCallstack( int32_t depth, const char* skipBefore );
|
||||
static void CutCallstack( void* callstack, const char* skipBefore );
|
||||
|
||||
static bool ShouldExit();
|
||||
@@ -800,7 +855,7 @@ private:
|
||||
|
||||
void InstallCrashHandler();
|
||||
void RemoveCrashHandler();
|
||||
|
||||
|
||||
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
|
||||
void ClearSerial();
|
||||
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
|
||||
@@ -833,6 +888,21 @@ private:
|
||||
m_bufferOffset += int( len );
|
||||
}
|
||||
|
||||
char* SafeCopyProlog( const char* p, size_t size );
|
||||
void SafeCopyEpilog( char* buf );
|
||||
|
||||
template<class Callable> // must be void( const char* buf, size_t size )
|
||||
bool WithSafeCopy( const char* p, size_t size, Callable&& callable )
|
||||
{
|
||||
if( char* buf = SafeCopyProlog( p, size ) )
|
||||
{
|
||||
callable( buf, size );
|
||||
SafeCopyEpilog( buf );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SendData( const char* data, size_t len );
|
||||
void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type );
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
@@ -862,14 +932,13 @@ private:
|
||||
|
||||
static tracy_force_inline void SendCallstackSerial( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
#else
|
||||
static_cast<void>(ptr); // unused
|
||||
#endif
|
||||
if( has_callstack() )
|
||||
{
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
|
||||
@@ -907,6 +976,18 @@ private:
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name )
|
||||
{
|
||||
assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack );
|
||||
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
MemWrite( &item->memDiscard.time, GetTime() );
|
||||
MemWrite( &item->memDiscard.thread, thread );
|
||||
MemWrite( &item->memDiscard.name, (uint64_t)name );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemName( const char* name )
|
||||
{
|
||||
assert( name );
|
||||
@@ -922,7 +1003,6 @@ private:
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
std::atomic<int64_t> m_timeBegin;
|
||||
uint32_t m_mainThread;
|
||||
uint64_t m_epoch, m_exectime;
|
||||
@@ -963,6 +1043,7 @@ private:
|
||||
std::atomic<bool> m_isConnected;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<uint64_t> m_connectionId;
|
||||
std::atomic<bool> m_symbolsBusy;
|
||||
|
||||
TracyMutex m_deferredLock;
|
||||
FastVector<QueueItem> m_deferredQueue;
|
||||
@@ -990,9 +1071,19 @@ private:
|
||||
char* m_queryData;
|
||||
char* m_queryDataPtr;
|
||||
|
||||
#if defined _WIN32
|
||||
void* m_exceptionHandler;
|
||||
#ifndef NDEBUG
|
||||
// m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds.
|
||||
std::atomic_bool m_inUse{ false };
|
||||
#endif
|
||||
char* m_safeSendBuffer;
|
||||
|
||||
#if defined _WIN32
|
||||
void* m_prevHandler;
|
||||
#else
|
||||
int m_pipe[2];
|
||||
int m_pipeBufSize;
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
struct {
|
||||
struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt;
|
||||
|
||||
@@ -0,0 +1,556 @@
|
||||
#include "../server/tracy_robin_hood.h"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "TracyThread.hpp"
|
||||
#include "tracy/TracyC.h"
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <time.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#define ROCPROFILER_CALL( result, msg ) \
|
||||
{ \
|
||||
rocprofiler_status_t CHECKSTATUS = result; \
|
||||
if( CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS ) \
|
||||
{ \
|
||||
std::string status_msg = rocprofiler_get_status_string( CHECKSTATUS ); \
|
||||
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failed with error code " \
|
||||
<< CHECKSTATUS << ": " << status_msg << std::endl; \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" << status_msg \
|
||||
<< ")"; \
|
||||
throw std::runtime_error( errmsg.str() ); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
|
||||
|
||||
struct DispatchData
|
||||
{
|
||||
int64_t launch_start;
|
||||
int64_t launch_end;
|
||||
uint32_t thread_id;
|
||||
uint16_t query_id;
|
||||
};
|
||||
|
||||
struct ToolData
|
||||
{
|
||||
uint32_t version;
|
||||
const char* runtime_version;
|
||||
uint32_t priority;
|
||||
rocprofiler_client_id_t client_id;
|
||||
uint8_t context_id;
|
||||
bool init;
|
||||
uint64_t query_id;
|
||||
int64_t previous_cpu_time;
|
||||
tracy::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t> client_kernels;
|
||||
tracy::unordered_map<rocprofiler_dispatch_id_t, DispatchData> dispatch_data;
|
||||
tracy::unordered_set<std::string> counter_names = { "SQ_WAVES", "GL2C_MISS", "GL2C_HIT" };
|
||||
std::unique_ptr<tracy::Thread> cal_thread;
|
||||
std::mutex mut{};
|
||||
};
|
||||
|
||||
using namespace tracy;
|
||||
|
||||
rocprofiler_context_id_t& get_client_ctx()
|
||||
{
|
||||
static rocprofiler_context_id_t ctx{ 0 };
|
||||
return ctx;
|
||||
}
|
||||
|
||||
const char* CTX_NAME = "rocprofv3";
|
||||
|
||||
uint8_t gpu_context_allocate( ToolData* data )
|
||||
{
|
||||
|
||||
timespec ts;
|
||||
clock_gettime( CLOCK_BOOTTIME, &ts );
|
||||
uint64_t cpu_timestamp = Profiler::GetTime();
|
||||
uint64_t gpu_timestamp = ( (uint64_t)ts.tv_sec * 1000000000 ) + ts.tv_nsec;
|
||||
float timestamp_period = 1.0f;
|
||||
data->previous_cpu_time = cpu_timestamp;
|
||||
|
||||
// Allocate the process-unique GPU context ID. There's a max of 255 available;
|
||||
// if we are recreating devices a lot we may exceed that. Don't do that, or
|
||||
// wrap around and get weird (but probably still usable) numbers.
|
||||
uint8_t context_id = tracy::GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed );
|
||||
if( context_id >= 255 )
|
||||
{
|
||||
context_id %= 255;
|
||||
}
|
||||
|
||||
uint8_t context_flags = 0;
|
||||
#ifdef TRACY_ROCPROF_CALIBRATION
|
||||
// Tell tracy we'll be passing calibrated timestamps and not to mess with
|
||||
// the times. We'll periodically send GpuCalibration events in case the
|
||||
// times drift.
|
||||
context_flags |= tracy::GpuContextCalibration;
|
||||
#endif
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext );
|
||||
tracy::MemWrite( &item->gpuNewContext.cpuTime, cpu_timestamp );
|
||||
tracy::MemWrite( &item->gpuNewContext.gpuTime, gpu_timestamp );
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
tracy::MemWrite( &item->gpuNewContext.period, timestamp_period );
|
||||
tracy::MemWrite( &item->gpuNewContext.context, context_id );
|
||||
tracy::MemWrite( &item->gpuNewContext.flags, context_flags );
|
||||
tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Send the name of the context along.
|
||||
// NOTE: Tracy will unconditionally free the name so we must clone it here.
|
||||
// Since internally Tracy will use its own rpmalloc implementation we must
|
||||
// make sure we allocate from the same source.
|
||||
size_t name_length = strlen( CTX_NAME );
|
||||
char* cloned_name = (char*)tracy::tracy_malloc( name_length );
|
||||
memcpy( cloned_name, CTX_NAME, name_length );
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName );
|
||||
tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
|
||||
tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)cloned_name );
|
||||
tracy::MemWrite( &item->gpuContextNameFat.size, name_length );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
return context_id;
|
||||
}
|
||||
|
||||
uint64_t kernel_src_loc( ToolData* data, uint64_t kernel_id )
|
||||
{
|
||||
uint64_t src_loc = 0;
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
rocprofiler_kernel_id_t kid = kernel_id;
|
||||
if( data->client_kernels.count( kid ) )
|
||||
{
|
||||
auto& sym_data = data->client_kernels[kid];
|
||||
const char* name = sym_data.kernel_name;
|
||||
size_t name_len = strlen( name );
|
||||
uint32_t line = 0;
|
||||
src_loc = tracy::Profiler::AllocSourceLocation( line, NULL, 0, name, name_len, NULL, 0 );
|
||||
}
|
||||
return src_loc;
|
||||
}
|
||||
|
||||
void record_interval( ToolData* data, rocprofiler_timestamp_t start_timestamp, rocprofiler_timestamp_t end_timestamp,
|
||||
uint64_t src_loc, rocprofiler_dispatch_id_t dispatch_id )
|
||||
{
|
||||
|
||||
uint16_t query_id = 0;
|
||||
uint8_t context_id = data->context_id;
|
||||
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
query_id = data->query_id;
|
||||
data->query_id++;
|
||||
if( dispatch_id != UINT64_MAX )
|
||||
{
|
||||
DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
|
||||
dispatch_data.query_id = query_id;
|
||||
dispatch_data.thread_id = tracy::GetThreadHandle();
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t cpu_start_time = 0, cpu_end_time = 0;
|
||||
if( dispatch_id == UINT64_MAX )
|
||||
{
|
||||
cpu_start_time = tracy::Profiler::GetTime();
|
||||
cpu_end_time = tracy::Profiler::GetTime();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
DispatchData& dispatch_data = data->dispatch_data[dispatch_id];
|
||||
cpu_start_time = dispatch_data.launch_start;
|
||||
cpu_end_time = dispatch_data.launch_end;
|
||||
}
|
||||
|
||||
if( src_loc != 0 )
|
||||
{
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)src_loc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
static const ___tracy_source_location_data src_loc = { NULL, NULL, NULL, 0, 0 };
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)&src_loc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
|
||||
tracy::MemWrite( &item->gpuTime.gpuTime, start_timestamp );
|
||||
tracy::MemWrite( &item->gpuTime.queryId, query_id );
|
||||
tracy::MemWrite( &item->gpuTime.context, context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial );
|
||||
tracy::MemWrite( &item->gpuZoneEnd.cpuTime, cpu_end_time );
|
||||
tracy::MemWrite( &item->gpuZoneEnd.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneEnd.queryId, query_id );
|
||||
tracy::MemWrite( &item->gpuZoneEnd.context, context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime );
|
||||
tracy::MemWrite( &item->gpuTime.gpuTime, end_timestamp );
|
||||
tracy::MemWrite( &item->gpuTime.queryId, query_id );
|
||||
tracy::MemWrite( &item->gpuTime.context, context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
void record_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
|
||||
rocprofiler_record_counter_t* record_data, size_t record_count,
|
||||
rocprofiler_user_data_t /*user_data*/, void* callback_data )
|
||||
{
|
||||
assert( callback_data != nullptr );
|
||||
ToolData* data = static_cast<ToolData*>( callback_data );
|
||||
if( !data->init ) return;
|
||||
|
||||
std::unordered_map<rocprofiler_counter_instance_id_t, double> sums;
|
||||
for( size_t i = 0; i < record_count; ++i )
|
||||
{
|
||||
auto _counter_id = rocprofiler_counter_id_t{};
|
||||
ROCPROFILER_CALL( rocprofiler_query_record_counter_id( record_data[i].id, &_counter_id ),
|
||||
"query record counter id" );
|
||||
sums[_counter_id.handle] += record_data[i].counter_value;
|
||||
}
|
||||
|
||||
uint16_t query_id = 0;
|
||||
uint32_t thread_id = 0;
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
// An assumption is made here that the counter values are supplied after the dispatch
|
||||
// complete callback.
|
||||
assert( data->dispatch_data.count( dispatch_data.dispatch_info.dispatch_id ) );
|
||||
DispatchData& ddata = data->dispatch_data[dispatch_data.dispatch_info.dispatch_id];
|
||||
query_id = ddata.query_id;
|
||||
thread_id = ddata.thread_id;
|
||||
}
|
||||
|
||||
for( auto& p : sums )
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneAnnotation );
|
||||
tracy::MemWrite( &item->zoneAnnotation.noteId, p.first );
|
||||
tracy::MemWrite( &item->zoneAnnotation.queryId, query_id );
|
||||
tracy::MemWrite( &item->zoneAnnotation.thread, thread_id );
|
||||
tracy::MemWrite( &item->zoneAnnotation.value, p.second );
|
||||
tracy::MemWrite( &item->zoneAnnotation.context, data->context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue.
|
||||
* rocprofiler_counter_config_id_t* is a return to specify what counters to collect
|
||||
* for this dispatch (dispatch_packet).
|
||||
*/
|
||||
void dispatch_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data,
|
||||
rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/,
|
||||
void* callback_data )
|
||||
{
|
||||
assert( callback_data != nullptr );
|
||||
ToolData* data = static_cast<ToolData*>( callback_data );
|
||||
if( !data->init ) return;
|
||||
|
||||
/**
|
||||
* This simple example uses the same profile counter set for all agents.
|
||||
* We store this in a cache to prevent constructing many identical profile counter
|
||||
* sets. We first check the cache to see if we have already constructed a counter"
|
||||
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
|
||||
* set.
|
||||
*/
|
||||
static std::shared_mutex m_mutex = {};
|
||||
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
|
||||
|
||||
auto search_cache = [&]()
|
||||
{
|
||||
if( auto pos = profile_cache.find( dispatch_data.dispatch_info.agent_id.handle ); pos != profile_cache.end() )
|
||||
{
|
||||
*config = pos->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
{
|
||||
auto rlock = std::shared_lock{ m_mutex };
|
||||
if( search_cache() ) return;
|
||||
}
|
||||
|
||||
auto wlock = std::unique_lock{ m_mutex };
|
||||
if( search_cache() ) return;
|
||||
|
||||
// GPU Counter IDs
|
||||
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
||||
|
||||
// Iterate through the agents and get the counters available on that agent
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_iterate_agent_supported_counters(
|
||||
dispatch_data.dispatch_info.agent_id,
|
||||
[]( rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data )
|
||||
{
|
||||
std::vector<rocprofiler_counter_id_t>* vec =
|
||||
static_cast<std::vector<rocprofiler_counter_id_t>*>( user_data );
|
||||
for( size_t i = 0; i < num_counters; i++ )
|
||||
{
|
||||
vec->push_back( counters[i] );
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>( &gpu_counters ) ),
|
||||
"Could not fetch supported counters" );
|
||||
|
||||
std::vector<rocprofiler_counter_id_t> collect_counters;
|
||||
collect_counters.reserve( data->counter_names.size() );
|
||||
// Look for the counters contained in counters_to_collect in gpu_counters
|
||||
for( auto& counter : gpu_counters )
|
||||
{
|
||||
rocprofiler_counter_info_v0_t info;
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_counter_info( counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>( &info ) ),
|
||||
"Could not query info" );
|
||||
if( data->counter_names.count( std::string( info.name ) ) > 0 )
|
||||
{
|
||||
collect_counters.push_back( counter );
|
||||
|
||||
size_t name_length = strlen( info.name );
|
||||
char* cloned_name = (char*)tracy::tracy_malloc( name_length );
|
||||
memcpy( cloned_name, info.name, name_length );
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuAnnotationName );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.context, data->context_id );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.noteId, counter.handle );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.ptr, (uint64_t)cloned_name );
|
||||
tracy::MemWrite( &item->gpuAnnotationNameFat.size, name_length );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a colleciton profile for the counters
|
||||
rocprofiler_profile_config_id_t profile = { .handle = 0 };
|
||||
ROCPROFILER_CALL( rocprofiler_create_profile_config( dispatch_data.dispatch_info.agent_id, collect_counters.data(),
|
||||
collect_counters.size(), &profile ),
|
||||
"Could not construct profile cfg" );
|
||||
|
||||
profile_cache.emplace( dispatch_data.dispatch_info.agent_id.handle, profile );
|
||||
// Return the profile to collect those counters for this dispatch
|
||||
*config = profile;
|
||||
}
|
||||
|
||||
void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data,
|
||||
void* callback_data )
|
||||
{
|
||||
assert( callback_data != nullptr );
|
||||
ToolData* data = static_cast<ToolData*>( callback_data );
|
||||
if( !data->init ) return;
|
||||
|
||||
if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
||||
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
|
||||
{
|
||||
auto* sym_data = static_cast<kernel_symbol_data_t*>( record.payload );
|
||||
|
||||
if( record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD )
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
data->client_kernels.emplace( sym_data->kernel_id, *sym_data );
|
||||
}
|
||||
else if( record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD )
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
data->client_kernels.erase( sym_data->kernel_id );
|
||||
}
|
||||
}
|
||||
else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
|
||||
{
|
||||
auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
|
||||
if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
|
||||
{
|
||||
if( record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER )
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_start = tracy::Profiler::GetTime();
|
||||
}
|
||||
else if( record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
|
||||
{
|
||||
auto _lk = std::unique_lock{ data->mut };
|
||||
data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_end = tracy::Profiler::GetTime();
|
||||
}
|
||||
}
|
||||
else if( record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE )
|
||||
{
|
||||
uint64_t src_loc = kernel_src_loc( data, rdata->dispatch_info.kernel_id );
|
||||
record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc,
|
||||
rdata->dispatch_info.dispatch_id );
|
||||
}
|
||||
}
|
||||
else if( record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY &&
|
||||
record.operation != ROCPROFILER_MEMORY_COPY_NONE && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT )
|
||||
{
|
||||
auto* rdata = static_cast<rocprofiler_callback_tracing_memory_copy_data_t*>( record.payload );
|
||||
const char* name = nullptr;
|
||||
switch( record.operation )
|
||||
{
|
||||
case ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE:
|
||||
name = "DeviceToDeviceCopy";
|
||||
break;
|
||||
case ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST:
|
||||
name = "DeviceToHostCopy";
|
||||
break;
|
||||
case ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE:
|
||||
name = "HostToDeviceCopy";
|
||||
break;
|
||||
case ROCPROFILER_MEMORY_COPY_HOST_TO_HOST:
|
||||
name = "HostToHostCopy";
|
||||
break;
|
||||
}
|
||||
size_t name_len = strlen( name );
|
||||
uint64_t src_loc = tracy::Profiler::AllocSourceLocation( 0, NULL, 0, name, name_len, NULL, 0 );
|
||||
record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, UINT64_MAX );
|
||||
}
|
||||
}
|
||||
|
||||
void calibration_thread( void* ptr )
|
||||
{
|
||||
while( !TracyIsStarted )
|
||||
;
|
||||
ToolData* data = static_cast<ToolData*>( ptr );
|
||||
data->context_id = gpu_context_allocate( data );
|
||||
const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" );
|
||||
if( user_counters )
|
||||
{
|
||||
data->counter_names.clear();
|
||||
std::stringstream ss( user_counters );
|
||||
std::string counter;
|
||||
while( std::getline( ss, counter, ',' ) ) data->counter_names.insert( counter );
|
||||
}
|
||||
data->init = true;
|
||||
|
||||
#ifdef TRACY_ROCPROF_CALIBRATION
|
||||
while( data->init )
|
||||
{
|
||||
sleep( 1 );
|
||||
|
||||
timespec ts;
|
||||
// HSA performs a linear interpolation of GPU time to CLOCK_BOOTTIME. However, this is
|
||||
// subject to network time updates and can drift relative to tracy's clock.
|
||||
clock_gettime( CLOCK_BOOTTIME, &ts );
|
||||
int64_t cpu_timestamp = Profiler::GetTime();
|
||||
int64_t gpu_timestamp = ts.tv_nsec + ts.tv_sec * 1e9L;
|
||||
|
||||
if( cpu_timestamp > data->previous_cpu_time )
|
||||
{
|
||||
auto* item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration );
|
||||
tracy::MemWrite( &item->gpuCalibration.gpuTime, gpu_timestamp );
|
||||
tracy::MemWrite( &item->gpuCalibration.cpuTime, cpu_timestamp );
|
||||
tracy::MemWrite( &item->gpuCalibration.cpuDelta, cpu_timestamp - data->previous_cpu_time );
|
||||
tracy::MemWrite( &item->gpuCalibration.context, data->context_id );
|
||||
tracy::Profiler::QueueSerialFinish();
|
||||
data->previous_cpu_time = cpu_timestamp;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int tool_init( rocprofiler_client_finalize_t fini_func, void* user_data )
|
||||
{
|
||||
ToolData* data = static_cast<ToolData*>( user_data );
|
||||
data->cal_thread = std::make_unique<tracy::Thread>( calibration_thread, data );
|
||||
|
||||
ROCPROFILER_CALL( rocprofiler_create_context( &get_client_ctx() ), "context creation failed" );
|
||||
|
||||
ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_counting_service( get_client_ctx(), dispatch_callback,
|
||||
user_data, record_callback, user_data ),
|
||||
"Could not setup counting service" );
|
||||
|
||||
rocprofiler_tracing_operation_t ops[] = { ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER };
|
||||
ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ops, 1,
|
||||
tool_callback_tracing_callback, user_data ),
|
||||
"callback tracing service failed to configure" );
|
||||
|
||||
rocprofiler_tracing_operation_t ops2[] = { ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
|
||||
ROCPROFILER_KERNEL_DISPATCH_ENQUEUE };
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_tracing_service( get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,
|
||||
ops2, 2, tool_callback_tracing_callback, user_data ),
|
||||
"callback tracing service failed to configure" );
|
||||
|
||||
ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(),
|
||||
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, nullptr,
|
||||
0, tool_callback_tracing_callback, user_data ),
|
||||
"callback tracing service failed to configure" );
|
||||
|
||||
ROCPROFILER_CALL( rocprofiler_start_context( get_client_ctx() ), "start context" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
void tool_fini( void* tool_data_v )
|
||||
{
|
||||
rocprofiler_stop_context( get_client_ctx() );
|
||||
|
||||
ToolData* data = static_cast<ToolData*>( tool_data_v );
|
||||
data->init = false;
|
||||
data->cal_thread.reset();
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
{
|
||||
rocprofiler_tool_configure_result_t* rocprofiler_configure( uint32_t version, const char* runtime_version,
|
||||
uint32_t priority, rocprofiler_client_id_t* client_id )
|
||||
{
|
||||
// If not the first tool to register, indicate that the tool doesn't want to do anything
|
||||
if( priority > 0 ) return nullptr;
|
||||
|
||||
// (optional) Provide a name for this tool to rocprofiler
|
||||
client_id->name = "Tracy";
|
||||
|
||||
// (optional) create configure data
|
||||
static ToolData data = ToolData{ version, runtime_version, priority, *client_id, 0, false, 0, 0 };
|
||||
|
||||
// construct configure result
|
||||
static auto cfg = rocprofiler_tool_configure_result_t{ sizeof( rocprofiler_tool_configure_result_t ),
|
||||
&tool_init, &tool_fini, static_cast<void*>( &data ) };
|
||||
|
||||
return &cfg;
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,14 @@
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "TracyCallstack.hpp"
|
||||
|
||||
#if (defined(__GNUC__) || defined(__clang__))
|
||||
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx) \
|
||||
__attribute__((format(printf, fmt_idx, arg_idx)))
|
||||
#else
|
||||
# define TRACY_ATTRIBUTE_FORMAT_PRINTF(fmt_idx, arg_idx)
|
||||
#endif
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
@@ -22,7 +29,7 @@ public:
|
||||
ScopedZone& operator=( const ScopedZone& ) = delete;
|
||||
ScopedZone& operator=( ScopedZone&& ) = delete;
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -33,13 +40,19 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneBegin );
|
||||
auto zoneQueue = QueueType::ZoneBegin;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
GetProfiler().SendCallstack( depth );
|
||||
zoneQueue = QueueType::ZoneBeginCallstack;
|
||||
}
|
||||
TracyQueuePrepare( zoneQueue );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -50,55 +63,21 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
GetProfiler().SendCallstack( depth );
|
||||
zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack;
|
||||
}
|
||||
TracyQueuePrepare( zoneQueue );
|
||||
const auto srcloc =
|
||||
Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast<uint32_t>(0), is_active ) {}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
|
||||
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {}
|
||||
|
||||
tracy_force_inline ~ScopedZone()
|
||||
{
|
||||
@@ -126,7 +105,7 @@ public:
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
}
|
||||
|
||||
void TextFmt( const char* fmt, ... )
|
||||
void TextFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
@@ -165,7 +144,7 @@ public:
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
}
|
||||
|
||||
void NameFmt( const char* fmt, ... )
|
||||
void NameFmt( const char* fmt, ... ) TRACY_ATTRIBUTE_FORMAT_PRINTF(2, 3)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
||||
@@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent )
|
||||
FILE* f = fopen( tmp, "r" );
|
||||
if( f )
|
||||
{
|
||||
fscanf( f, "%" PRIu64, &maxRange );
|
||||
(void)fscanf( f, "%" PRIu64, &maxRange );
|
||||
fclose( f );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
# if defined _WIN32
|
||||
# include <windows.h>
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# elif defined __linux__
|
||||
# include <stdio.h>
|
||||
# include <inttypes.h>
|
||||
@@ -27,13 +28,24 @@ static inline uint64_t ConvertTime( const FILETIME& t )
|
||||
|
||||
void SysTime::ReadTimes()
|
||||
{
|
||||
FILETIME idleTime;
|
||||
FILETIME kernelTime;
|
||||
FILETIME userTime;
|
||||
|
||||
# if defined TRACY_GDK
|
||||
FILETIME creationTime;
|
||||
FILETIME exitTime;
|
||||
|
||||
GetProcessTimes( GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime );
|
||||
|
||||
idle = 0;
|
||||
# else
|
||||
FILETIME idleTime;
|
||||
|
||||
GetSystemTimes( &idleTime, &kernelTime, &userTime );
|
||||
|
||||
idle = ConvertTime( idleTime );
|
||||
# endif
|
||||
|
||||
const auto kernel = ConvertTime( kernelTime );
|
||||
const auto user = ConvertTime( userTime );
|
||||
used = kernel + user;
|
||||
|
||||
@@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
|
||||
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
|
||||
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
|
||||
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
|
||||
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
|
||||
MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
|
||||
MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
|
||||
MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
|
||||
MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else if( hdr.EventDescriptor.Opcode == 50 )
|
||||
@@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
|
||||
|
||||
TracyLfqPrepare( QueueType::ThreadWakeup );
|
||||
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
|
||||
MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
|
||||
MemWrite( &item->threadWakeup.thread, rt->threadId );
|
||||
MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
|
||||
MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
|
||||
@@ -232,6 +238,10 @@ void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
|
||||
#endif
|
||||
|
||||
const auto& hdr = record->EventHeader;
|
||||
|
||||
// Check for Lost_Event (6a399ae0-4bc6-4de9-870b-3657f8947e7e)
|
||||
if( hdr.ProviderId.Data1 == 0x6A399AE0 ) return;
|
||||
|
||||
assert( hdr.ProviderId.Data1 == 0x802EC45A );
|
||||
assert( hdr.EventDescriptor.Id == 0x0011 );
|
||||
|
||||
@@ -498,11 +508,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
char buf[256];
|
||||
if( tmp )
|
||||
if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
|
||||
{
|
||||
char buf[256];
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
LocalFree(tmp);
|
||||
if( ret != 0 )
|
||||
{
|
||||
threadName = CopyString( buf, ret );
|
||||
@@ -521,25 +531,23 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
|
||||
if( phnd != INVALID_HANDLE_VALUE )
|
||||
{
|
||||
HMODULE modules[1024];
|
||||
DWORD needed;
|
||||
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
|
||||
MEMORY_BASIC_INFORMATION vmeminfo;
|
||||
SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
|
||||
if( infosize == sizeof( vmeminfo ) )
|
||||
{
|
||||
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
|
||||
for( DWORD i=0; i<sz; i++ )
|
||||
if (vmeminfo.Type == MEM_IMAGE)
|
||||
{
|
||||
// for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE
|
||||
HMODULE mod = (HMODULE)vmeminfo.AllocationBase;
|
||||
MODULEINFO info;
|
||||
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
|
||||
if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 )
|
||||
{
|
||||
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
|
||||
char buf2[1024];
|
||||
const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 );
|
||||
if( modlen != 0 )
|
||||
{
|
||||
char buf2[1024];
|
||||
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
|
||||
if( modlen != 0 )
|
||||
{
|
||||
threadName = CopyString( buf2, modlen );
|
||||
threadSent = true;
|
||||
}
|
||||
threadName = CopyString( buf2, modlen );
|
||||
threadSent = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -606,6 +614,7 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
|
||||
# include <fcntl.h>
|
||||
# include <inttypes.h>
|
||||
# include <limits>
|
||||
# include <mntent.h>
|
||||
# include <poll.h>
|
||||
# include <stdio.h>
|
||||
# include <stdlib.h>
|
||||
@@ -678,7 +687,7 @@ enum TraceEventId
|
||||
EventBranchMiss,
|
||||
EventVsync,
|
||||
EventContextSwitch,
|
||||
EventWakeup,
|
||||
EventWaking,
|
||||
};
|
||||
|
||||
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
|
||||
@@ -753,6 +762,42 @@ static const char* ReadFile( const char* path )
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static const char* ReadFile( const char* base, const char* path )
|
||||
{
|
||||
const auto blen = strlen( base );
|
||||
const auto plen = strlen( path );
|
||||
|
||||
auto tmp = (char*)tracy_malloc( blen + plen + 1 );
|
||||
memcpy( tmp, base, blen );
|
||||
memcpy( tmp + blen, path, plen );
|
||||
tmp[blen+plen] = '\0';
|
||||
|
||||
auto res = ReadFile( tmp );
|
||||
tracy_free( tmp );
|
||||
return res;
|
||||
}
|
||||
|
||||
static char* GetTraceFsPath()
|
||||
{
|
||||
auto f = setmntent( "/proc/mounts", "r" );
|
||||
if( !f ) return nullptr;
|
||||
|
||||
char* ret = nullptr;
|
||||
while( auto ent = getmntent( f ) )
|
||||
{
|
||||
if( strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
|
||||
{
|
||||
auto len = strlen( ent->mnt_dir );
|
||||
ret = (char*)tracy_malloc( len + 1 );
|
||||
memcpy( ret, ent->mnt_dir, len );
|
||||
ret[len] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
endmntent( f );
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool SysTraceStart( int64_t& samplingPeriod )
|
||||
{
|
||||
#ifndef CLOCK_MONOTONIC_RAW
|
||||
@@ -767,16 +812,22 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
|
||||
#endif
|
||||
|
||||
int switchId = -1, wakeupId = -1, vsyncId = -1;
|
||||
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
|
||||
auto traceFsPath = GetTraceFsPath();
|
||||
if( !traceFsPath ) return false;
|
||||
TracyDebug( "tracefs path: %s\n", traceFsPath );
|
||||
|
||||
int switchId = -1, wakingId = -1, vsyncId = -1;
|
||||
const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
|
||||
if( switchIdStr ) switchId = atoi( switchIdStr );
|
||||
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
|
||||
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
|
||||
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
|
||||
const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
|
||||
if( wakingIdStr ) wakingId = atoi( wakingIdStr );
|
||||
const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
|
||||
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
|
||||
|
||||
tracy_free( traceFsPath );
|
||||
|
||||
TracyDebug( "sched_switch id: %i\n", switchId );
|
||||
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
|
||||
TracyDebug( "sched_waking id: %i\n", wakingId );
|
||||
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
|
||||
|
||||
#ifdef TRACY_NO_SAMPLING
|
||||
@@ -831,7 +882,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
2 + // CPU cycles + instructions retired
|
||||
2 + // cache reference + miss
|
||||
2 + // branch retired + miss
|
||||
2 + // context switches + wakeups
|
||||
2 + // context switches + waking ups
|
||||
1 // vsync
|
||||
);
|
||||
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
|
||||
@@ -1076,18 +1127,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
||||
}
|
||||
}
|
||||
|
||||
if( wakeupId != -1 )
|
||||
if( wakingId != -1 )
|
||||
{
|
||||
pe.config = wakeupId;
|
||||
pe.config &= ~PERF_SAMPLE_CALLCHAIN;
|
||||
pe = {};
|
||||
pe.type = PERF_TYPE_TRACEPOINT;
|
||||
pe.size = sizeof( perf_event_attr );
|
||||
pe.sample_period = 1;
|
||||
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
|
||||
// Coult ask for callstack here
|
||||
//pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
|
||||
pe.disabled = 1;
|
||||
pe.inherit = 1;
|
||||
pe.config = wakingId;
|
||||
pe.read_format = 0;
|
||||
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
|
||||
pe.use_clockid = 1;
|
||||
pe.clockid = CLOCK_MONOTONIC_RAW;
|
||||
#endif
|
||||
|
||||
TracyDebug( "Setup wakeup capture\n" );
|
||||
TracyDebug( "Setup waking up capture\n" );
|
||||
for( int i=0; i<s_numCpus; i++ )
|
||||
{
|
||||
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
if( fd != -1 )
|
||||
{
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWaking, i );
|
||||
if( s_ring[s_numBuffers].IsValid() )
|
||||
{
|
||||
s_numBuffers++;
|
||||
@@ -1332,6 +1396,7 @@ void SysTraceWorker( void* ptr )
|
||||
hadData = true;
|
||||
while( activeNum > 0 )
|
||||
{
|
||||
// Find the earliest event from the active buffers
|
||||
int sel = -1;
|
||||
int selPos;
|
||||
int64_t t0 = std::numeric_limits<int64_t>::max();
|
||||
@@ -1369,6 +1434,7 @@ void SysTraceWorker( void* ptr )
|
||||
}
|
||||
}
|
||||
}
|
||||
// Found any event
|
||||
if( sel >= 0 )
|
||||
{
|
||||
auto& ring = ringArray[ctxBufferIdx + sel];
|
||||
@@ -1384,10 +1450,10 @@ void SysTraceWorker( void* ptr )
|
||||
const auto rid = ring.GetId();
|
||||
if( rid == EventContextSwitch )
|
||||
{
|
||||
// Layout:
|
||||
// u64 time
|
||||
// u64 cnt
|
||||
// u64 ip[cnt]
|
||||
// Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
|
||||
// u64 time // PERF_SAMPLE_TIME
|
||||
// u64 cnt // PERF_SAMPLE_CALLCHAIN
|
||||
// u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN
|
||||
// u32 size
|
||||
// u8 data[size]
|
||||
// Data (not ABI stable, but has not changed since it was added, in 2009):
|
||||
@@ -1408,35 +1474,43 @@ void SysTraceWorker( void* ptr )
|
||||
const auto traceOffset = offset;
|
||||
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
|
||||
|
||||
uint32_t prev_pid, next_pid;
|
||||
uint32_t prev_pid, prev_prio;
|
||||
uint32_t next_pid, next_prio;
|
||||
long prev_state;
|
||||
|
||||
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t ) + sizeof( uint32_t );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &prev_state, offset, sizeof( long ) );
|
||||
offset += sizeof( long ) + 16;
|
||||
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
|
||||
offset += sizeof( uint32_t );
|
||||
ring.Read( &next_prio, offset, sizeof( uint32_t ) );
|
||||
|
||||
uint8_t reason = 100;
|
||||
uint8_t state;
|
||||
uint8_t oldThreadWaitReason = 100;
|
||||
uint8_t oldThreadState;
|
||||
|
||||
if( prev_state & 0x0001 ) state = 104;
|
||||
else if( prev_state & 0x0002 ) state = 101;
|
||||
else if( prev_state & 0x0004 ) state = 105;
|
||||
else if( prev_state & 0x0008 ) state = 106;
|
||||
else if( prev_state & 0x0010 ) state = 108;
|
||||
else if( prev_state & 0x0020 ) state = 109;
|
||||
else if( prev_state & 0x0040 ) state = 110;
|
||||
else if( prev_state & 0x0080 ) state = 102;
|
||||
else state = 103;
|
||||
if( prev_state & 0x0001 ) oldThreadState = 104;
|
||||
else if( prev_state & 0x0002 ) oldThreadState = 101;
|
||||
else if( prev_state & 0x0004 ) oldThreadState = 105;
|
||||
else if( prev_state & 0x0008 ) oldThreadState = 106;
|
||||
else if( prev_state & 0x0010 ) oldThreadState = 108;
|
||||
else if( prev_state & 0x0020 ) oldThreadState = 109;
|
||||
else if( prev_state & 0x0040 ) oldThreadState = 110;
|
||||
else if( prev_state & 0x0080 ) oldThreadState = 102;
|
||||
else oldThreadState = 103;
|
||||
|
||||
TracyLfqPrepare( QueueType::ContextSwitch );
|
||||
MemWrite( &item->contextSwitch.time, t0 );
|
||||
MemWrite( &item->contextSwitch.oldThread, prev_pid );
|
||||
MemWrite( &item->contextSwitch.newThread, next_pid );
|
||||
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
|
||||
MemWrite( &item->contextSwitch.reason, reason );
|
||||
MemWrite( &item->contextSwitch.state, state );
|
||||
MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
|
||||
MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
|
||||
MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
|
||||
MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
|
||||
MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
|
||||
TracyLfqCommit;
|
||||
|
||||
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
|
||||
@@ -1450,27 +1524,33 @@ void SysTraceWorker( void* ptr )
|
||||
TracyLfqCommit;
|
||||
}
|
||||
}
|
||||
else if( rid == EventWakeup )
|
||||
else if( rid == EventWaking)
|
||||
{
|
||||
// See /sys/kernel/debug/tracing/events/sched/sched_waking/format
|
||||
// Layout:
|
||||
// u64 time
|
||||
// u64 time // PERF_SAMPLE_TIME
|
||||
// u32 size
|
||||
// u8 data[size]
|
||||
// Data:
|
||||
// u8 hdr[8]
|
||||
// u8 comm[16]
|
||||
// u32 pid
|
||||
// u32 prio
|
||||
// u64 target_cpu
|
||||
|
||||
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
|
||||
|
||||
// i32 prio
|
||||
// i32 target_cpu
|
||||
const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
|
||||
offset += dataOffset + 8 + 16;
|
||||
uint32_t pid;
|
||||
ring.Read( &pid, offset, sizeof( uint32_t ) );
|
||||
|
||||
|
||||
TracyLfqPrepare( QueueType::ThreadWakeup );
|
||||
MemWrite( &item->threadWakeup.time, t0 );
|
||||
MemWrite( &item->threadWakeup.thread, pid );
|
||||
MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
|
||||
|
||||
int8_t adjustReason = -1; // Does not exist on Linux
|
||||
int8_t adjustIncrement = 0; // Should perhaps store the new prio?
|
||||
MemWrite( &item->threadWakeup.adjustReason, adjustReason );
|
||||
MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
else
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
#define __TRACYSYSTRACE_HPP__
|
||||
|
||||
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# include "../common/TracyWinFamily.hpp"
|
||||
# if !defined TRACY_WIN32_NO_DESKTOP
|
||||
# define TRACY_HAS_SYSTEM_TRACING
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap;
|
||||
# define _Thread_local __declspec(thread)
|
||||
# define TLS_MODEL
|
||||
# else
|
||||
# ifndef __HAIKU__
|
||||
# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26
|
||||
# define TLS_MODEL __attribute__((tls_model("local-dynamic")))
|
||||
# elif !defined(__HAIKU__)
|
||||
# define TLS_MODEL __attribute__((tls_model("initial-exec")))
|
||||
# else
|
||||
# define TLS_MODEL
|
||||
@@ -2778,7 +2780,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) {
|
||||
_memory_huge_pages = 1;
|
||||
}
|
||||
|
||||
#if PLATFORM_WINDOWS
|
||||
#if PLATFORM_WINDOWS && !defined TRACY_GDK
|
||||
if (_memory_config.enable_huge_pages) {
|
||||
HANDLE token = 0;
|
||||
size_t large_page_minimum = GetLargePageMinimum();
|
||||
|
||||
@@ -9,7 +9,7 @@ namespace tracy
|
||||
|
||||
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 69 };
|
||||
enum : uint32_t { ProtocolVersion = 76 };
|
||||
enum : uint16_t { BroadcastVersion = 3 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
@@ -83,7 +83,7 @@ struct WelcomeFlag
|
||||
enum _t : uint8_t
|
||||
{
|
||||
OnDemand = 1 << 0,
|
||||
IsApple = 1 << 1,
|
||||
IgnoreMemFaults = 1 << 1,
|
||||
CodeTransfer = 1 << 2,
|
||||
CombineSamples = 1 << 3,
|
||||
IdentifySamples = 1 << 4,
|
||||
@@ -95,7 +95,6 @@ struct WelcomeMessage
|
||||
double timerMul;
|
||||
int64_t initBegin;
|
||||
int64_t initEnd;
|
||||
uint64_t delay;
|
||||
uint64_t resolution;
|
||||
uint64_t epoch;
|
||||
uint64_t exectime;
|
||||
|
||||
@@ -42,6 +42,8 @@ enum class QueueType : uint8_t
|
||||
MemAllocCallstackNamed,
|
||||
MemFreeCallstack,
|
||||
MemFreeCallstackNamed,
|
||||
MemDiscard,
|
||||
MemDiscardCallstack,
|
||||
GpuZoneBegin,
|
||||
GpuZoneBeginCallstack,
|
||||
GpuZoneBeginAllocSrcLoc,
|
||||
@@ -59,6 +61,7 @@ enum class QueueType : uint8_t
|
||||
ThreadWakeup,
|
||||
GpuTime,
|
||||
GpuContextName,
|
||||
GpuAnnotationName,
|
||||
CallstackFrameSize,
|
||||
SymbolInformation,
|
||||
ExternalNameMetadata,
|
||||
@@ -109,6 +112,7 @@ enum class QueueType : uint8_t
|
||||
SecondStringData,
|
||||
MemNamePayload,
|
||||
ThreadGroupHint,
|
||||
GpuZoneAnnotation,
|
||||
StringData,
|
||||
ThreadName,
|
||||
PlotName,
|
||||
@@ -329,7 +333,7 @@ struct QueuePlotDataInt : public QueuePlotDataBase
|
||||
int64_t val;
|
||||
};
|
||||
|
||||
struct QueuePlotDataFloat : public QueuePlotDataBase
|
||||
struct QueuePlotDataFloat : public QueuePlotDataBase
|
||||
{
|
||||
float val;
|
||||
};
|
||||
@@ -401,7 +405,11 @@ enum class GpuContextType : uint8_t
|
||||
Vulkan,
|
||||
OpenCL,
|
||||
Direct3D12,
|
||||
Direct3D11
|
||||
Direct3D11,
|
||||
Metal,
|
||||
Custom,
|
||||
CUDA,
|
||||
Rocprof
|
||||
};
|
||||
|
||||
enum GpuContextFlags : uint8_t
|
||||
@@ -441,6 +449,15 @@ struct QueueGpuZoneEnd
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneAnnotation
|
||||
{
|
||||
int64_t noteId;
|
||||
double value;
|
||||
uint32_t thread;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuTime
|
||||
{
|
||||
int64_t gpuTime;
|
||||
@@ -462,7 +479,7 @@ struct QueueGpuTimeSync
|
||||
int64_t cpuTime;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
|
||||
struct QueueGpuContextName
|
||||
{
|
||||
uint8_t context;
|
||||
@@ -474,6 +491,18 @@ struct QueueGpuContextNameFat : public QueueGpuContextName
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueGpuAnnotationName
|
||||
{
|
||||
int64_t noteId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuAnnotationNameFat : public QueueGpuAnnotationName
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMemNamePayload
|
||||
{
|
||||
uint64_t name;
|
||||
@@ -500,6 +529,13 @@ struct QueueMemFree
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueMemDiscard
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
uint64_t name;
|
||||
};
|
||||
|
||||
struct QueueCallstackFat
|
||||
{
|
||||
uint64_t ptr;
|
||||
@@ -593,14 +629,20 @@ struct QueueContextSwitch
|
||||
uint32_t oldThread;
|
||||
uint32_t newThread;
|
||||
uint8_t cpu;
|
||||
uint8_t reason;
|
||||
uint8_t state;
|
||||
uint8_t oldThreadWaitReason;
|
||||
uint8_t oldThreadState;
|
||||
uint8_t previousCState;
|
||||
int8_t newThreadPriority;
|
||||
int8_t oldThreadPriority;
|
||||
};
|
||||
|
||||
struct QueueThreadWakeup
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
uint8_t cpu;
|
||||
int8_t adjustReason;
|
||||
int8_t adjustIncrement;
|
||||
};
|
||||
|
||||
struct QueueTidToPid
|
||||
@@ -738,8 +780,11 @@ struct QueueItem
|
||||
QueueGpuTimeSync gpuTimeSync;
|
||||
QueueGpuContextName gpuContextName;
|
||||
QueueGpuContextNameFat gpuContextNameFat;
|
||||
QueueGpuAnnotationName gpuAnnotationName;
|
||||
QueueGpuAnnotationNameFat gpuAnnotationNameFat;
|
||||
QueueMemAlloc memAlloc;
|
||||
QueueMemFree memFree;
|
||||
QueueMemDiscard memDiscard;
|
||||
QueueMemNamePayload memName;
|
||||
QueueThreadGroupHint threadGroupHint;
|
||||
QueueCallstackFat callstackFat;
|
||||
@@ -770,6 +815,7 @@ struct QueueItem
|
||||
QueueSourceCodeNotAvailable sourceCodeNotAvailable;
|
||||
QueueFiberEnter fiberEnter;
|
||||
QueueFiberLeave fiberLeave;
|
||||
QueueGpuZoneAnnotation zoneAnnotation;
|
||||
};
|
||||
};
|
||||
#pragma pack( pop )
|
||||
@@ -811,6 +857,8 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemDiscard ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
|
||||
@@ -828,6 +876,7 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuAnnotationName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
|
||||
sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer
|
||||
@@ -879,6 +928,7 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // second string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneAnnotation ), // GPU zone annotation
|
||||
// keep all QueueStringTransfer below
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
# endif
|
||||
# include <windows.h>
|
||||
# include <malloc.h>
|
||||
# include "TracyUwp.hpp"
|
||||
# include "TracyWinFamily.hpp"
|
||||
#else
|
||||
# include <pthread.h>
|
||||
# include <string.h>
|
||||
@@ -26,7 +26,9 @@
|
||||
# include <fcntl.h>
|
||||
#elif defined __FreeBSD__
|
||||
# include <sys/thr.h>
|
||||
#elif defined __NetBSD__ || defined __DragonFly__
|
||||
#elif defined __NetBSD__
|
||||
# include <lwp.h>
|
||||
#elif defined __DragonFly__
|
||||
# include <sys/lwp.h>
|
||||
#elif defined __QNX__
|
||||
# include <process.h>
|
||||
@@ -135,7 +137,7 @@ TRACY_API void SetThreadName( const char* name )
|
||||
TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint )
|
||||
{
|
||||
#if defined _WIN32
|
||||
# ifdef TRACY_UWP
|
||||
# if defined TRACY_WIN32_NO_DESKTOP
|
||||
static auto _SetThreadDescription = &::SetThreadDescription;
|
||||
# else
|
||||
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
|
||||
@@ -244,7 +246,7 @@ TRACY_API const char* GetThreadName( uint32_t id )
|
||||
#endif
|
||||
|
||||
#if defined _WIN32
|
||||
# ifdef TRACY_UWP
|
||||
# if defined TRACY_WIN32_NO_DESKTOP
|
||||
static auto _GetThreadDescription = &::GetThreadDescription;
|
||||
# else
|
||||
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
#ifndef __TRACYUWP_HPP__
|
||||
#define __TRACYUWP_HPP__
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <winapifamily.h>
|
||||
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
# define TRACY_UWP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -6,7 +6,7 @@ namespace tracy
|
||||
namespace Version
|
||||
{
|
||||
enum { Major = 0 };
|
||||
enum { Minor = 11 };
|
||||
enum { Minor = 13 };
|
||||
enum { Patch = 1 };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
#ifndef __TRACYWINFAMILY_HPP__
|
||||
#define __TRACYWINFAMILY_HPP__
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <winapifamily.h>
|
||||
# if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
# define TRACY_WIN32_NO_DESKTOP
|
||||
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_GAMES)
|
||||
# define TRACY_GDK
|
||||
# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
|
||||
# define TRACY_UWP
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -725,8 +725,8 @@ struct dwarf_data
|
||||
struct dwarf_data *next;
|
||||
/* The data for .gnu_debugaltlink. */
|
||||
struct dwarf_data *altlink;
|
||||
/* The base address for this file. */
|
||||
uintptr_t base_address;
|
||||
/* The base address mapping for this file. */
|
||||
struct libbacktrace_base_address base_address;
|
||||
/* A sorted list of address ranges. */
|
||||
struct unit_addrs *addrs;
|
||||
/* Number of address ranges in list. */
|
||||
@@ -1947,8 +1947,9 @@ update_pcrange (const struct attr* attr, const struct attr_val* val,
|
||||
static int
|
||||
add_low_high_range (struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct unit *u, const struct pcrange *pcrange,
|
||||
struct libbacktrace_base_address base_address,
|
||||
int is_bigendian, struct unit *u,
|
||||
const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state,
|
||||
void *rdata, uintptr_t lowpc,
|
||||
uintptr_t highpc,
|
||||
@@ -1983,8 +1984,8 @@ add_low_high_range (struct backtrace_state *state,
|
||||
|
||||
/* Add in the base address of the module when recording PC values,
|
||||
so that we can look up the PC directly. */
|
||||
lowpc += base_address;
|
||||
highpc += base_address;
|
||||
lowpc = libbacktrace_add_base (lowpc, base_address);
|
||||
highpc = libbacktrace_add_base (highpc, base_address);
|
||||
|
||||
return add_range (state, rdata, lowpc, highpc, error_callback, data, vec);
|
||||
}
|
||||
@@ -1996,7 +1997,7 @@ static int
|
||||
add_ranges_from_ranges (
|
||||
struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct libbacktrace_base_address base_address, int is_bigendian,
|
||||
struct unit *u, uintptr_t base,
|
||||
const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
@@ -2042,10 +2043,11 @@ add_ranges_from_ranges (
|
||||
base = (uintptr_t) high;
|
||||
else
|
||||
{
|
||||
if (!add_range (state, rdata,
|
||||
(uintptr_t) low + base + base_address,
|
||||
(uintptr_t) high + base + base_address,
|
||||
error_callback, data, vec))
|
||||
uintptr_t rl, rh;
|
||||
|
||||
rl = libbacktrace_add_base ((uintptr_t) low + base, base_address);
|
||||
rh = libbacktrace_add_base ((uintptr_t) high + base, base_address);
|
||||
if (!add_range (state, rdata, rl, rh, error_callback, data, vec))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -2063,7 +2065,7 @@ static int
|
||||
add_ranges_from_rnglists (
|
||||
struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct libbacktrace_base_address base_address, int is_bigendian,
|
||||
struct unit *u, uintptr_t base,
|
||||
const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
@@ -2146,9 +2148,10 @@ add_ranges_from_rnglists (
|
||||
u->addrsize, is_bigendian, index,
|
||||
error_callback, data, &high))
|
||||
return 0;
|
||||
if (!add_range (state, rdata, low + base_address,
|
||||
high + base_address, error_callback, data,
|
||||
vec))
|
||||
if (!add_range (state, rdata,
|
||||
libbacktrace_add_base (low, base_address),
|
||||
libbacktrace_add_base (high, base_address),
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
@@ -2165,7 +2168,7 @@ add_ranges_from_rnglists (
|
||||
error_callback, data, &low))
|
||||
return 0;
|
||||
length = read_uleb128 (&rnglists_buf);
|
||||
low += base_address;
|
||||
low = libbacktrace_add_base (low, base_address);
|
||||
if (!add_range (state, rdata, low, low + length,
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
@@ -2179,8 +2182,9 @@ add_ranges_from_rnglists (
|
||||
|
||||
low = read_uleb128 (&rnglists_buf);
|
||||
high = read_uleb128 (&rnglists_buf);
|
||||
if (!add_range (state, rdata, low + base + base_address,
|
||||
high + base + base_address,
|
||||
if (!add_range (state, rdata,
|
||||
libbacktrace_add_base (low + base, base_address),
|
||||
libbacktrace_add_base (high + base, base_address),
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
}
|
||||
@@ -2197,9 +2201,10 @@ add_ranges_from_rnglists (
|
||||
|
||||
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
high = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
if (!add_range (state, rdata, low + base_address,
|
||||
high + base_address, error_callback, data,
|
||||
vec))
|
||||
if (!add_range (state, rdata,
|
||||
libbacktrace_add_base (low, base_address),
|
||||
libbacktrace_add_base (high, base_address),
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
@@ -2211,7 +2216,7 @@ add_ranges_from_rnglists (
|
||||
|
||||
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
length = (uintptr_t) read_uleb128 (&rnglists_buf);
|
||||
low += base_address;
|
||||
low = libbacktrace_add_base (low, base_address);
|
||||
if (!add_range (state, rdata, low, low + length,
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
@@ -2239,7 +2244,7 @@ add_ranges_from_rnglists (
|
||||
static int
|
||||
add_ranges (struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct libbacktrace_base_address base_address, int is_bigendian,
|
||||
struct unit *u, uintptr_t base, const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
@@ -2275,7 +2280,8 @@ add_ranges (struct backtrace_state *state,
|
||||
read, 0 if there is some error. */
|
||||
|
||||
static int
|
||||
find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
|
||||
find_address_ranges (struct backtrace_state *state,
|
||||
struct libbacktrace_base_address base_address,
|
||||
struct dwarf_buf *unit_buf,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
int is_bigendian, struct dwarf_data *altlink,
|
||||
@@ -2430,7 +2436,8 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address,
|
||||
on success, 0 on failure. */
|
||||
|
||||
static int
|
||||
build_address_map (struct backtrace_state *state, uintptr_t base_address,
|
||||
build_address_map (struct backtrace_state *state,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
int is_bigendian, struct dwarf_data *altlink,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
@@ -2649,7 +2656,7 @@ add_line (struct backtrace_state *state, struct dwarf_data *ddata,
|
||||
|
||||
/* Add in the base address here, so that we can look up the PC
|
||||
directly. */
|
||||
ln->pc = pc + ddata->base_address;
|
||||
ln->pc = libbacktrace_add_base (pc, ddata->base_address);
|
||||
|
||||
ln->filename = filename;
|
||||
ln->lineno = lineno;
|
||||
@@ -4329,7 +4336,7 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc,
|
||||
|
||||
static struct dwarf_data *
|
||||
build_dwarf_data (struct backtrace_state *state,
|
||||
uintptr_t base_address,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
int is_bigendian,
|
||||
struct dwarf_data *altlink,
|
||||
@@ -4387,7 +4394,7 @@ build_dwarf_data (struct backtrace_state *state,
|
||||
|
||||
int
|
||||
backtrace_dwarf_add (struct backtrace_state *state,
|
||||
uintptr_t base_address,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
int is_bigendian,
|
||||
struct dwarf_data *fileline_altlink,
|
||||
|
||||
@@ -75,7 +75,7 @@ namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size );
|
||||
int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename );
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN
|
||||
@@ -643,7 +643,7 @@ elf_symbol_search (const void *vkey, const void *ventry)
|
||||
|
||||
static int
|
||||
elf_initialize_syminfo (struct backtrace_state *state,
|
||||
uintptr_t base_address,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const unsigned char *symtab_data, size_t symtab_size,
|
||||
const unsigned char *strtab, size_t strtab_size,
|
||||
backtrace_error_callback error_callback,
|
||||
@@ -709,7 +709,8 @@ elf_initialize_syminfo (struct backtrace_state *state,
|
||||
= *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr));
|
||||
else
|
||||
elf_symbols[j].address = sym->st_value;
|
||||
elf_symbols[j].address += base_address;
|
||||
elf_symbols[j].address =
|
||||
libbacktrace_add_base (elf_symbols[j].address, base_address);
|
||||
elf_symbols[j].size = sym->st_size;
|
||||
++j;
|
||||
}
|
||||
@@ -1200,14 +1201,7 @@ elf_fetch_bits_backward (const unsigned char **ppin,
|
||||
val = *pval;
|
||||
|
||||
if (unlikely (pin <= pinend))
|
||||
{
|
||||
if (bits == 0)
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
|
||||
pin -= 4;
|
||||
|
||||
@@ -5712,10 +5706,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed,
|
||||
/* Block header CRC. */
|
||||
computed_crc = elf_crc32 (0, compressed + block_header_offset,
|
||||
block_header_size - 4);
|
||||
stream_crc = (compressed[off]
|
||||
| (compressed[off + 1] << 8)
|
||||
| (compressed[off + 2] << 16)
|
||||
| (compressed[off + 3] << 24));
|
||||
stream_crc = ((uint32_t)compressed[off]
|
||||
| ((uint32_t)compressed[off + 1] << 8)
|
||||
| ((uint32_t)compressed[off + 2] << 16)
|
||||
| ((uint32_t)compressed[off + 3] << 24));
|
||||
if (unlikely (computed_crc != stream_crc))
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
@@ -6222,10 +6216,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed,
|
||||
return 0;
|
||||
}
|
||||
computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset);
|
||||
stream_crc = (compressed[off]
|
||||
| (compressed[off + 1] << 8)
|
||||
| (compressed[off + 2] << 16)
|
||||
| (compressed[off + 3] << 24));
|
||||
stream_crc = ((uint32_t)compressed[off]
|
||||
| ((uint32_t)compressed[off + 1] << 8)
|
||||
| ((uint32_t)compressed[off + 2] << 16)
|
||||
| ((uint32_t)compressed[off + 3] << 24));
|
||||
if (computed_crc != stream_crc)
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
@@ -6325,10 +6319,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
|
||||
|
||||
/* Next comes a CRC of the stream flags. */
|
||||
computed_crc = elf_crc32 (0, compressed + 6, 2);
|
||||
stream_crc = (compressed[8]
|
||||
| (compressed[9] << 8)
|
||||
| (compressed[10] << 16)
|
||||
| (compressed[11] << 24));
|
||||
stream_crc = ((uint32_t)compressed[8]
|
||||
| ((uint32_t)compressed[9] << 8)
|
||||
| ((uint32_t)compressed[10] << 16)
|
||||
| ((uint32_t)compressed[11] << 24));
|
||||
if (unlikely (computed_crc != stream_crc))
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
@@ -6369,10 +6363,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
|
||||
|
||||
/* Before that is a footer CRC. */
|
||||
computed_crc = elf_crc32 (0, compressed + offset, 6);
|
||||
stream_crc = (compressed[offset - 4]
|
||||
| (compressed[offset - 3] << 8)
|
||||
| (compressed[offset - 2] << 16)
|
||||
| (compressed[offset - 1] << 24));
|
||||
stream_crc = ((uint32_t)compressed[offset - 4]
|
||||
| ((uint32_t)compressed[offset - 3] << 8)
|
||||
| ((uint32_t)compressed[offset - 2] << 16)
|
||||
| ((uint32_t)compressed[offset - 1] << 24));
|
||||
if (unlikely (computed_crc != stream_crc))
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
@@ -6428,10 +6422,10 @@ elf_uncompress_lzma (struct backtrace_state *state,
|
||||
/* Next is a CRC of the index. */
|
||||
computed_crc = elf_crc32 (0, compressed + index_offset,
|
||||
offset - index_offset);
|
||||
stream_crc = (compressed[offset]
|
||||
| (compressed[offset + 1] << 8)
|
||||
| (compressed[offset + 2] << 16)
|
||||
| (compressed[offset + 3] << 24));
|
||||
stream_crc = ((uint32_t)compressed[offset]
|
||||
| ((uint32_t)compressed[offset + 1] << 8)
|
||||
| ((uint32_t)compressed[offset + 2] << 16)
|
||||
| ((uint32_t)compressed[offset + 3] << 24));
|
||||
if (unlikely (computed_crc != stream_crc))
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
@@ -6524,7 +6518,8 @@ backtrace_uncompress_lzma (struct backtrace_state *state,
|
||||
static int
|
||||
elf_add (struct backtrace_state *state, const char *filename, int descriptor,
|
||||
const unsigned char *memory, size_t memory_size,
|
||||
uintptr_t base_address, struct elf_ppc64_opd_data *caller_opd,
|
||||
struct libbacktrace_base_address base_address,
|
||||
struct elf_ppc64_opd_data *caller_opd,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
fileline *fileline_fn, int *found_sym, int *found_dwarf,
|
||||
struct dwarf_data **fileline_entry, int exe, int debuginfo,
|
||||
@@ -6867,7 +6862,8 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor,
|
||||
}
|
||||
}
|
||||
|
||||
if (!gnu_debugdata_view_valid
|
||||
if (!debuginfo
|
||||
&& !gnu_debugdata_view_valid
|
||||
&& strcmp (name, ".gnu_debugdata") == 0)
|
||||
{
|
||||
if (!elf_get_view (state, descriptor, memory, memory_size,
|
||||
@@ -7425,6 +7421,7 @@ phdr_callback (struct PhdrIterate *info, void *pdata)
|
||||
const char *filename;
|
||||
int descriptor;
|
||||
int does_not_exist;
|
||||
struct libbacktrace_base_address base_address;
|
||||
fileline elf_fileline_fn;
|
||||
int found_dwarf;
|
||||
|
||||
@@ -7454,7 +7451,8 @@ phdr_callback (struct PhdrIterate *info, void *pdata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, NULL,
|
||||
base_address.m = info->dlpi_addr;
|
||||
if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL,
|
||||
pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym,
|
||||
&found_dwarf, NULL, 0, 0, NULL, 0))
|
||||
{
|
||||
@@ -7543,11 +7541,21 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
|
||||
fileline elf_fileline_fn = elf_nodebug;
|
||||
struct phdr_data pd;
|
||||
|
||||
ret = elf_add (state, filename, descriptor, NULL, 0, 0, NULL, error_callback,
|
||||
data, &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0,
|
||||
NULL, 0);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
/* When using fdpic we must use dl_iterate_phdr for all modules, including
|
||||
the main executable, so that we can get the right base address
|
||||
mapping. */
|
||||
if (!libbacktrace_using_fdpic ())
|
||||
{
|
||||
struct libbacktrace_base_address zero_base_address;
|
||||
|
||||
memset (&zero_base_address, 0, sizeof zero_base_address);
|
||||
ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address,
|
||||
NULL, error_callback, data, &elf_fileline_fn, &found_sym,
|
||||
&found_dwarf, NULL, 1, 0, NULL, 0);
|
||||
if (!ret)
|
||||
return 0;
|
||||
}
|
||||
|
||||
pd.state = state;
|
||||
pd.error_callback = error_callback;
|
||||
|
||||
@@ -333,10 +333,44 @@ struct dwarf_sections
|
||||
|
||||
struct dwarf_data;
|
||||
|
||||
/* The load address mapping. */
|
||||
|
||||
#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H))
|
||||
|
||||
#ifdef HAVE_LINK_H
|
||||
#include <link.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_LINK_H
|
||||
#include <sys/link.h>
|
||||
#endif
|
||||
|
||||
#define libbacktrace_using_fdpic() (1)
|
||||
|
||||
struct libbacktrace_base_address
|
||||
{
|
||||
struct elf32_fdpic_loadaddr m;
|
||||
};
|
||||
|
||||
#define libbacktrace_add_base(pc, base) \
|
||||
((uintptr_t) (__RELOC_POINTER ((pc), (base).m)))
|
||||
|
||||
#else /* not _FDPIC__ */
|
||||
|
||||
#define libbacktrace_using_fdpic() (0)
|
||||
|
||||
struct libbacktrace_base_address
|
||||
{
|
||||
uintptr_t m;
|
||||
};
|
||||
|
||||
#define libbacktrace_add_base(pc, base) ((pc) + (base).m)
|
||||
|
||||
#endif /* not _FDPIC__ */
|
||||
|
||||
/* Add file/line information for a DWARF module. */
|
||||
|
||||
extern int backtrace_dwarf_add (struct backtrace_state *state,
|
||||
uintptr_t base_address,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
int is_bigendian,
|
||||
struct dwarf_data *fileline_altlink,
|
||||
|
||||
@@ -274,12 +274,14 @@ struct macho_nlist_64
|
||||
|
||||
/* Value found in nlist n_type field. */
|
||||
|
||||
#define MACH_O_N_EXT 0x01 /* Extern symbol */
|
||||
#define MACH_O_N_ABS 0x02 /* Absolute symbol */
|
||||
#define MACH_O_N_SECT 0x0e /* Defined in section */
|
||||
|
||||
#define MACH_O_N_TYPE 0x0e /* Mask for type bits */
|
||||
#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */
|
||||
#define MACH_O_N_TYPE 0x0e /* Mask for type bits */
|
||||
|
||||
/* Values found after masking with MACH_O_N_TYPE. */
|
||||
#define MACH_O_N_UNDF 0x00 /* Undefined symbol */
|
||||
#define MACH_O_N_ABS 0x02 /* Absolute symbol */
|
||||
#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */
|
||||
|
||||
|
||||
/* Information we keep for a Mach-O symbol. */
|
||||
|
||||
@@ -307,17 +309,18 @@ static const char * const dwarf_section_names[DEBUG_MAX] =
|
||||
"__debug_abbrev",
|
||||
"__debug_ranges",
|
||||
"__debug_str",
|
||||
"", /* DEBUG_ADDR */
|
||||
"__debug_addr",
|
||||
"__debug_str_offs",
|
||||
"", /* DEBUG_LINE_STR */
|
||||
"__debug_line_str",
|
||||
"__debug_rnglists"
|
||||
};
|
||||
|
||||
/* Forward declaration. */
|
||||
|
||||
static int macho_add (struct backtrace_state *, const char *, int, off_t,
|
||||
const unsigned char *, uintptr_t, int,
|
||||
backtrace_error_callback, void *, fileline *, int *);
|
||||
const unsigned char *, struct libbacktrace_base_address,
|
||||
int, backtrace_error_callback, void *, fileline *,
|
||||
int *);
|
||||
|
||||
/* A dummy callback function used when we can't find any debug info. */
|
||||
|
||||
@@ -495,10 +498,10 @@ macho_defined_symbol (uint8_t type)
|
||||
{
|
||||
if ((type & MACH_O_N_STAB) != 0)
|
||||
return 0;
|
||||
if ((type & MACH_O_N_EXT) != 0)
|
||||
return 0;
|
||||
switch (type & MACH_O_N_TYPE)
|
||||
{
|
||||
case MACH_O_N_UNDF:
|
||||
return 0;
|
||||
case MACH_O_N_ABS:
|
||||
return 1;
|
||||
case MACH_O_N_SECT:
|
||||
@@ -512,7 +515,7 @@ macho_defined_symbol (uint8_t type)
|
||||
|
||||
static int
|
||||
macho_add_symtab (struct backtrace_state *state, int descriptor,
|
||||
uintptr_t base_address, int is_64,
|
||||
struct libbacktrace_base_address base_address, int is_64,
|
||||
off_t symoff, unsigned int nsyms, off_t stroff,
|
||||
unsigned int strsize,
|
||||
backtrace_error_callback error_callback, void *data)
|
||||
@@ -627,7 +630,7 @@ macho_add_symtab (struct backtrace_state *state, int descriptor,
|
||||
if (name[0] == '_')
|
||||
++name;
|
||||
macho_symbols[j].name = name;
|
||||
macho_symbols[j].address = value + base_address;
|
||||
macho_symbols[j].address = libbacktrace_add_base (value, base_address);
|
||||
++j;
|
||||
}
|
||||
|
||||
@@ -760,7 +763,8 @@ macho_syminfo (struct backtrace_state *state, uintptr_t addr,
|
||||
static int
|
||||
macho_add_fat (struct backtrace_state *state, const char *filename,
|
||||
int descriptor, int swapped, off_t offset,
|
||||
const unsigned char *match_uuid, uintptr_t base_address,
|
||||
const unsigned char *match_uuid,
|
||||
struct libbacktrace_base_address base_address,
|
||||
int skip_symtab, uint32_t nfat_arch, int is_64,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
fileline *fileline_fn, int *found_sym)
|
||||
@@ -862,7 +866,8 @@ macho_add_fat (struct backtrace_state *state, const char *filename,
|
||||
|
||||
static int
|
||||
macho_add_dsym (struct backtrace_state *state, const char *filename,
|
||||
uintptr_t base_address, const unsigned char *uuid,
|
||||
struct libbacktrace_base_address base_address,
|
||||
const unsigned char *uuid,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
fileline* fileline_fn)
|
||||
{
|
||||
@@ -980,7 +985,7 @@ macho_add_dsym (struct backtrace_state *state, const char *filename,
|
||||
static int
|
||||
macho_add (struct backtrace_state *state, const char *filename, int descriptor,
|
||||
off_t offset, const unsigned char *match_uuid,
|
||||
uintptr_t base_address, int skip_symtab,
|
||||
struct libbacktrace_base_address base_address, int skip_symtab,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
fileline *fileline_fn, int *found_sym)
|
||||
{
|
||||
@@ -1242,7 +1247,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
|
||||
c = _dyld_image_count ();
|
||||
for (i = 0; i < c; ++i)
|
||||
{
|
||||
uintptr_t base_address;
|
||||
struct libbacktrace_base_address base_address;
|
||||
const char *name;
|
||||
int d;
|
||||
fileline mff;
|
||||
@@ -1266,7 +1271,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
|
||||
continue;
|
||||
}
|
||||
|
||||
base_address = _dyld_get_image_vmaddr_slide (i);
|
||||
base_address.m = _dyld_get_image_vmaddr_slide (i);
|
||||
|
||||
mff = macho_nodebug;
|
||||
if (!macho_add (state, name, d, 0, NULL, base_address, 0,
|
||||
@@ -1321,10 +1326,12 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
|
||||
void *data, fileline *fileline_fn)
|
||||
{
|
||||
fileline macho_fileline_fn;
|
||||
struct libbacktrace_base_address zero_base_address;
|
||||
int found_sym;
|
||||
|
||||
macho_fileline_fn = macho_nodebug;
|
||||
if (!macho_add (state, filename, descriptor, 0, NULL, 0, 0,
|
||||
memset (&zero_base_address, 0, sizeof zero_base_address);
|
||||
if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0,
|
||||
error_callback, data, &macho_fileline_fn, &found_sym))
|
||||
return 0;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef TracyLine
|
||||
# define TracyLine __LINE__
|
||||
# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665
|
||||
#endif
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
@@ -75,8 +75,10 @@
|
||||
|
||||
#define TracyAlloc(x,y)
|
||||
#define TracyFree(x)
|
||||
#define TracyMemoryDiscard(x)
|
||||
#define TracySecureAlloc(x,y)
|
||||
#define TracySecureFree(x)
|
||||
#define TracySecureMemoryDiscard(x)
|
||||
|
||||
#define TracyAllocN(x,y,z)
|
||||
#define TracyFreeN(x,y)
|
||||
@@ -98,8 +100,10 @@
|
||||
|
||||
#define TracyAllocS(x,y,z)
|
||||
#define TracyFreeS(x,y)
|
||||
#define TracyMemoryDiscardS(x,y)
|
||||
#define TracySecureAllocS(x,y,z)
|
||||
#define TracySecureFreeS(x,y)
|
||||
#define TracySecureMemoryDiscardS(x,y)
|
||||
|
||||
#define TracyAllocNS(x,y,z,w)
|
||||
#define TracyFreeNS(x,y,z)
|
||||
@@ -130,32 +134,49 @@
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyScoped.hpp"
|
||||
|
||||
#define TracyNoop tracy::ProfilerAvailable()
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
|
||||
# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
|
||||
#else
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active )
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active )
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active )
|
||||
# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, active )
|
||||
#ifndef TRACY_CALLSTACK
|
||||
#define TRACY_CALLSTACK 0
|
||||
#endif
|
||||
|
||||
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
|
||||
#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true )
|
||||
#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true )
|
||||
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
|
||||
#define TracyNoop tracy::ProfilerAvailable()
|
||||
|
||||
#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active )
|
||||
|
||||
#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active )
|
||||
#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active )
|
||||
#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active )
|
||||
|
||||
#if defined(TRACY_ALLOW_SHADOW_WARNING)
|
||||
#define SuppressVarShadowWarning(Expr) Expr
|
||||
#elif defined(__clang__)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Wshadow\"") \
|
||||
Expr; \
|
||||
_Pragma("clang diagnostic pop")
|
||||
#elif defined(__GNUC__)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wshadow\"") \
|
||||
Expr; \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#elif defined(_MSC_VER)
|
||||
#define SuppressVarShadowWarning(Expr) \
|
||||
_Pragma("warning(push)") \
|
||||
_Pragma("warning(disable : 4456)") \
|
||||
Expr; \
|
||||
_Pragma("warning(pop)")
|
||||
#else
|
||||
#define SuppressVarShadowWarning(Expr) Expr
|
||||
#endif
|
||||
|
||||
#define ZoneScoped SuppressVarShadowWarning( ZoneNamed( ___tracy_scoped_zone, true ) )
|
||||
#define ZoneScopedN( name ) SuppressVarShadowWarning( ZoneNamedN( ___tracy_scoped_zone, name, true ) )
|
||||
#define ZoneScopedC( color ) SuppressVarShadowWarning( ZoneNamedC( ___tracy_scoped_zone, color, true ) )
|
||||
#define ZoneScopedNC( name, color ) SuppressVarShadowWarning( ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) )
|
||||
|
||||
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size )
|
||||
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size )
|
||||
@@ -185,7 +206,7 @@
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
|
||||
#define LockableBase( type ) tracy::Lockable<type>
|
||||
#define SharedLockableBase( type ) tracy::SharedLockable<type>
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname )
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_lock_location_,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &TracyConcat(__tracy_lock_location_,TracyLine) )
|
||||
#define LockableName( varname, txt, size ) varname.CustomName( txt, size )
|
||||
|
||||
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val )
|
||||
@@ -193,95 +214,52 @@
|
||||
|
||||
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size )
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
|
||||
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
|
||||
#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
|
||||
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
|
||||
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
|
||||
#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
|
||||
#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
|
||||
#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
|
||||
#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
|
||||
#else
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 )
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 )
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 )
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 )
|
||||
#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
|
||||
#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name )
|
||||
#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK )
|
||||
#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name )
|
||||
#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name )
|
||||
#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK )
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false )
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false )
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true )
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true )
|
||||
#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name )
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name )
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name )
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name )
|
||||
#endif
|
||||
#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
|
||||
#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active )
|
||||
#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active )
|
||||
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active )
|
||||
#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
|
||||
#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
|
||||
#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
|
||||
#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
|
||||
#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
|
||||
#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth )
|
||||
#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
|
||||
#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
|
||||
#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false )
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false )
|
||||
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true )
|
||||
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true )
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name )
|
||||
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name )
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name )
|
||||
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name )
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
|
||||
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
|
||||
# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
|
||||
# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
|
||||
#else
|
||||
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
|
||||
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
|
||||
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
|
||||
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneScoped
|
||||
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
|
||||
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
|
||||
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
|
||||
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
|
||||
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name )
|
||||
# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name )
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name )
|
||||
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name )
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
|
||||
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
|
||||
# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color )
|
||||
# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color )
|
||||
#endif
|
||||
#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth )
|
||||
#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth )
|
||||
#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth )
|
||||
#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth )
|
||||
|
||||
#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data )
|
||||
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../client/TracyCallstack.h"
|
||||
#include "../common/TracyApi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -53,8 +52,10 @@ typedef const void* TracyCLockCtx;
|
||||
|
||||
#define TracyCAlloc(x,y)
|
||||
#define TracyCFree(x)
|
||||
#define TracyCMemoryDiscard(x)
|
||||
#define TracyCSecureAlloc(x,y)
|
||||
#define TracyCSecureFree(x)
|
||||
#define TracyCSecureMemoryDiscard(x)
|
||||
|
||||
#define TracyCAllocN(x,y,z)
|
||||
#define TracyCFreeN(x,y)
|
||||
@@ -85,8 +86,10 @@ typedef const void* TracyCLockCtx;
|
||||
|
||||
#define TracyCAllocS(x,y,z)
|
||||
#define TracyCFreeS(x,y)
|
||||
#define TracyCMemoryDiscardS(x,y)
|
||||
#define TracyCSecureAllocS(x,y,z)
|
||||
#define TracyCSecureFreeS(x,y)
|
||||
#define TracyCSecureMemoryDiscardS(x,y)
|
||||
|
||||
#define TracyCAllocNS(x,y,z,w)
|
||||
#define TracyCFreeNS(x,y,z)
|
||||
@@ -111,6 +114,9 @@ typedef const void* TracyCLockCtx;
|
||||
#define TracyCIsConnected 0
|
||||
#define TracyCIsStarted 0
|
||||
|
||||
#define TracyCBeginSamplingProfiling() 0
|
||||
#define TracyCEndSamplingProfiling()
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyCFiberEnter(fiber)
|
||||
# define TracyCFiberLeave
|
||||
@@ -137,7 +143,7 @@ struct ___tracy_source_location_data
|
||||
struct ___tracy_c_zone_context
|
||||
{
|
||||
uint32_t id;
|
||||
int active;
|
||||
int32_t active;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_time_data
|
||||
@@ -155,7 +161,7 @@ struct ___tracy_gpu_zone_begin_data {
|
||||
|
||||
struct ___tracy_gpu_zone_begin_callstack_data {
|
||||
uint64_t srcloc;
|
||||
int depth;
|
||||
int32_t depth;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
@@ -201,7 +207,7 @@ typedef struct __tracy_lockable_context_data* TracyCLockCtx;
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
TRACY_API void ___tracy_startup_profiler(void);
|
||||
TRACY_API void ___tracy_shutdown_profiler(void);
|
||||
TRACY_API int ___tracy_profiler_started(void);
|
||||
TRACY_API int32_t ___tracy_profiler_started(void);
|
||||
|
||||
# define TracyCIsStarted ___tracy_profiler_started()
|
||||
#else
|
||||
@@ -211,10 +217,10 @@ TRACY_API int ___tracy_profiler_started(void);
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color );
|
||||
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active );
|
||||
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
|
||||
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
@@ -243,20 +249,17 @@ TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_
|
||||
TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data );
|
||||
TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data );
|
||||
|
||||
TRACY_API int ___tracy_connected(void);
|
||||
TRACY_API int32_t ___tracy_connected(void);
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active );
|
||||
#ifndef TRACY_CALLSTACK
|
||||
#define TRACY_CALLSTACK 0
|
||||
#endif
|
||||
|
||||
#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
|
||||
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
|
||||
|
||||
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
|
||||
@@ -265,57 +268,44 @@ TRACY_API int ___tracy_connected(void);
|
||||
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure );
|
||||
TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth );
|
||||
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth );
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK );
|
||||
#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK );
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
|
||||
#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
|
||||
#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
|
||||
#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
|
||||
#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
#else
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name );
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name );
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name );
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name );
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 );
|
||||
#endif
|
||||
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_frame_mark( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
|
||||
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip );
|
||||
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip );
|
||||
|
||||
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
|
||||
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
|
||||
@@ -327,7 +317,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
|
||||
TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
|
||||
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
|
||||
TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
@@ -337,55 +327,35 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
|
||||
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active );
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth )
|
||||
#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
|
||||
#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
|
||||
#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
|
||||
#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
|
||||
#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
|
||||
# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
|
||||
#else
|
||||
# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active )
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active )
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active )
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active )
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
|
||||
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
|
||||
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
|
||||
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
|
||||
# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color )
|
||||
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
|
||||
#endif
|
||||
#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
|
||||
#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
|
||||
|
||||
|
||||
TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc );
|
||||
TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata );
|
||||
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired );
|
||||
TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired );
|
||||
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
|
||||
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
|
||||
|
||||
@@ -400,6 +370,12 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex
|
||||
|
||||
#define TracyCIsConnected ___tracy_connected()
|
||||
|
||||
TRACY_API int ___tracy_begin_sampling_profiler( void );
|
||||
TRACY_API void ___tracy_end_sampling_profiler( void );
|
||||
|
||||
#define TracyCBeginSamplingProfiling() ___tracy_begin_sampling_profiling()
|
||||
#define TracyCEndSamplingProfiling() ___tracy_end_sampling_profiling()
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TRACY_API void ___tracy_fiber_enter( const char* fiber );
|
||||
TRACY_API void ___tracy_fiber_leave( void );
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -95,6 +95,10 @@ public:
|
||||
|
||||
int64_t tcpu0 = Profiler::GetTime();
|
||||
WaitForQuery(m_disjointQuery);
|
||||
// NOTE: one would expect that by waiting for the enclosing disjoint query to finish,
|
||||
// all timestamp queries within would also be readily available, but that does not
|
||||
// seem to be the case here... See https://github.com/wolfpld/tracy/issues/947
|
||||
WaitForQuery(m_queries[0]);
|
||||
int64_t tcpu1 = Profiler::GetTime();
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
|
||||
@@ -109,7 +113,7 @@ public:
|
||||
|
||||
UINT64 timestamp = 0;
|
||||
if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK)
|
||||
continue; // this should never happen, since the enclosing disjoint query succeeded
|
||||
continue; // this should never happen (we waited for the query to finish above)
|
||||
|
||||
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
|
||||
tgpu = timestamp * (1000000000 / disjoint.Frequency);
|
||||
@@ -307,13 +311,21 @@ public:
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active )
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active )
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
|
||||
@@ -327,15 +339,23 @@ public:
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active)
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
if ( depth > 0 && has_callstack() )
|
||||
{
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D11ZoneScope()
|
||||
|
||||
@@ -385,7 +385,7 @@ namespace tracy
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
@@ -405,7 +405,7 @@ namespace tracy
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
@@ -120,6 +120,8 @@ static inline void LuaRemove( char* script )
|
||||
}
|
||||
}
|
||||
|
||||
static inline void LuaHook( lua_State* L, lua_Debug* ar ) {}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -143,6 +145,13 @@ TRACY_API LuaZoneState& GetLuaZoneState();
|
||||
namespace detail
|
||||
{
|
||||
|
||||
static inline void LuaShortenSrc( char* dst, const char* src )
|
||||
{
|
||||
size_t l = std::min( (size_t)255, strlen( src ) );
|
||||
memcpy( dst, src, l );
|
||||
dst[l] = 0;
|
||||
}
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
{
|
||||
@@ -188,13 +197,6 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
TracyQueueCommit( callstackAllocFatThread );
|
||||
}
|
||||
|
||||
static inline void LuaShortenSrc( char* dst, const char* src )
|
||||
{
|
||||
size_t l = std::min( (size_t)255, strlen( src ) );
|
||||
memcpy( dst, src, l );
|
||||
dst[l] = 0;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginS( lua_State* L )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
@@ -439,6 +441,44 @@ static inline void LuaRegister( lua_State* L )
|
||||
|
||||
static inline void LuaRemove( char* script ) {}
|
||||
|
||||
static inline void LuaHook( lua_State* L, lua_Debug* ar )
|
||||
{
|
||||
if ( ar->event == LUA_HOOKCALL )
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const auto zoneCnt = GetLuaZoneState().counter++;
|
||||
if ( zoneCnt != 0 && !GetLuaZoneState().active ) return;
|
||||
GetLuaZoneState().active = GetProfiler().IsConnected();
|
||||
if ( !GetLuaZoneState().active ) return;
|
||||
#endif
|
||||
lua_getinfo( L, "Snl", ar );
|
||||
|
||||
char src[256];
|
||||
detail::LuaShortenSrc( src, ar->short_src );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src );
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
else if (ar->event == LUA_HOOKRET) {
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
assert( GetLuaZoneState().counter != 0 );
|
||||
GetLuaZoneState().counter--;
|
||||
if ( !GetLuaZoneState().active ) return;
|
||||
if ( !GetProfiler().IsConnected() )
|
||||
{
|
||||
GetLuaZoneState().active = false;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyQueueCommit( zoneEndThread );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,644 @@
|
||||
#ifndef __TRACYMETAL_HMM__
|
||||
#define __TRACYMETAL_HMM__
|
||||
|
||||
/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple
|
||||
Silicon devices, but it should also work on Intel-based Macs and older iOS devices).
|
||||
The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan,
|
||||
Direct3D and OpenGL. Specifically, TracyMetalZone() must be placed around the site where
|
||||
a command encoder is created. This is because not all hardware supports timestamps at
|
||||
command granularity, and can only provide timestamps around an entire command encoder.
|
||||
This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone()
|
||||
will be added to support the habitual command-level granularity of Tracy GPU back-ends.
|
||||
Metal also imposes a few restrictions that make the process of requesting and collecting
|
||||
queries more complicated in Tracy:
|
||||
a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes)
|
||||
b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes,
|
||||
and there's no way to reset them back to zero after timestamps get resolved; the only
|
||||
way to clear the timestamps is by allocating a new timestamp query buffer
|
||||
c) if a command encoder records no commands and its corresponding command buffer ends up
|
||||
committed to the command queue, Metal will "optimize-away" the encoder along with any
|
||||
timestamp queries associated with it (the timestamp will remain as zero and will never
|
||||
get resolved)
|
||||
Because of the limitations above, two timestamp buffers are managed internally. Once one
|
||||
of the buffers fills up with requests, the second buffer can start serving new requests.
|
||||
Once all requests in a buffer get resolved and collected, the entire buffer is discarded
|
||||
and a new one allocated for future requests. (Proper cycling through a ring buffer would
|
||||
require bookkeeping and completion handlers to collect only the known complete queries.)
|
||||
In the current implementation, there is potential for a race condition when the buffer is
|
||||
discarded and reallocated. In practice, the race condition will never materialize so long
|
||||
as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low.
|
||||
Finally, there's a timeout mechanism during timestamp collection to detect "empty" command
|
||||
encoders and ensure progress.
|
||||
*/
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyMetalContext(device) nullptr
|
||||
#define TracyMetalDestroy(ctx)
|
||||
#define TracyMetalContextName(ctx, name, size)
|
||||
|
||||
#define TracyMetalZone(ctx, encoderDesc, name)
|
||||
#define TracyMetalZoneC(ctx, encoderDesc, name, color)
|
||||
#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active)
|
||||
#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active)
|
||||
|
||||
#define TracyMetalCollect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class MetalZoneScope {};
|
||||
}
|
||||
|
||||
using TracyMetalCtx = void;
|
||||
|
||||
#else
|
||||
|
||||
#if not __has_feature(objc_arc)
|
||||
#error TracyMetal requires ARC to be enabled.
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
// ok to import if in obj-c code
|
||||
#import <Metal/Metal.h>
|
||||
|
||||
#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__
|
||||
|
||||
#define TracyMetalPanic(ret, msg, ...) do { \
|
||||
char buffer [1024]; \
|
||||
snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \
|
||||
TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \
|
||||
fprintf(stderr, "%s\n", buffer); \
|
||||
ret; \
|
||||
} while(false);
|
||||
|
||||
#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f
|
||||
#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
|
||||
#ifndef TRACY_METAL_DEBUG_MASK
|
||||
#define TRACY_METAL_DEBUG_MASK (0)
|
||||
#endif//TRACY_METAL_DEBUG_MASK
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK
|
||||
#define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; }
|
||||
#else
|
||||
#define TracyMetalDebugMasked(mask, ...)
|
||||
#endif
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK & (1 << 1)
|
||||
#define TracyMetalDebug_0b00010(...) __VA_ARGS__;
|
||||
#else
|
||||
#define TracyMetalDebug_0b00010(...)
|
||||
#endif
|
||||
|
||||
#if TRACY_METAL_DEBUG_MASK & (1 << 4)
|
||||
#define TracyMetalDebug_0b10000(...) __VA_ARGS__;
|
||||
#else
|
||||
#define TracyMetalDebug_0b10000(...)
|
||||
#endif
|
||||
|
||||
#ifndef TracyMetalDebugZoneScopeWireTap
|
||||
#define TracyMetalDebugZoneScopeWireTap
|
||||
#endif//TracyMetalDebugZoneScopeWireTap
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class MetalCtx
|
||||
{
|
||||
friend class MetalZoneScope;
|
||||
|
||||
enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_...
|
||||
|
||||
public:
|
||||
static MetalCtx* Create(id<MTLDevice> device)
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4);
|
||||
auto ctx = static_cast<MetalCtx*>(tracy_malloc(sizeof(MetalCtx)));
|
||||
new (ctx) MetalCtx(device);
|
||||
if (ctx->m_contextId == 255)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context.");
|
||||
Destroy(ctx);
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static void Destroy(MetalCtx* ctx)
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4);
|
||||
ctx->~MetalCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
bool Collect()
|
||||
{
|
||||
ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Only one thread is allowed to collect timestamps at any given time
|
||||
// but there's no need to block contending threads
|
||||
if (!m_collectionMutex.try_lock())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_lock lock (m_collectionMutex, std::adopt_lock);
|
||||
|
||||
uintptr_t begin = m_previousCheckpoint.load();
|
||||
uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? MTLFence?;
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(begin));
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint));
|
||||
|
||||
uint32_t count = RingCount(begin, latestCheckpoint);
|
||||
if (count == 0) // no pending timestamp queries
|
||||
{
|
||||
//uintptr_t nextCheckpoint = m_queryCounter.load();
|
||||
//if (nextCheckpoint != latestCheckpoint)
|
||||
//{
|
||||
// // TODO: signal event / fence now?
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
// resolve up until the ring buffer boundary and let a subsequenty call
|
||||
// to Collect handle the wrap-around
|
||||
bool reallocateBuffer = false;
|
||||
if (RingIndex(begin) + count >= RingSize())
|
||||
{
|
||||
count = RingSize() - RingIndex(begin);
|
||||
reallocateBuffer = true;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(count));
|
||||
|
||||
auto buffer_idx = (begin / MaxQueries) % 2;
|
||||
auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx];
|
||||
|
||||
if (count >= RingSize())
|
||||
{
|
||||
TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count);
|
||||
}
|
||||
|
||||
TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count));
|
||||
|
||||
NSRange range = NSMakeRange(RingIndex(begin), count);
|
||||
NSData* data = [counterSampleBuffer resolveCounterRange:range];
|
||||
NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp);
|
||||
MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes);
|
||||
if (timestamps == nil)
|
||||
{
|
||||
TracyMetalPanic(return false, "Collect: unable to resolve timestamps.");
|
||||
}
|
||||
|
||||
if (numResolvedTimestamps != count)
|
||||
{
|
||||
TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count);
|
||||
}
|
||||
|
||||
int resolved = 0;
|
||||
for (auto i = 0; i < numResolvedTimestamps; i += 2)
|
||||
{
|
||||
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") );
|
||||
MTLTimestamp t_start = timestamps[i+0].timestamp;
|
||||
MTLTimestamp t_end = timestamps[i+1].timestamp;
|
||||
uint32_t k = RingIndex(begin + i);
|
||||
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start)));
|
||||
if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue))
|
||||
{
|
||||
TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k);
|
||||
break;
|
||||
}
|
||||
// Metal will initialize timestamp buffer with zeroes; encountering a zero-value
|
||||
// timestamp means that the timestamp has not been written and resolved yet
|
||||
if ((t_start == 0) || (t_end == 0))
|
||||
{
|
||||
auto checkTime = std::chrono::high_resolution_clock::now();
|
||||
auto requestTime = m_timestampRequestTime[k];
|
||||
auto ms_in_flight = std::chrono::duration<float>(checkTime-requestTime).count()*1000.0f;
|
||||
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight));
|
||||
const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f;
|
||||
if (ms_in_flight < timeout_ms)
|
||||
break;
|
||||
TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") );
|
||||
TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight);
|
||||
t_start = m_mostRecentTimestamp + 5;
|
||||
t_end = t_start + 5;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone"));
|
||||
TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone"));
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_start));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(t_end));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k+1));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp;
|
||||
TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId"));
|
||||
resolved += 2;
|
||||
}
|
||||
TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load())));
|
||||
|
||||
m_previousCheckpoint += resolved;
|
||||
|
||||
// Check whether the timestamp buffer has been fully resolved/collected:
|
||||
// WARN: there's technically a race condition here: NextQuery() may reference the
|
||||
// buffer that is being released instead of the new one. In practice, this should
|
||||
// never happen so long as Collect is called frequently enough to prevent pending
|
||||
// timestamp query requests from piling up too quickly.
|
||||
if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0)
|
||||
{
|
||||
m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
}
|
||||
|
||||
//RecalibrateClocks(); // to account for drift
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
MetalCtx(id<MTLDevice> device)
|
||||
: m_device(device)
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue));
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample));
|
||||
|
||||
if (m_device == nil)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return, "device is nil.");
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary])
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported.");
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n"));
|
||||
}
|
||||
if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary])
|
||||
{
|
||||
TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n"));
|
||||
}
|
||||
|
||||
m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries);
|
||||
|
||||
m_timestampRequestTime.resize(MaxQueries);
|
||||
|
||||
MTLTimestamp cpuTimestamp = 0;
|
||||
MTLTimestamp gpuTimestamp = 0;
|
||||
[m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp];
|
||||
m_mostRecentTimestamp = gpuTimestamp;
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp));
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp));
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp));
|
||||
|
||||
float period = 1.0f;
|
||||
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp));
|
||||
MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp));
|
||||
MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()?
|
||||
MemWrite(&item->gpuNewContext.period, period);
|
||||
MemWrite(&item->gpuNewContext.context, m_contextId);
|
||||
//MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0));
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Metal);
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
~MetalCtx()
|
||||
{
|
||||
// collect the last remnants of Metal GPU activity...
|
||||
// TODO: add a timeout to this loop?
|
||||
while (m_previousCheckpoint.load() != m_queryCounter.load())
|
||||
Collect();
|
||||
}
|
||||
|
||||
tracy_force_inline void SubmitQueueItem(QueueItem* item)
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
||||
{
|
||||
index %= MaxQueries;
|
||||
return static_cast<uint32_t>(index);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
|
||||
{
|
||||
// wrap-around safe: all unsigned
|
||||
uintptr_t count = end - begin;
|
||||
return static_cast<uint32_t>(count);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t RingSize() const
|
||||
{
|
||||
return MaxQueries;
|
||||
}
|
||||
|
||||
struct Query { id<MTLCounterSampleBuffer> buffer; uint32_t idx; };
|
||||
|
||||
tracy_force_inline Query NextQuery()
|
||||
{
|
||||
TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) );
|
||||
auto id = m_queryCounter.fetch_add(2);
|
||||
TracyMetalDebug_0b00010( ZoneValue(id) );
|
||||
auto count = RingCount(m_previousCheckpoint, id);
|
||||
if (count >= MaxQueries)
|
||||
{
|
||||
// TODO: return a proper (hidden) "sentinel" query
|
||||
Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 };
|
||||
TracyMetalPanic(
|
||||
return sentinel,
|
||||
"NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)",
|
||||
m_previousCheckpoint.load(), id, count
|
||||
);
|
||||
}
|
||||
uint32_t buffer_idx = (id / MaxQueries) % 2;
|
||||
TracyMetalDebug_0b00010( ZoneValue(buffer_idx) );
|
||||
auto buffer = m_counterSampleBuffers[buffer_idx];
|
||||
if (buffer == nil)
|
||||
TracyMetalPanic(, "NextQueryId: sample buffer is nil! (id=%llu)", id);
|
||||
uint32_t idx = RingIndex(id);
|
||||
TracyMetalDebug_0b00010( ZoneValue(idx) );
|
||||
TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") );
|
||||
m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now();
|
||||
return Query{ buffer, idx };
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetContextId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
static id<MTLCounterSampleBuffer> NewTimestampSampleBuffer(id<MTLDevice> device, size_t count)
|
||||
{
|
||||
ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer");
|
||||
|
||||
id<MTLCounterSet> timestampCounterSet = nil;
|
||||
for (id<MTLCounterSet> counterSet in device.counterSets)
|
||||
{
|
||||
if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp])
|
||||
{
|
||||
timestampCounterSet = counterSet;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (timestampCounterSet == nil)
|
||||
{
|
||||
TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform.");
|
||||
}
|
||||
|
||||
MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init];
|
||||
sampleDescriptor.counterSet = timestampCounterSet;
|
||||
sampleDescriptor.sampleCount = MaxQueries;
|
||||
sampleDescriptor.storageMode = MTLStorageModeShared;
|
||||
sampleDescriptor.label = @"TracyMetalTimestampPool";
|
||||
|
||||
NSError* error = nil;
|
||||
id<MTLCounterSampleBuffer> counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error];
|
||||
if (error != nil)
|
||||
{
|
||||
//NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason);
|
||||
TracyMetalPanic({assert(false);} return nil,
|
||||
"ERROR: unable to create sample buffer for timestamp counters : %s | %s",
|
||||
[error.localizedDescription cString], [error.localizedFailureReason cString]);
|
||||
}
|
||||
|
||||
return counterSampleBuffer;
|
||||
}
|
||||
|
||||
uint8_t m_contextId = 255;
|
||||
|
||||
id<MTLDevice> m_device = nil;
|
||||
id<MTLCounterSampleBuffer> m_counterSampleBuffers [2] = {};
|
||||
|
||||
using atomic_counter = std::atomic<uintptr_t>;
|
||||
static_assert(atomic_counter::is_always_lock_free);
|
||||
atomic_counter m_queryCounter = 0;
|
||||
|
||||
atomic_counter m_previousCheckpoint = 0;
|
||||
MTLTimestamp m_mostRecentTimestamp = 0;
|
||||
|
||||
std::vector<std::chrono::high_resolution_clock::time_point> m_timestampRequestTime;
|
||||
|
||||
std::mutex m_collectionMutex;
|
||||
};
|
||||
|
||||
class MetalZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if ( !m_active ) return;
|
||||
if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil.");
|
||||
m_ctx = ctx;
|
||||
|
||||
auto& query = m_query = ctx->NextQuery();
|
||||
|
||||
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer;
|
||||
desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0;
|
||||
desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample;
|
||||
desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1;
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc);
|
||||
}
|
||||
|
||||
/* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it
|
||||
tracy_force_inline MetalZoneScope( MetalCtx* ctx, id<MTLComputeCommandEncoder> cmdEncoder, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
m_cmdEncoder = cmdEncoder;
|
||||
|
||||
auto& query = m_query = ctx->NextQueryId();
|
||||
|
||||
[m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES];
|
||||
|
||||
SubmitZoneBeginGpu(ctx, query.idx, srcloc);
|
||||
}
|
||||
*/
|
||||
|
||||
tracy_force_inline ~MetalZoneScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
SubmitZoneEndGpu(m_ctx, m_query.idx + 1);
|
||||
}
|
||||
|
||||
TracyMetalDebugZoneScopeWireTap;
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
MetalCtx* m_ctx;
|
||||
|
||||
/* TODO: declare it for "command-level" profiling
|
||||
id<MTLComputeCommandEncoder> m_cmdEncoder;
|
||||
*/
|
||||
|
||||
static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
|
||||
}
|
||||
|
||||
static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone"));
|
||||
}
|
||||
|
||||
MetalCtx::Query m_query = {};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
using TracyMetalCtx = tracy::MetalCtx;
|
||||
|
||||
#define TracyMetalContext(device) tracy::MetalCtx::Create(device)
|
||||
#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx)
|
||||
#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size)
|
||||
|
||||
#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true )
|
||||
#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true )
|
||||
#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
|
||||
#define TracyMetalCollect( ctx ) ctx->Collect();
|
||||
|
||||
|
||||
|
||||
#undef TracyMetalDebug_ZoneScopeWireTap
|
||||
#undef TracyMetalDebug_0b00010
|
||||
#undef TracyMetalDebug_0b10000
|
||||
#undef TracyMetalDebugMasked
|
||||
#undef TRACY_METAL_DEBUG_MASK
|
||||
#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT
|
||||
#undef TracyMetalPanic
|
||||
#undef TRACY_METAL_VA_ARGS
|
||||
|
||||
#endif
|
||||
|
||||
#endif//__TRACYMETAL_HMM__
|
||||
@@ -255,7 +255,7 @@ namespace tracy {
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
@@ -304,7 +304,7 @@ namespace tracy {
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active)
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active && GetProfiler().IsConnected())
|
||||
#else
|
||||
|
||||
@@ -25,7 +25,7 @@ class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
GpuCtxScope( const SourceLocationData*, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int32_t, bool ) {}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -222,7 +222,7 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -271,7 +271,7 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#define TracyVkZoneC(c,x,y,z)
|
||||
#define TracyVkZoneTransient(c,x,y,z,w)
|
||||
#define TracyVkCollect(c,x)
|
||||
#define TracyVkCollectHost(c)
|
||||
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
|
||||
@@ -256,7 +257,9 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
|
||||
cmdbuf ?
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ) :
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, 0, m_queryCount ) );
|
||||
m_tail = head;
|
||||
m_oldCnt = 0;
|
||||
int64_t tgpu;
|
||||
@@ -265,7 +268,7 @@ public:
|
||||
}
|
||||
#endif
|
||||
assert( head > m_tail );
|
||||
|
||||
|
||||
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
|
||||
|
||||
unsigned int cnt;
|
||||
@@ -325,7 +328,9 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) );
|
||||
cmdbuf ?
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ) :
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, wrappedTail, cnt ) );
|
||||
|
||||
m_tail += cnt;
|
||||
}
|
||||
@@ -531,7 +536,7 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -545,8 +550,17 @@ public:
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
QueueItem *item;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
}
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
@@ -580,7 +594,7 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
@@ -595,8 +609,17 @@ public:
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
QueueItem *item;
|
||||
if( depth > 0 && has_callstack() )
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
}
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
@@ -703,6 +726,7 @@ using TracyVkCtx = tracy::VkCtx*;
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active );
|
||||
#endif
|
||||
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
|
||||
#define TracyVkCollectHost( ctx ) ctx->Collect( VK_NULL_HANDLE );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );
|
||||
|
||||
Reference in New Issue
Block a user