update tracy from 11.0 to 13.1 and fix build with tracy enabled

This commit is contained in:
Sven Balzer
2026-05-01 18:24:04 +02:00
parent 7fa5294e02
commit 2adf75973a
304 changed files with 20579 additions and 170182 deletions
+139 -59
View File
@@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 50 )
@@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->threadWakeup.thread, rt->threadId );
MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 )
@@ -232,6 +238,10 @@ void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
#endif
const auto& hdr = record->EventHeader;
// Check for Lost_Event (6a399ae0-4bc6-4de9-870b-3657f8947e7e)
if( hdr.ProviderId.Data1 == 0x6A399AE0 ) return;
assert( hdr.ProviderId.Data1 == 0x802EC45A );
assert( hdr.EventDescriptor.Id == 0x0011 );
@@ -498,11 +508,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
if( _GetThreadDescription )
{
PWSTR tmp;
_GetThreadDescription( hnd, &tmp );
char buf[256];
if( tmp )
if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
{
char buf[256];
auto ret = wcstombs( buf, tmp, 256 );
LocalFree(tmp);
if( ret != 0 )
{
threadName = CopyString( buf, ret );
@@ -521,25 +531,23 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
if( phnd != INVALID_HANDLE_VALUE )
{
HMODULE modules[1024];
DWORD needed;
if( _EnumProcessModules( phnd, modules, 1024 * sizeof( HMODULE ), &needed ) != 0 )
MEMORY_BASIC_INFORMATION vmeminfo;
SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
if( infosize == sizeof( vmeminfo ) )
{
const auto sz = std::min( DWORD( needed / sizeof( HMODULE ) ), DWORD( 1024 ) );
for( DWORD i=0; i<sz; i++ )
if (vmeminfo.Type == MEM_IMAGE)
{
// for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE
HMODULE mod = (HMODULE)vmeminfo.AllocationBase;
MODULEINFO info;
if( _GetModuleInformation( phnd, modules[i], &info, sizeof( info ) ) != 0 )
if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 )
{
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 );
if( modlen != 0 )
{
char buf2[1024];
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{
threadName = CopyString( buf2, modlen );
threadSent = true;
}
threadName = CopyString( buf2, modlen );
threadSent = true;
}
}
}
@@ -606,6 +614,7 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch
# include <fcntl.h>
# include <inttypes.h>
# include <limits>
# include <mntent.h>
# include <poll.h>
# include <stdio.h>
# include <stdlib.h>
@@ -678,7 +687,7 @@ enum TraceEventId
EventBranchMiss,
EventVsync,
EventContextSwitch,
EventWakeup,
EventWaking,
};
static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid )
@@ -753,6 +762,42 @@ static const char* ReadFile( const char* path )
return tmp;
}
static const char* ReadFile( const char* base, const char* path )
{
const auto blen = strlen( base );
const auto plen = strlen( path );
auto tmp = (char*)tracy_malloc( blen + plen + 1 );
memcpy( tmp, base, blen );
memcpy( tmp + blen, path, plen );
tmp[blen+plen] = '\0';
auto res = ReadFile( tmp );
tracy_free( tmp );
return res;
}
// Locates the mount point of tracefs by scanning /proc/mounts.
//
// Returns a tracy_malloc'ed, NUL-terminated copy of the mount directory of the first tracefs
// entry found; the caller releases it with tracy_free. Returns nullptr when /proc/mounts cannot
// be opened or when no tracefs mount is listed.
static char* GetTraceFsPath()
{
    auto f = setmntent( "/proc/mounts", "r" );
    if( !f ) return nullptr;
    char* ret = nullptr;
    while( auto ent = getmntent( f ) )
    {
        // Identify the mount by its filesystem type. The source name (mnt_fsname) is an arbitrary
        // label chosen at mount time (e.g. "nodev" or "none"), so it is only kept as a fallback.
        if( strcmp( ent->mnt_type, "tracefs" ) == 0 || strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
        {
            const auto len = strlen( ent->mnt_dir );
            ret = (char*)tracy_malloc( len + 1 );
            // mnt_dir points into storage that getmntent reuses, so take a private copy
            // (including its terminating NUL).
            memcpy( ret, ent->mnt_dir, len + 1 );
            break;
        }
    }
    endmntent( f );
    return ret;
}
bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
@@ -767,16 +812,22 @@ bool SysTraceStart( int64_t& samplingPeriod )
TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel );
#endif
int switchId = -1, wakeupId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" );
auto traceFsPath = GetTraceFsPath();
if( !traceFsPath ) return false;
TracyDebug( "tracefs path: %s\n", traceFsPath );
int switchId = -1, wakingId = -1, vsyncId = -1;
const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
if( switchIdStr ) switchId = atoi( switchIdStr );
const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" );
if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr );
const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" );
const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
if( wakingIdStr ) wakingId = atoi( wakingIdStr );
const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
tracy_free( traceFsPath );
TracyDebug( "sched_switch id: %i\n", switchId );
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
TracyDebug( "sched_waking id: %i\n", wakingId );
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
#ifdef TRACY_NO_SAMPLING
@@ -831,7 +882,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
2 + // CPU cycles + instructions retired
2 + // cache reference + miss
2 + // branch retired + miss
2 + // context switches + wakeups
2 + // context switches + waking ups
1 // vsync
);
s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
@@ -1076,18 +1127,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
}
}
if( wakeupId != -1 )
if( wakingId != -1 )
{
pe.config = wakeupId;
pe.config &= ~PERF_SAMPLE_CALLCHAIN;
pe = {};
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof( perf_event_attr );
pe.sample_period = 1;
pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW;
// Could ask for callstack here
//pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
pe.disabled = 1;
pe.inherit = 1;
pe.config = wakingId;
pe.read_format = 0;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
pe.use_clockid = 1;
pe.clockid = CLOCK_MONOTONIC_RAW;
#endif
TracyDebug( "Setup wakeup capture\n" );
TracyDebug( "Setup waking up capture\n" );
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
if( fd != -1 )
{
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWaking, i );
if( s_ring[s_numBuffers].IsValid() )
{
s_numBuffers++;
@@ -1332,6 +1396,7 @@ void SysTraceWorker( void* ptr )
hadData = true;
while( activeNum > 0 )
{
// Find the earliest event from the active buffers
int sel = -1;
int selPos;
int64_t t0 = std::numeric_limits<int64_t>::max();
@@ -1369,6 +1434,7 @@ void SysTraceWorker( void* ptr )
}
}
}
// Found any event
if( sel >= 0 )
{
auto& ring = ringArray[ctxBufferIdx + sel];
@@ -1384,10 +1450,10 @@ void SysTraceWorker( void* ptr )
const auto rid = ring.GetId();
if( rid == EventContextSwitch )
{
// Layout:
// u64 time
// u64 cnt
// u64 ip[cnt]
// Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
// u64 time // PERF_SAMPLE_TIME
// u64 cnt // PERF_SAMPLE_CALLCHAIN
// u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN
// u32 size
// u8 data[size]
// Data (not ABI stable, but has not changed since it was added, in 2009):
@@ -1408,35 +1474,43 @@ void SysTraceWorker( void* ptr )
const auto traceOffset = offset;
offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;
uint32_t prev_pid, next_pid;
uint32_t prev_pid, prev_prio;
uint32_t next_pid, next_prio;
long prev_state;
ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t ) + sizeof( uint32_t );
offset += sizeof( uint32_t );
ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &prev_state, offset, sizeof( long ) );
offset += sizeof( long ) + 16;
ring.Read( &next_pid, offset, sizeof( uint32_t ) );
offset += sizeof( uint32_t );
ring.Read( &next_prio, offset, sizeof( uint32_t ) );
uint8_t reason = 100;
uint8_t state;
uint8_t oldThreadWaitReason = 100;
uint8_t oldThreadState;
if( prev_state & 0x0001 ) state = 104;
else if( prev_state & 0x0002 ) state = 101;
else if( prev_state & 0x0004 ) state = 105;
else if( prev_state & 0x0008 ) state = 106;
else if( prev_state & 0x0010 ) state = 108;
else if( prev_state & 0x0020 ) state = 109;
else if( prev_state & 0x0040 ) state = 110;
else if( prev_state & 0x0080 ) state = 102;
else state = 103;
if( prev_state & 0x0001 ) oldThreadState = 104;
else if( prev_state & 0x0002 ) oldThreadState = 101;
else if( prev_state & 0x0004 ) oldThreadState = 105;
else if( prev_state & 0x0008 ) oldThreadState = 106;
else if( prev_state & 0x0010 ) oldThreadState = 108;
else if( prev_state & 0x0020 ) oldThreadState = 109;
else if( prev_state & 0x0040 ) oldThreadState = 110;
else if( prev_state & 0x0080 ) oldThreadState = 102;
else oldThreadState = 103;
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, t0 );
MemWrite( &item->contextSwitch.oldThread, prev_pid );
MemWrite( &item->contextSwitch.newThread, next_pid );
MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
MemWrite( &item->contextSwitch.reason, reason );
MemWrite( &item->contextSwitch.state, state );
MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
TracyLfqCommit;
if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
@@ -1450,27 +1524,33 @@ void SysTraceWorker( void* ptr )
TracyLfqCommit;
}
}
else if( rid == EventWakeup )
else if( rid == EventWaking)
{
// See /sys/kernel/debug/tracing/events/sched/sched_waking/format
// Layout:
// u64 time
// u64 time // PERF_SAMPLE_TIME
// u32 size
// u8 data[size]
// Data:
// u8 hdr[8]
// u8 comm[16]
// u32 pid
// u32 prio
// u64 target_cpu
offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16;
// i32 prio
// i32 target_cpu
const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
offset += dataOffset + 8 + 16;
uint32_t pid;
ring.Read( &pid, offset, sizeof( uint32_t ) );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, t0 );
MemWrite( &item->threadWakeup.thread, pid );
MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
int8_t adjustReason = -1; // Does not exist on Linux
int8_t adjustIncrement = 0; // Should perhaps store the new prio?
MemWrite( &item->threadWakeup.adjustReason, adjustReason );
MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement );
TracyLfqCommit;
}
else