update tracy from 11.0 to 13.1 and fix build with tracy enabled
This commit is contained in:
+359
-112
@@ -24,7 +24,7 @@
|
||||
#include <capstone.h>
|
||||
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#include "../zstd/zdict.h"
|
||||
#include <zdict.h>
|
||||
|
||||
#include "../public/common/TracyProtocol.hpp"
|
||||
#include "../public/common/TracySystem.hpp"
|
||||
@@ -37,16 +37,11 @@
|
||||
#include "TracySort.hpp"
|
||||
#include "TracyTaskDispatch.hpp"
|
||||
#include "TracyWorker.hpp"
|
||||
#include "tracy_pdqsort.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static tracy_force_inline uint32_t UnpackFileLine( uint64_t packed, uint32_t& line )
|
||||
{
|
||||
line = packed & 0xFFFFFFFF;
|
||||
return packed >> 32;
|
||||
}
|
||||
|
||||
static bool SourceFileValid( const char* fn, uint64_t olderThan )
|
||||
{
|
||||
struct stat buf;
|
||||
@@ -268,14 +263,6 @@ Worker::Worker( const char* addr, uint16_t port, int64_t memoryLimit )
|
||||
, m_buffer( new char[TargetFrameSize*3 + 1] )
|
||||
, m_bufferOffset( 0 )
|
||||
, m_inconsistentSamples( false )
|
||||
, m_pendingStrings( 0 )
|
||||
, m_pendingThreads( 0 )
|
||||
, m_pendingFibers( 0 )
|
||||
, m_pendingExternalNames( 0 )
|
||||
, m_pendingSourceLocation( 0 )
|
||||
, m_pendingCallstackFrames( 0 )
|
||||
, m_pendingCallstackSubframes( 0 )
|
||||
, m_pendingSymbolCode( 0 )
|
||||
, m_memoryLimit( memoryLimit )
|
||||
, m_callstackFrameStaging( nullptr )
|
||||
, m_traceVersion( CurrentVersion )
|
||||
@@ -306,7 +293,6 @@ Worker::Worker( const char* addr, uint16_t port, int64_t memoryLimit )
|
||||
|
||||
Worker::Worker( const char* name, const char* program, const std::vector<ImportEventTimeline>& timeline, const std::vector<ImportEventMessages>& messages, const std::vector<ImportEventPlots>& plots, const std::unordered_map<uint64_t, std::string>& threadNames )
|
||||
: m_hasData( true )
|
||||
, m_delay( 0 )
|
||||
, m_resolution( 0 )
|
||||
, m_captureName( name )
|
||||
, m_captureProgram( program )
|
||||
@@ -581,8 +567,10 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
{
|
||||
throw LegacyVersion( fileVer );
|
||||
}
|
||||
|
||||
f.Read( m_delay );
|
||||
if( fileVer < FileVersion( 0, 12, 3 ) )
|
||||
{
|
||||
f.Skip( 8 ); // m_delay
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -643,22 +631,50 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
uint32_t packageId;
|
||||
uint64_t psz;
|
||||
f.Read2( packageId, psz );
|
||||
auto& package = *m_data.cpuTopology.emplace( packageId, unordered_flat_map<uint32_t, std::vector<uint32_t>> {} ).first;
|
||||
auto& package = *m_data.cpuTopology.emplace( packageId, unordered_flat_map<uint32_t, unordered_flat_map<uint32_t, std::vector<uint32_t>>> {} ).first;
|
||||
package.second.reserve( psz );
|
||||
for( uint64_t j=0; j<psz; j++ )
|
||||
{
|
||||
uint32_t coreId;
|
||||
uint64_t csz;
|
||||
f.Read2( coreId, csz );
|
||||
auto& core = *package.second.emplace( coreId, std::vector<uint32_t> {} ).first;
|
||||
core.second.reserve( csz );
|
||||
for( uint64_t k=0; k<csz; k++ )
|
||||
if( fileVer >= FileVersion( 0, 11, 2 ) )
|
||||
{
|
||||
uint32_t thread;
|
||||
f.Read( thread );
|
||||
core.second.emplace_back( thread );
|
||||
uint32_t dieId;
|
||||
uint64_t dsz;
|
||||
f.Read2( dieId, dsz );
|
||||
auto& die = *package.second.emplace( dieId, unordered_flat_map<uint32_t, std::vector<uint32_t>> {} ).first;
|
||||
die.second.reserve( dsz );
|
||||
for( uint64_t k=0; k<dsz; k++ )
|
||||
{
|
||||
uint32_t coreId;
|
||||
uint64_t csz;
|
||||
f.Read2( coreId, csz );
|
||||
auto& core = *die.second.emplace( coreId, std::vector<uint32_t> {} ).first;
|
||||
core.second.reserve( csz );
|
||||
for( uint64_t l=0; l<csz; l++ )
|
||||
{
|
||||
uint32_t thread;
|
||||
f.Read( thread );
|
||||
core.second.emplace_back( thread );
|
||||
|
||||
m_data.cpuTopologyMap.emplace( thread, CpuThreadTopology { packageId, coreId } );
|
||||
m_data.cpuTopologyMap.emplace( thread, CpuThreadTopology { packageId, dieId, coreId } );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& die = *package.second.emplace( 0, unordered_flat_map<uint32_t, std::vector<uint32_t>> {} ).first;
|
||||
uint32_t coreId;
|
||||
uint64_t csz;
|
||||
f.Read2( coreId, csz );
|
||||
auto& core = *die.second.emplace( coreId, std::vector<uint32_t> {} ).first;
|
||||
core.second.reserve( csz );
|
||||
for( uint64_t k=0; k<csz; k++ )
|
||||
{
|
||||
uint32_t thread;
|
||||
f.Read( thread );
|
||||
core.second.emplace_back( thread );
|
||||
|
||||
m_data.cpuTopologyMap.emplace( thread, CpuThreadTopology { packageId, 0, coreId } );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -717,7 +733,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
{
|
||||
m_data.stringData.reserve_exact( sz, m_slab );
|
||||
}
|
||||
|
||||
|
||||
for( uint64_t i=0; i<sz; i++ )
|
||||
{
|
||||
uint64_t ptr, ssz;
|
||||
@@ -1081,6 +1097,18 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
auto ctx = m_slab.AllocInit<GpuCtxData>();
|
||||
uint8_t calibration;
|
||||
f.Read7( ctx->thread, calibration, ctx->count, ctx->period, ctx->type, ctx->name, ctx->overflow );
|
||||
uint64_t notesz;
|
||||
if( fileVer >= FileVersion( 0, 12, 4 ) )
|
||||
{
|
||||
f.Read( notesz );
|
||||
for( uint64_t i = 0; i < notesz; i++ )
|
||||
{
|
||||
decltype( ctx->noteNames )::key_type key;
|
||||
decltype( ctx->noteNames )::mapped_type value;
|
||||
f.Read2( key, value );
|
||||
ctx->noteNames[key] = value;
|
||||
}
|
||||
}
|
||||
ctx->hasCalibration = calibration;
|
||||
ctx->hasPeriod = ctx->period != 1.f;
|
||||
m_data.gpuCnt += ctx->count;
|
||||
@@ -1095,9 +1123,32 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
int64_t refTime = 0;
|
||||
int64_t refGpuTime = 0;
|
||||
auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first;
|
||||
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime, childIdx );
|
||||
ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime, childIdx, fileVer >= FileVersion( 0, 12, 4 ) );
|
||||
}
|
||||
}
|
||||
|
||||
if( fileVer >= FileVersion( 0, 12, 4 ) )
|
||||
{
|
||||
f.Read( notesz );
|
||||
ctx->notes.reserve( notesz );
|
||||
for( uint64_t i = 0; i < notesz; i++ )
|
||||
{
|
||||
uint16_t query_id;
|
||||
f.Read( query_id );
|
||||
auto& notes = ctx->notes[query_id];
|
||||
uint64_t note_count;
|
||||
f.Read( note_count );
|
||||
notes.reserve( note_count );
|
||||
for( uint64_t i = 0; i < note_count; i++ )
|
||||
{
|
||||
int64_t id;
|
||||
double value;
|
||||
f.Read2( id, value );
|
||||
notes[id] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_data.gpuData[i] = ctx;
|
||||
}
|
||||
|
||||
@@ -1397,6 +1448,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
s_loadProgress.subTotal.store( 0, std::memory_order_relaxed );
|
||||
s_loadProgress.progress.store( LoadProgress::ContextSwitches, std::memory_order_relaxed );
|
||||
|
||||
const bool ctxSwitchesHaveWakeupCpu = fileVer >= FileVersion( 0, 11, 3 );
|
||||
if( eventMask & EventType::ContextSwitches )
|
||||
{
|
||||
f.Read( sz );
|
||||
@@ -1415,16 +1467,28 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
for( uint64_t j=0; j<csz; j++ )
|
||||
{
|
||||
int64_t deltaWakeup, deltaStart, diff, thread;
|
||||
uint8_t cpu;
|
||||
uint8_t cpu, wakeupcpu;
|
||||
int8_t reason, state;
|
||||
f.Read7( deltaWakeup, deltaStart, diff, cpu, reason, state, thread );
|
||||
if ( ctxSwitchesHaveWakeupCpu )
|
||||
{
|
||||
f.Read(wakeupcpu);
|
||||
}
|
||||
else
|
||||
{
|
||||
wakeupcpu = cpu;
|
||||
}
|
||||
refTime += deltaWakeup;
|
||||
ptr->SetWakeup( refTime );
|
||||
ptr->SetWakeupCpu( wakeupcpu );
|
||||
refTime += deltaStart;
|
||||
ptr->SetStartCpu( refTime, cpu );
|
||||
ptr->SetStart( refTime );
|
||||
ptr->SetCpu( cpu );
|
||||
if( diff > 0 ) runningTime += diff;
|
||||
refTime += diff;
|
||||
ptr->SetEndReasonState( refTime, reason, state );
|
||||
ptr->SetEnd( refTime );
|
||||
ptr->SetReason( reason );
|
||||
ptr->SetState( state );
|
||||
ptr->SetThread( CompressThread( thread ) );
|
||||
ptr++;
|
||||
}
|
||||
@@ -1442,7 +1506,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
f.Skip( sizeof( uint64_t ) );
|
||||
uint64_t csz;
|
||||
f.Read( csz );
|
||||
f.Skip( csz * ( sizeof( int64_t ) * 4 + sizeof( int8_t ) * 3 ) );
|
||||
f.Skip( csz * ( sizeof( int64_t ) * 4 + sizeof( int8_t ) * ( 3 + int( ctxSwitchesHaveWakeupCpu ) ) ) );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1530,12 +1594,13 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow
|
||||
m_data.symbolLoc[symIdx++] = SymbolLocation { symAddr, size.Val() };
|
||||
}
|
||||
}
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
pdqsort_branchless( m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin(), m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin(), m_data.symbolLocInline.end() );
|
||||
#endif
|
||||
|
||||
f.Read( sz );
|
||||
@@ -2441,12 +2506,17 @@ const SourceLocation& Worker::GetSourceLocation( int16_t srcloc ) const
|
||||
{
|
||||
return *m_data.sourceLocationPayload[-srcloc-1];
|
||||
}
|
||||
else
|
||||
else if( srcloc != std::numeric_limits<int16_t>::max() )
|
||||
{
|
||||
const auto it = m_data.sourceLocation.find( m_data.sourceLocationExpand[srcloc] );
|
||||
assert( it != m_data.sourceLocation.end() );
|
||||
return it->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
static const SourceLocation emptySourceLoc = {};
|
||||
return emptySourceLoc;
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<const char*, const char*> Worker::GetExternalName( uint64_t id ) const
|
||||
@@ -2715,7 +2785,6 @@ void Worker::Exec()
|
||||
m_data.framesBase->frames.push_back( FrameEvent{ 0, -1, -1 } );
|
||||
m_data.framesBase->frames.push_back( FrameEvent{ initEnd, -1, -1 } );
|
||||
m_data.lastTime = initEnd;
|
||||
m_delay = TscPeriod( welcome.delay );
|
||||
m_resolution = TscPeriod( welcome.resolution );
|
||||
m_pid = welcome.pid;
|
||||
m_samplingPeriod = welcome.samplingPeriod;
|
||||
@@ -2723,7 +2792,7 @@ void Worker::Exec()
|
||||
m_captureProgram = welcome.programName;
|
||||
m_captureTime = welcome.epoch;
|
||||
m_executableTime = welcome.exectime;
|
||||
m_ignoreMemFreeFaults = ( welcome.flags & WelcomeFlag::OnDemand ) || ( welcome.flags & WelcomeFlag::IsApple );
|
||||
m_ignoreMemFreeFaults = ( welcome.flags & WelcomeFlag::OnDemand ) || ( welcome.flags & WelcomeFlag::IgnoreMemFaults );
|
||||
m_ignoreFrameEndFaults = welcome.flags & WelcomeFlag::OnDemand;
|
||||
m_data.cpuArch = (CpuArchitecture)welcome.cpuArch;
|
||||
m_codeTransfer = welcome.flags & WelcomeFlag::CodeTransfer;
|
||||
@@ -2790,7 +2859,15 @@ void Worker::Exec()
|
||||
const char* end = ptr + netbuf.size;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock( m_data.lock );
|
||||
std::unique_lock<std::mutex> lk( m_data.lock );
|
||||
if( m_data.mainThreadWantsLock )
|
||||
{
|
||||
// Hand over the lock to the main thread to avoid starving it.
|
||||
// Wait for a millisecond maximum to avoid the opposite
|
||||
// problem where main thread would never let us execute
|
||||
m_data.lockCv.wait_for( lk, std::chrono::milliseconds( 1 ) );
|
||||
}
|
||||
|
||||
while( ptr < end )
|
||||
{
|
||||
auto ev = (const QueueItem*)ptr;
|
||||
@@ -3155,6 +3232,16 @@ void Worker::QueryDataTransfer( const void* ptr, size_t size )
|
||||
}
|
||||
}
|
||||
|
||||
void Worker::QueryCallstackFrame( uint64_t addr )
|
||||
{
|
||||
const auto packed = PackPointer( addr );
|
||||
if( m_data.callstackFrameMap.contains( packed ) ) return;
|
||||
|
||||
m_pendingCallstackFrames++;
|
||||
m_data.callstackFrameMap.emplace( packed, nullptr );
|
||||
Query( ServerQueryCallstackFrame, addr );
|
||||
}
|
||||
|
||||
bool Worker::DispatchProcess( const QueueItem& ev, const char*& ptr )
|
||||
{
|
||||
if( ev.hdr.idx >= (int)QueueType::StringData )
|
||||
@@ -3310,6 +3397,9 @@ int16_t Worker::ShrinkSourceLocationReal( uint64_t srcloc )
|
||||
int16_t Worker::NewShrinkedSourceLocation( uint64_t srcloc )
|
||||
{
|
||||
assert( m_data.sourceLocationExpand.size() < std::numeric_limits<int16_t>::max() );
|
||||
if( ( m_data.sourceLocationExpand.size() + 1 ) == std::numeric_limits<int16_t>::max() )
|
||||
return std::numeric_limits<int16_t>::max();
|
||||
|
||||
const auto sz = int16_t( m_data.sourceLocationExpand.size() );
|
||||
m_data.sourceLocationExpand.push_back( srcloc );
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
@@ -3822,7 +3912,7 @@ void Worker::AddSymbolCode( uint64_t ptr, const char* data, size_t sz )
|
||||
rval = cs_open( CS_ARCH_ARM, CS_MODE_ARM, &handle );
|
||||
break;
|
||||
case CpuArchArm64:
|
||||
rval = cs_open( CS_ARCH_ARM64, CS_MODE_ARM, &handle );
|
||||
rval = cs_open( CS_ARCH_AARCH64, CS_MODE_ARM, &handle );
|
||||
break;
|
||||
default:
|
||||
assert( false );
|
||||
@@ -3838,11 +3928,7 @@ void Worker::AddSymbolCode( uint64_t ptr, const char* data, size_t sz )
|
||||
{
|
||||
const auto& op = insn[i];
|
||||
const auto addr = op.address;
|
||||
if( m_data.callstackFrameMap.find( PackPointer( addr ) ) == m_data.callstackFrameMap.end() )
|
||||
{
|
||||
m_pendingCallstackFrames++;
|
||||
Query( ServerQueryCallstackFrame, addr );
|
||||
}
|
||||
QueryCallstackFrame( addr );
|
||||
|
||||
uint64_t callAddr = 0;
|
||||
const auto& detail = *op.detail;
|
||||
@@ -3866,9 +3952,9 @@ void Worker::AddSymbolCode( uint64_t ptr, const char* data, size_t sz )
|
||||
}
|
||||
break;
|
||||
case CpuArchArm64:
|
||||
if( detail.arm64.op_count == 1 && detail.arm64.operands[0].type == ARM64_OP_IMM )
|
||||
if( detail.aarch64.op_count == 1 && detail.aarch64.operands[0].type == AARCH64_OP_IMM )
|
||||
{
|
||||
callAddr = (uint64_t)detail.arm64.operands[0].imm;
|
||||
callAddr = (uint64_t)detail.aarch64.operands[0].imm;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -3878,11 +3964,7 @@ void Worker::AddSymbolCode( uint64_t ptr, const char* data, size_t sz )
|
||||
if( callAddr != 0 ) break;
|
||||
}
|
||||
}
|
||||
if( callAddr != 0 && m_data.callstackFrameMap.find( PackPointer( callAddr ) ) == m_data.callstackFrameMap.end() )
|
||||
{
|
||||
m_pendingCallstackFrames++;
|
||||
Query( ServerQueryCallstackFrame, callAddr );
|
||||
}
|
||||
if( callAddr != 0 ) QueryCallstackFrame( callAddr );
|
||||
}
|
||||
cs_free( insn, cnt );
|
||||
}
|
||||
@@ -3947,12 +4029,7 @@ void Worker::AddCallstackPayload( const char* _data, size_t _sz )
|
||||
|
||||
for( auto& frame : *arr )
|
||||
{
|
||||
auto fit = m_data.callstackFrameMap.find( frame );
|
||||
if( fit == m_data.callstackFrameMap.end() )
|
||||
{
|
||||
m_pendingCallstackFrames++;
|
||||
Query( ServerQueryCallstackFrame, GetCanonicalPointer( frame ) );
|
||||
}
|
||||
QueryCallstackFrame( GetCanonicalPointer( frame ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -4040,12 +4117,7 @@ void Worker::AddCallstackAllocPayload( const char* data )
|
||||
|
||||
for( auto& frame : *arr )
|
||||
{
|
||||
auto fit = m_data.callstackFrameMap.find( frame );
|
||||
if( fit == m_data.callstackFrameMap.end() )
|
||||
{
|
||||
m_pendingCallstackFrames++;
|
||||
Query( ServerQueryCallstackFrame, GetCanonicalPointer( frame ) );
|
||||
}
|
||||
QueryCallstackFrame( GetCanonicalPointer( frame ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -4133,10 +4205,10 @@ void Worker::DoPostponedSymbols()
|
||||
{
|
||||
if( m_data.newSymbolsIndex >= 0 )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
#endif
|
||||
const auto ms = std::lower_bound( m_data.symbolLoc.begin(), m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc[m_data.newSymbolsIndex], [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
std::inplace_merge( ms, m_data.symbolLoc.begin() + m_data.newSymbolsIndex, m_data.symbolLoc.end(), [] ( const auto& l, const auto& r ) { return l.addr < r.addr; } );
|
||||
@@ -4148,10 +4220,10 @@ void Worker::DoPostponedInlineSymbols()
|
||||
{
|
||||
if( m_data.newInlineSymbolsIndex >= 0 )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
ppqsort::sort( ppqsort::execution::par, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
#endif
|
||||
const auto ms = std::lower_bound( m_data.symbolLocInline.begin(), m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline[m_data.newInlineSymbolsIndex] );
|
||||
std::inplace_merge( ms, m_data.symbolLocInline.begin() + m_data.newInlineSymbolsIndex, m_data.symbolLocInline.end() );
|
||||
@@ -4577,6 +4649,12 @@ bool Worker::Process( const QueueItem& ev )
|
||||
case QueueType::GpuContextName:
|
||||
ProcessGpuContextName( ev.gpuContextName );
|
||||
break;
|
||||
case QueueType::GpuAnnotationName:
|
||||
ProcessGpuAnnotationName( ev.gpuAnnotationName );
|
||||
break;
|
||||
case QueueType::GpuZoneAnnotation:
|
||||
ProcessGpuZoneAnnotation( ev.zoneAnnotation );
|
||||
break;
|
||||
case QueueType::MemAlloc:
|
||||
ProcessMemAlloc( ev.memAlloc );
|
||||
break;
|
||||
@@ -4601,6 +4679,12 @@ bool Worker::Process( const QueueItem& ev )
|
||||
case QueueType::MemFreeCallstackNamed:
|
||||
ProcessMemFreeCallstackNamed( ev.memFree );
|
||||
break;
|
||||
case QueueType::MemDiscard:
|
||||
ProcessMemDiscard( ev.memDiscard );
|
||||
break;
|
||||
case QueueType::MemDiscardCallstack:
|
||||
ProcessMemDiscardCallstack( ev.memDiscard );
|
||||
break;
|
||||
case QueueType::CallstackSerial:
|
||||
ProcessCallstackSerial();
|
||||
break;
|
||||
@@ -5069,7 +5153,7 @@ void Worker::ProcessFrameMarkStart( const QueueFrameMark& ev )
|
||||
|
||||
void Worker::ProcessFrameMarkEnd( const QueueFrameMark& ev )
|
||||
{
|
||||
auto fd = m_data.frames.Retrieve( ev.name, [this] ( uint64_t name ) -> FrameData* {
|
||||
auto fd = m_data.frames.Retrieve( ev.name, [] ( uint64_t name ) -> FrameData* {
|
||||
return nullptr;
|
||||
}, [this] ( uint64_t name ) {
|
||||
Query( ServerQueryFrameName, name );
|
||||
@@ -5278,8 +5362,8 @@ void Worker::ProcessZoneColor( const QueueZoneColor& ev )
|
||||
|
||||
void Worker::ProcessZoneValue( const QueueZoneValue& ev )
|
||||
{
|
||||
char tmp[32];
|
||||
const auto tsz = sprintf( tmp, "%" PRIu64, ev.value );
|
||||
char tmp[64];
|
||||
const auto tsz = sprintf( tmp, "%" PRIu64 " [0x%" PRIx64 "]", ev.value, ev.value );
|
||||
|
||||
auto td = RetrieveThread( m_threadCtx );
|
||||
if( !td )
|
||||
@@ -5707,6 +5791,7 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe
|
||||
zone->SetGpuEnd( -1 );
|
||||
zone->callstack.SetVal( 0 );
|
||||
zone->SetChild( -1 );
|
||||
zone->query_id = ev.queryId;
|
||||
|
||||
uint64_t ztid;
|
||||
if( ctx->thread == 0 )
|
||||
@@ -5930,7 +6015,7 @@ void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev )
|
||||
ctx->calibratedGpuTime = gpuTime;
|
||||
ctx->calibratedCpuTime = TscTime( ev.cpuTime );
|
||||
}
|
||||
|
||||
|
||||
void Worker::ProcessGpuTimeSync( const QueueGpuTimeSync& ev )
|
||||
{
|
||||
auto ctx = m_gpuCtxMap[ev.context];
|
||||
@@ -5962,6 +6047,26 @@ void Worker::ProcessGpuContextName( const QueueGpuContextName& ev )
|
||||
ctx->name = StringIdx( idx );
|
||||
}
|
||||
|
||||
void Worker::ProcessGpuAnnotationName( const QueueGpuAnnotationName& ev )
|
||||
{
|
||||
auto ctx = m_gpuCtxMap[ev.context];
|
||||
assert( ctx );
|
||||
const auto idx = GetSingleStringIdx();
|
||||
ctx->noteNames[ev.noteId] = StringIdx( idx );
|
||||
}
|
||||
|
||||
void Worker::ProcessGpuZoneAnnotation( const QueueGpuZoneAnnotation& ev )
|
||||
{
|
||||
auto ctx = m_gpuCtxMap[ev.context];
|
||||
assert( ctx );
|
||||
auto note = ctx->notes.find( ev.queryId );
|
||||
if( note == ctx->notes.end() ) {
|
||||
note = ctx->notes.emplace( ev.queryId, decltype(ctx->notes)::mapped_type{} ).first;
|
||||
note->second.reserve( ctx->noteNames.size() );
|
||||
}
|
||||
note->second[ev.noteId] = ev.value;
|
||||
}
|
||||
|
||||
MemEvent* Worker::ProcessMemAllocImpl( MemData& memdata, const QueueMemAlloc& ev )
|
||||
{
|
||||
if( memdata.active.find( ev.ptr ) != memdata.active.end() )
|
||||
@@ -6130,6 +6235,65 @@ void Worker::ProcessMemFreeCallstackNamed( const QueueMemFree& ev )
|
||||
m_serialNextCallstack = 0;
|
||||
}
|
||||
|
||||
void Worker::ProcessMemDiscard( const QueueMemDiscard& ev )
|
||||
{
|
||||
assert( m_memNamePayload == 0 );
|
||||
auto it = m_data.memNameMap.find( ev.name );
|
||||
if( it == m_data.memNameMap.end() ) return;
|
||||
|
||||
const auto refTime = RefTime( m_refTimeSerial, ev.time );
|
||||
auto& memdata = *it->second;
|
||||
|
||||
const auto time = TscTime( refTime );
|
||||
if( m_data.lastTime < time ) m_data.lastTime = time;
|
||||
NoticeThread( ev.thread );
|
||||
const auto thread = CompressThread( ev.thread );
|
||||
|
||||
for( auto& v : memdata.active )
|
||||
{
|
||||
memdata.frees.push_back( v.second );
|
||||
auto& mem = memdata.data[v.second];
|
||||
mem.SetTimeThreadFree( time, thread );
|
||||
memdata.usage -= mem.Size();
|
||||
MemAllocChanged( memdata, time );
|
||||
}
|
||||
|
||||
memdata.active.clear();
|
||||
assert( memdata.usage == 0 );
|
||||
}
|
||||
|
||||
void Worker::ProcessMemDiscardCallstack( const QueueMemDiscard& ev )
|
||||
{
|
||||
assert( m_serialNextCallstack != 0 );
|
||||
auto cs = m_serialNextCallstack;
|
||||
m_serialNextCallstack = 0;
|
||||
|
||||
assert( m_memNamePayload == 0 );
|
||||
auto it = m_data.memNameMap.find( ev.name );
|
||||
if( it == m_data.memNameMap.end() ) return;
|
||||
|
||||
const auto refTime = RefTime( m_refTimeSerial, ev.time );
|
||||
auto& memdata = *it->second;
|
||||
|
||||
const auto time = TscTime( refTime );
|
||||
if( m_data.lastTime < time ) m_data.lastTime = time;
|
||||
NoticeThread( ev.thread );
|
||||
const auto thread = CompressThread( ev.thread );
|
||||
|
||||
for( auto& v : memdata.active )
|
||||
{
|
||||
memdata.frees.push_back( v.second );
|
||||
auto& mem = memdata.data[v.second];
|
||||
mem.SetTimeThreadFree( time, thread );
|
||||
mem.csFree.SetVal( cs );
|
||||
memdata.usage -= mem.Size();
|
||||
MemAllocChanged( memdata, time );
|
||||
}
|
||||
|
||||
memdata.active.clear();
|
||||
assert( memdata.usage == 0 );
|
||||
}
|
||||
|
||||
void Worker::ProcessCallstackSerial()
|
||||
{
|
||||
assert( m_pendingCallstackId != 0 );
|
||||
@@ -6414,7 +6578,7 @@ void Worker::ProcessCallstackFrameSize( const QueueCallstackFrameSize& ev )
|
||||
|
||||
// Frames may be duplicated due to recursion
|
||||
auto fmit = m_data.callstackFrameMap.find( PackPointer( ev.ptr ) );
|
||||
if( fmit == m_data.callstackFrameMap.end() )
|
||||
if( !fmit->second )
|
||||
{
|
||||
m_callstackFrameStaging = m_slab.Alloc<CallstackFrameData>();
|
||||
m_callstackFrameStaging->size = ev.size;
|
||||
@@ -6527,8 +6691,10 @@ void Worker::ProcessCallstackFrame( const QueueCallstackFrame& ev, bool querySym
|
||||
|
||||
if( --m_pendingCallstackSubframes == 0 )
|
||||
{
|
||||
assert( m_data.callstackFrameMap.find( frameId ) == m_data.callstackFrameMap.end() );
|
||||
m_data.callstackFrameMap.emplace( frameId, m_callstackFrameStaging );
|
||||
auto fit = m_data.callstackFrameMap.find( frameId );
|
||||
assert( fit != m_data.callstackFrameMap.end() );
|
||||
assert( !fit->second );
|
||||
fit->second = m_callstackFrameStaging;
|
||||
m_data.codeSymbolMap.emplace( m_callstackFrameStagingPtr, m_callstackFrameStaging->data[0].symAddr );
|
||||
m_callstackFrameStaging = nullptr;
|
||||
}
|
||||
@@ -6683,9 +6849,12 @@ void Worker::ProcessContextSwitch( const QueueContextSwitch& ev )
|
||||
auto& item = data.back();
|
||||
assert( item.Start() <= time );
|
||||
assert( item.End() == -1 );
|
||||
//TODO: It may happen that events are being dropped (for example due to breaking in the debugger, or we are simply too slow to handle the events)
|
||||
// We should handle this properly in some way, but it is unclear how. We can't even really detect it properly here other than when cpu doesn't match.
|
||||
// Something could be displayed onscreen when gaps are detected at the event ringbuffer level?
|
||||
item.SetEnd( time );
|
||||
item.SetReason( ev.reason );
|
||||
item.SetState( ev.state );
|
||||
item.SetReason( ev.oldThreadWaitReason );
|
||||
item.SetState( ev.oldThreadState );
|
||||
|
||||
const auto dt = time - item.Start();
|
||||
it->second->runningTime += dt;
|
||||
@@ -6733,6 +6902,25 @@ void Worker::ProcessContextSwitch( const QueueContextSwitch& ev )
|
||||
}
|
||||
item = &data.push_next();
|
||||
item->SetWakeup( time );
|
||||
item->SetWakeupCpu( ev.cpu );
|
||||
|
||||
if ( it->second->pendingWakeUp.time != 0 )
|
||||
{
|
||||
auto wakeupTime = it->second->pendingWakeUp.time;
|
||||
if ( data.size() > 1 )
|
||||
{
|
||||
// Sometimes the OS tell us it scheduled a thread that was still alive but on the
|
||||
// verge of being switched out. We thus end up with `wakeup < switchout`.
|
||||
// So instead, compare with the previous wakeup.
|
||||
const auto previousWakeup = data[data.size() - 2].WakeupVal();
|
||||
if ( previousWakeup <= wakeupTime && wakeupTime <= time )
|
||||
{
|
||||
item->SetWakeup( wakeupTime );
|
||||
item->SetWakeupCpu( it->second->pendingWakeUp.cpu );
|
||||
it->second->pendingWakeUp.time = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
item->SetStart( time );
|
||||
item->SetEnd( -1 );
|
||||
@@ -6772,15 +6960,32 @@ void Worker::ProcessThreadWakeup( const QueueThreadWakeup& ev )
|
||||
it = m_data.ctxSwitch.emplace( ev.thread, ctx ).first;
|
||||
}
|
||||
auto& data = it->second->v;
|
||||
if( !data.empty() && !data.back().IsEndValid() ) return; // wakeup of a running thread
|
||||
auto& item = data.push_next();
|
||||
item.SetWakeup( time );
|
||||
item.SetStart( time );
|
||||
item.SetEnd( -1 );
|
||||
item.SetCpu( 0 );
|
||||
item.SetReason( ContextSwitchData::Wakeup );
|
||||
item.SetState( -1 );
|
||||
item.SetThread( 0 );
|
||||
if( !data.empty() && !data.back().IsEndValid() )
|
||||
{
|
||||
// We received the wakeup before thread switches out. This can actually happen!
|
||||
// So instead of dropping the information, keep the last one around so that we
|
||||
// may fetch it once the thread actually switches out.
|
||||
// We rely on the fact we won't get another one in the meantime.
|
||||
auto& item = data.back();
|
||||
it->second->pendingWakeUp.time = time;
|
||||
it->second->pendingWakeUp.cpu = ev.cpu;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto& item = data.push_next();
|
||||
item.SetWakeupCpu( ev.cpu );
|
||||
item.SetWakeup( time );
|
||||
item.SetStart( time );
|
||||
item.SetEnd( -1 );
|
||||
item.SetCpu( 0 );
|
||||
item.SetReason( ContextSwitchData::Wakeup );
|
||||
item.SetState( -1 );
|
||||
item.SetThread( 0 );
|
||||
//TODO: Adjust reason + adjust count instead of thread which is unused?
|
||||
// Adjust Reason 1 => Unwait
|
||||
// Adjust Reason 2 => Boost
|
||||
}
|
||||
}
|
||||
|
||||
void Worker::ProcessTidToPid( const QueueTidToPid& ev )
|
||||
@@ -6859,13 +7064,17 @@ void Worker::ProcessSourceCodeNotAvailable( const QueueSourceCodeNotAvailable& e
|
||||
void Worker::ProcessCpuTopology( const QueueCpuTopology& ev )
|
||||
{
|
||||
auto package = m_data.cpuTopology.find( ev.package );
|
||||
if( package == m_data.cpuTopology.end() ) package = m_data.cpuTopology.emplace( ev.package, unordered_flat_map<uint32_t, std::vector<uint32_t>> {} ).first;
|
||||
auto core = package->second.find( ev.core );
|
||||
if( core == package->second.end() ) core = package->second.emplace( ev.core, std::vector<uint32_t> {} ).first;
|
||||
if( package == m_data.cpuTopology.end() ) package = m_data.cpuTopology.emplace( ev.package, unordered_flat_map<uint32_t, unordered_flat_map<uint32_t, std::vector<uint32_t>>> {} ).first;
|
||||
|
||||
auto die = package->second.find( ev.die );
|
||||
if( die == package->second.end() ) die = package->second.emplace( ev.die, unordered_flat_map<uint32_t, std::vector<uint32_t>> {} ).first;
|
||||
|
||||
auto core = die->second.find( ev.core );
|
||||
if( core == die->second.end() ) core = die->second.emplace( ev.core, std::vector<uint32_t> {} ).first;
|
||||
core->second.emplace_back( ev.thread );
|
||||
|
||||
assert( m_data.cpuTopologyMap.find( ev.thread ) == m_data.cpuTopologyMap.end() );
|
||||
m_data.cpuTopologyMap.emplace( ev.thread, CpuThreadTopology { ev.package, ev.core } );
|
||||
m_data.cpuTopologyMap.emplace( ev.thread, CpuThreadTopology { ev.package, ev.die, ev.core } );
|
||||
}
|
||||
|
||||
void Worker::ProcessMemNamePayload( const QueueMemNamePayload& ev )
|
||||
@@ -6922,9 +7131,13 @@ void Worker::ProcessFiberEnter( const QueueFiberEnter& ev )
|
||||
}
|
||||
auto& data = cit->second->v;
|
||||
auto& item = data.push_next();
|
||||
item.SetStartCpu( t, 0 );
|
||||
item.SetStart( t );
|
||||
item.SetCpu( 0 );
|
||||
item.SetWakeup( t );
|
||||
item.SetEndReasonState( -1, ContextSwitchData::Fiber, -1 );
|
||||
item.SetWakeupCpu( 0 );
|
||||
item.SetEnd( -1 );
|
||||
item.SetReason( ContextSwitchData::Fiber );
|
||||
item.SetState( -1 );
|
||||
item.SetThread( CompressThread( ev.thread ) );
|
||||
}
|
||||
|
||||
@@ -6991,10 +7204,10 @@ void Worker::CreateMemAllocPlot( MemData& memdata )
|
||||
|
||||
void Worker::ReconstructMemAllocPlot( MemData& mem )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
ppqsort::sort( ppqsort::execution::par, mem.frees.begin(), mem.frees.end(), [&mem] ( const auto& lhs, const auto& rhs ) { return mem.data[lhs].TimeFree() < mem.data[rhs].TimeFree(); } );
|
||||
#endif
|
||||
|
||||
const auto psz = mem.data.size() + mem.frees.size() + 1;
|
||||
@@ -7440,14 +7653,14 @@ int64_t Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refT
|
||||
}
|
||||
}
|
||||
|
||||
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx )
|
||||
void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId )
|
||||
{
|
||||
uint64_t sz;
|
||||
f.Read( sz );
|
||||
ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, sz );
|
||||
ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, sz, hasQueryId );
|
||||
}
|
||||
|
||||
void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz )
|
||||
void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId )
|
||||
{
|
||||
if( sz == 0 )
|
||||
{
|
||||
@@ -7458,7 +7671,7 @@ void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime
|
||||
const auto idx = childIdx;
|
||||
childIdx++;
|
||||
zone->SetChild( idx );
|
||||
ReadTimeline( f, m_data.gpuChildren[idx], sz, refTime, refGpuTime, childIdx );
|
||||
ReadTimeline( f, m_data.gpuChildren[idx], sz, refTime, refGpuTime, childIdx, hasQueryId );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7600,7 +7813,7 @@ int64_t Worker::ReadTimeline( FileRead& f, Vector<short_ptr<ZoneEvent>>& _vec, u
|
||||
return refTime;
|
||||
}
|
||||
|
||||
void Worker::ReadTimeline( FileRead& f, Vector<short_ptr<GpuEvent>>& _vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx )
|
||||
void Worker::ReadTimeline( FileRead& f, Vector<short_ptr<GpuEvent>>& _vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId )
|
||||
{
|
||||
assert( size != 0 );
|
||||
const auto lp = s_loadProgress.subProgress.load( std::memory_order_relaxed );
|
||||
@@ -7624,13 +7837,14 @@ void Worker::ReadTimeline( FileRead& f, Vector<short_ptr<GpuEvent>>& _vec, uint6
|
||||
zone->SetCpuStart( refTime );
|
||||
zone->SetGpuStart( refGpuTime );
|
||||
|
||||
ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, childSz );
|
||||
ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, childSz, hasQueryId );
|
||||
|
||||
f.Read2( tcpu, tgpu );
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
zone->SetCpuEnd( refTime );
|
||||
zone->SetGpuEnd( refGpuTime );
|
||||
if( hasQueryId ) f.Read( zone->query_id );
|
||||
}
|
||||
while( ++zone != end );
|
||||
}
|
||||
@@ -7663,7 +7877,6 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
|
||||
f.Write( FileHeader, sizeof( FileHeader ) );
|
||||
|
||||
f.Write( &m_delay, sizeof( m_delay ) );
|
||||
f.Write( &m_resolution, sizeof( m_resolution ) );
|
||||
f.Write( &m_timerMul, sizeof( m_timerMul ) );
|
||||
f.Write( &m_data.lastTime, sizeof( m_data.lastTime ) );
|
||||
@@ -7699,14 +7912,20 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
sz = package.second.size();
|
||||
f.Write( &package.first, sizeof( package.first ) );
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& core : package.second )
|
||||
for( auto& die : package.second )
|
||||
{
|
||||
sz = core.second.size();
|
||||
f.Write( &core.first, sizeof( core.first ) );
|
||||
sz = die.second.size();
|
||||
f.Write( &die.first, sizeof( die.first ) );
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& thread : core.second )
|
||||
for( auto& core : die.second )
|
||||
{
|
||||
f.Write( &thread, sizeof( thread ) );
|
||||
sz = core.second.size();
|
||||
f.Write( &core.first, sizeof( core.first ) );
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& thread : core.second )
|
||||
{
|
||||
f.Write( &thread, sizeof( thread ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -7930,10 +8149,10 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
}
|
||||
if( m_inconsistentSamples )
|
||||
{
|
||||
#ifdef NO_PARALLEL_SORT
|
||||
#ifdef __EMSCRIPTEN__
|
||||
pdqsort_branchless( thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
#else
|
||||
std::sort( std::execution::par_unseq, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
ppqsort::sort( ppqsort::execution::par, thread->samples.begin(), thread->samples.end(), [] ( const auto& lhs, const auto& rhs ) { return lhs.time.Val() < rhs.time.Val(); } );
|
||||
#endif
|
||||
}
|
||||
sz = thread->samples.size();
|
||||
@@ -7963,6 +8182,13 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
f.Write( &ctx->type, sizeof( ctx->type ) );
|
||||
f.Write( &ctx->name, sizeof( ctx->name ) );
|
||||
f.Write( &ctx->overflow, sizeof( ctx->overflow ) );
|
||||
sz = ctx->noteNames.size();
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& p : ctx->noteNames )
|
||||
{
|
||||
f.Write( &p.first, sizeof( p.first ) );
|
||||
f.Write( &p.second, sizeof( p.second ) );
|
||||
}
|
||||
sz = ctx->threadData.size();
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& td : ctx->threadData )
|
||||
@@ -7973,6 +8199,20 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
f.Write( &tid, sizeof( tid ) );
|
||||
WriteTimeline( f, td.second.timeline, refTime, refGpuTime );
|
||||
}
|
||||
|
||||
sz = ctx->notes.size();
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& notes : ctx->notes )
|
||||
{
|
||||
f.Write( ¬es.first, sizeof( notes.first ) );
|
||||
sz = notes.second.size();
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
for( auto& note : notes.second )
|
||||
{
|
||||
f.Write( ¬e.first, sizeof( note.first ) );
|
||||
f.Write( ¬e.second, sizeof( note.second ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sz = m_data.plots.Data().size();
|
||||
@@ -8057,15 +8297,19 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
f.Write( cs->data(), sizeof( CallstackFrameId ) * csz );
|
||||
}
|
||||
|
||||
sz = m_data.callstackFrameMap.size();
|
||||
sz = m_data.callstackFrameMap.size() - m_pendingCallstackFrames;
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
uint64_t check = 0;
|
||||
for( auto& frame : m_data.callstackFrameMap )
|
||||
{
|
||||
if( !frame.second ) continue;
|
||||
f.Write( &frame.first, sizeof( CallstackFrameId ) );
|
||||
f.Write( &frame.second->size, sizeof( frame.second->size ) );
|
||||
f.Write( &frame.second->imageName, sizeof( frame.second->imageName ) );
|
||||
f.Write( frame.second->data, sizeof( CallstackFrame ) * frame.second->size );
|
||||
check++;
|
||||
}
|
||||
assert( check == sz );
|
||||
|
||||
sz = m_data.appInfo.size();
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
@@ -8171,6 +8415,7 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
WriteTimeOffset( f, refTime, cs.Start() );
|
||||
WriteTimeOffset( f, refTime, cs.End() );
|
||||
uint8_t cpu = cs.Cpu();
|
||||
uint8_t wakeupcpu = cs.WakeupCpu();
|
||||
int8_t reason = cs.Reason();
|
||||
int8_t state = cs.State();
|
||||
uint64_t thread = DecompressThread( cs.Thread() );
|
||||
@@ -8178,6 +8423,7 @@ void Worker::Write( FileWrite& f, bool fiDict )
|
||||
f.Write( &reason, sizeof( reason ) );
|
||||
f.Write( &state, sizeof( state ) );
|
||||
f.Write( &thread, sizeof( thread ) );
|
||||
f.Write( &wakeupcpu, sizeof( wakeupcpu ) );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8347,6 +8593,7 @@ void Worker::WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime, in
|
||||
|
||||
WriteTimeOffset( f, refTime, v.CpuEnd() );
|
||||
WriteTimeOffset( f, refGpuTime, v.GpuEnd() );
|
||||
f.Write( &v.query_id, sizeof( v.query_id ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user