diff --git a/programs/perf-script-to-profile.py b/programs/perf-script-to-profile.py new file mode 100644 index 0000000000..8b2215d189 --- /dev/null +++ b/programs/perf-script-to-profile.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 + +""" +Copyright 2025 Hans-Kristian Arntzen for Valve Corporation + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +""" + +""" +Script which reads perf.data and converts it into the VKD3D_QUEUE_PROFILE format for correlation with D3D12 events. + +To use: + +Disable paranoid perf (or use sudo when capturing): +# echo -1 > /proc/sys/kernel/perf_event_paranoid + +$ VKD3D_QUEUE_PROFILE=/tmp/profile.json VKD3D_QUEUE_PROFILE_ABSOLUTE=1 $game + +# While game is running, find the PID and capture it. +# Proton uses MONOTONIC_RAW for its timebase it seems, so need that to correlate with vkd3d-proton profile. +$ perf record -F $rate -k CLOCK_MONOTONIC_RAW --pid $pid + +# Add perf info to queue profile +$ python perf-script-to-profile.py --rate $rate perf.data >> /tmp/profile.json +""" + +import argparse +import os +import sys +import subprocess +import re +import collections +import math + +class ThreadState: + def __init__(self, comm, tid, expected_delta): + self.start_ts = 0.0 + self.last_ts = 0.0 + self.idle_delta = expected_delta + self.comm = comm + self.current_sym = '' + self.current_dso = '' + self.cycles = 0 + self.tid = tid + + def _begin_event(self, ts, cycles, sym, dso): + self.start_ts = ts + self.last_ts = ts + self.current_sym = sym + self.current_dso = dso + self.cycles = cycles + + def register_event(self, ts, cycles, sym, dso): + if self.start_ts == 0.0: + # First event + self._begin_event(ts, cycles, sym, dso) + elif (ts - self.last_ts) > (1.5 * self.idle_delta): + # Detected some idling + # Assume that thread went idle in the middle of the sampling period + self.last_ts += 0.5 * self.idle_delta + self.flush_event() + + #self.start_ts = self.last_ts + #self.last_ts = ts + #self.current_dso = 'idle' + #self.current_sym = 'idle' + #self.flush_event() + + self._begin_event(ts, cycles, sym, dso) + elif self.current_sym != sym or self.current_dso != dso: + self.last_ts = ts + self.flush_event() + self._begin_event(ts, cycles, sym, dso) + else: + # Keep going + self.last_ts = ts + self.cycles += cycles + + def flush_event(self): + if self.start_ts != self.last_ts: + print('{', '"name": "{} ({}) ({} cycles)", "ph" : "X", "tid": "{} ({})", "pid": "perf", "ts": {}, "dur": {}'.format( + self.current_dso, self.current_sym, self.cycles, self.comm, self.tid, self.start_ts * 1000000.0, + (self.last_ts - self.start_ts) * 1000000.0), '},') + +def main(): + parser = argparse.ArgumentParser(description = 'Script for parsing perf profiling data.') + parser.add_argument('profile', help = 'The profile binary blob (perf.data).') + parser.add_argument('--rate', type = int, default = 4000, help = 'The sampling rate used (used to detect discontinuity)') + parser.add_argument('--start', type = float, default = 0.0, help = 'Start timestamp to emit (in seconds)') + parser.add_argument('--end', type = float, default = math.inf, help = 'End timestamp to emit (in seconds)') + parser.add_argument('--filter-tids', nargs = '+', action = 'extend', type = int, help = 'Only emit provided tids') + + args = parser.parse_args() + if not args.profile: + raise AssertionError('Need profile folder.') + + expected_delta = 1.0 / args.rate + thread_state = {} + + with subprocess.Popen(['perf', 'script', '-F', 'trace:sym,tid,time', '--ns', '-i', args.profile], stdout = subprocess.PIPE) as proc: + while True: + line = proc.stdout.readline() + if not line: + break + line = line.decode('utf-8') + if line[-1] == '\n': + line = line[0:-1] + + if line[0] == '\t': + # Ignore empty first lines, since they are used for backtraces when -g is used. + continue + + comm = line[0:16].strip() + line = line[16:] + + elems = list(filter(None, line.split(' '))) + + if len(elems) < 7: + continue + + tid = int(elems[0]) + + if args.filter_tids and (tid not in args.filter_tids): + continue + + ts = float(elems[1][0:-1]) + + if ts < args.start or ts > args.end: + continue + + cycles = int(elems[2]) + sym = elems[5] + dso = elems[6] + + # Massage the DSO so it's more readable. + dso = os.path.basename(dso) + if dso[-1] == ')': + dso = dso[:-1] + + if tid not in thread_state: + thread_state[tid] = ThreadState(comm, tid, expected_delta) + + state = thread_state[tid] + state.register_event(ts, cycles, sym, dso) + + for i in thread_state.items(): + i[1].flush_event() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/programs/vkd3d-fsync-log-to-profile.py b/programs/vkd3d-fsync-log-to-profile.py new file mode 100644 index 0000000000..2fd9db30b1 --- /dev/null +++ b/programs/vkd3d-fsync-log-to-profile.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +""" +Copyright 2024 Hans-Kristian Arntzen for Valve Corporation + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +""" + +""" +Hacky script that parses PROTON_LOG=+fsync,+microsecs and emits wait-states in VKD3D_QUEUE_PROFILE format with VKD3D_QUEUE_PROFILE_ABSOLUTE=1. +Equivalent in more plain Wine would be WINEDEBUG=+fsync,+timestamp,+pid,+tid,+threadname,+microsecs WINEFSYNC=1. +Can be used to directly append to vkd3d-proton generated JSON. +""" + +import re +import sys +import argparse + +def print_json_wait(tid, desc, start_ts, end_ts): + print('{', '"name": "{}", "ph" : "X", "tid": "{}", "pid": "fsync wait", "ts": {}, "dur": {}'.format(desc, tid, start_ts, end_ts - start_ts), '},') + +def print_json_signal(tid, desc, ts): + print('{', '"name": "{}", "ph" : "i", "tid": "{}", "pid": "fsync signal", "ts": {}'.format(desc, tid, ts), '},') + +def print_nt_read_file(tid, desc, ts): + print('{', '"name": "{}", "ph" : "i", "tid": "{}", "pid": "NtReadFile", "ts": {}'.format(desc, tid, ts), '},') + +def strip_hex_prefix(x): + if x.startswith('0x'): + return x[2:] + else: + return x + +def time_in_bounds(time, start_time, end_time): + if start_time == end_time: + return True + else: + return time >= start_time and time <= end_time + +def parse_proton_log(file, start_time, duration_time): + end_time = start_time + duration_time + find_threadname = re.compile(r'.+:([0-9A-Fa-f]+):warn:threadname:.*renamed to L"(.+)"$') + find_wait = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Waiting for (all|any) of \d+ handles: ([^,]+),.*$') + find_wake = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Woken up by handle 0x([0-9A-Fa-f]+) .*$') + find_wake_timeout = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:__fsync_wait_objects Wait timed out.$') + find_set_event = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:fsync_set_event 0x([0-9A-Fa-f]+)\.$') + find_release_sem = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:fsync:fsync_release_semaphore 0x([0-9A-Fa-f]+),.*$') + find_nt_read_file = re.compile(r'(\d+\.\d+):.+:([0-9A-Fa-f]+):trace:file:NtReadFile \((.+)\)$') + thread_id_to_name = {} + sleep_states = {} + signal_cause = {} + + for line in file.readlines(): + m = find_threadname.match(line) + if m: + thread_id_to_name[m[1]] = m[1] + ' (' + m[2] + ')' + + m = find_wait.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + any_or_all = m[3] + handle_list = m[4] + if tid in sleep_states: + raise Exception('{} has a sleep state already. line: "{}"'.format(tid, m[0])) + sleep_states[tid] = (any_or_all, [strip_hex_prefix(x) for x in handle_list.split(' ')], time) + continue + + m = find_wake_timeout.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + try: + state = sleep_states.pop(tid) + name = thread_id_to_name.get(tid, tid) + pretty_list = ', '.join(state[1]) + desc = '{} {} timeout'.format(state[0], pretty_list) + if time_in_bounds(time, start_time, end_time): + print_json_wait(name, desc, state[2], time) + except KeyError as e: + raise Exception('{} timed out, but there is no wait state? line: "{}"'.format(tid, m[0])) + continue + + m = find_wake.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + handle = m[3] + try: + state = sleep_states.pop(tid) + name = thread_id_to_name.get(tid, tid) + + pretty_list = ', '.join(state[1]) + if len(state[1]) > 1 and state[0] == 'any': + desc = '{} {} woken by {}'.format(state[0], pretty_list, handle) + else: + desc = '{} {}'.format(state[0], pretty_list) + + cause_tid = signal_cause.get(handle) + if cause_tid: + cause_tid = signal_cause.pop(handle) + desc += ' [signal by {}]'.format(thread_id_to_name.get(cause_tid, cause_tid)) + + if time_in_bounds(time, start_time, end_time) or time_in_bounds(state[2], start_time, end_time): + print_json_wait(name, desc, state[2], time) + except KeyError as e: + raise Exception('{} was woken up, but there is no wait state? line: "{}"'.format(tid, m[0])) + continue + + m = find_set_event.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + handle = m[3] + name = thread_id_to_name.get(tid, tid) + if time_in_bounds(time, start_time, end_time): + print_json_signal(name, 'event ' + handle, time) + signal_cause[handle] = tid + continue + + m = find_release_sem.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + handle = m[3] + name = thread_id_to_name.get(tid, tid) + if time_in_bounds(time, start_time, end_time): + print_json_signal(name, 'semaphore ' + handle, time) + signal_cause[handle] = tid + continue + + m = find_nt_read_file.search(line) + if m: + time = int(float(m[1]) * 1e6) + tid = m[2] + name = thread_id_to_name.get(tid, tid) + if 'vkd3d' in name or 'wine_' in name: + continue + if time_in_bounds(time, start_time, end_time): + args = m[3].split(',') + print_nt_read_file(name, 'handle {}, {} bytes'.format(args[0], int(args[6], 16)), time) + +def main(): + parser = argparse.ArgumentParser(description = 'Script for dumping Proton fsync to trace.') + parser.add_argument('--start', default = 0, type = float, help = 'Filter events starting from time in seconds.') + parser.add_argument('--duration', default = 0, type = float, help = 'Duration for event filtering in seconds.') + parser.add_argument('log', help = 'Path to PROTON_LOG=+fsync,+microsecs output.') + args = parser.parse_args() + + if not args.log: + raise AssertionError('Missing log.') + + with open(args.log, 'r') as f: + parse_proton_log(f, int(args.start * 1e6), int(args.duration * 1e6)) + +if __name__ == '__main__': + main()