Grim/explorer memory vis #2319
base: main
Changes from 4 commits
df46c15
8b8975b
ff69240
f066776
2552d47
aed612e
5bcab19
@@ -10,6 +10,8 @@
 from ttmlir import ir, util
 from . import utils

+global memory_data
+global node_id_number


 def parse_loc_string(loc_str):
     """

Review comment: See above: we shouldn't be using globals.
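(Note: at module scope these `global` statements are no-ops; `global` only has an effect inside a function body, where it makes assignments bind the module-level name, as `build_graph` does further down. The declarations that matter are the ones inside `build_graph`.)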
@@ -394,6 +396,7 @@ def parse_ttnn_ttnn_layout(attr):
     result = []
     result.append(graph_builder.KeyValue(key="linear", value=str(layout.linear)))
     memory_layout = layout.tensor_memory_layout_as_int
+
     if memory_layout is not None:
         result.append(
             utils.make_editable_kv(
@@ -462,6 +465,32 @@ def parse_ttnn_ttnn_layout(attr): | |
}, | ||
) | ||
) | ||
|
||
try: | ||
result.append( | ||
utils.add_to_dataclass( | ||
graph_builder.KeyValue( | ||
key="dram_memory", | ||
value=str(memory_data[str(node_id_number)]["dram"]), | ||
), | ||
'display_type', | ||
'memory' | ||
) | ||
) | ||
|
||
result.append( | ||
utils.add_to_dataclass( | ||
graph_builder.KeyValue( | ||
key="l1_memory", | ||
value=str(memory_data[str(node_id_number)]["l1"]), | ||
), | ||
'display_type', | ||
'memory' | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While these do need to be added to each op, I don't think the best location to keep this is in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried adding this code to the build_graph function, because that's definitely more intuitive and eliminates the need for the global variables. I couldn't figure out how to make it work though. When I did, the memory attributes showed up under the outputs section of explorer instead of attributes. If you have any thoughts on how to resolve that, lmk. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah check the parent comment to this review. Essentially you need to modify the # Example
def make_graph_node(self, extra_attrs=None):
attrs = self.get_attributes()
if extra_attrs is not None:
attrs.extend(extra_attrs)
return graph_builder.GraphNode(
id=self.id,
label=str(self.op.name),
namespace=self.get_namespace(),
attrs=attrs,
) |
||
) | ||
except: | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need the try/except block if the exception is just to pass. This case should ideally be successful every time or we guard it with a strict check on how exactly this can go wrong with specific telemetry to the user if this is the case. |
||
|
||
return result | ||
|
||
|
||
|
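A possible shape for that follow-up (a sketch only, not code from this PR: `op_handler`, `operand_index`, and the call site are assumptions), combining the two suggestions above — compute the memory attributes locally in `build_graph` and pass them through `extra_attrs`, with an explicit membership check replacing the bare try/except:

# Sketch: build per-op memory attrs in build_graph and hand them to make_graph_node.
extra_attrs = None
key = str(operand_index)
if key in memory_data:  # explicit guard instead of try/except
    extra_attrs = [
        utils.add_to_dataclass(
            graph_builder.KeyValue(key="dram_memory", value=str(memory_data[key]["dram"])),
            'display_type',
            'memory',
        ),
        utils.add_to_dataclass(
            graph_builder.KeyValue(key="l1_memory", value=str(memory_data[key]["l1"])),
            'display_type',
            'memory',
        ),
    ]
graph.nodes.append(op_handler.make_graph_node(extra_attrs=extra_attrs))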
@@ -528,6 +557,7 @@ def get_attributes(self):
                 key="rank", value=str(output_tensor.type.rank)
             ),
         ]
+
         if hasattr(output_tensor.type, "encoding") and output_tensor.type.encoding:
             if "ttnn_layout" in str(output_tensor.type.encoding):
                 output_attrs.extend(
@@ -544,6 +574,7 @@ def get_attributes(self):
             )
         result.extend(output_attrs)

+
         # Add schedule as an attribute
         result.append(
             graph_builder.KeyValue(key="schedule", value=str(OpHandler.schedule))
@@ -580,7 +611,7 @@ def make_constant_node(self, constant_name): | |
] | ||
|
||
|
||
def build_graph(module, perf_trace=None): | ||
def build_graph(module, perf_trace=None, memory_trace=None): | ||
output_connections = defaultdict(int) | ||
graph = graph_builder.Graph(id="tt-graph") | ||
|
||
|
@@ -598,9 +629,19 @@ def build_graph(module, perf_trace=None):
         if loc:
             loc_to_perf[loc] = row["DEVICE FW DURATION [ns]"]

+    global memory_data
+    global node_id_number
+    memory_data = {}
+    if memory_trace is not None:
+        for node in memory_trace:
+            memory_data[node] = {}
+            memory_data[node]["dram"] = memory_trace[node]["dram"]["device_0"]["total_bytes_allocated_per_bank"] / memory_trace[node]["dram"]["device_0"]["total_bytes_per_bank"]
+            memory_data[node]["l1"] = memory_trace[node]["l1"]["device_0"]["total_bytes_allocated_per_bank"] / memory_trace[node]["l1"]["device_0"]["total_bytes_per_bank"]
+
     module_op = OpHandler(module.operation)
     module_attrs = module_op.get_attributes()
     module_attrs = dict((attr.key, attr.value) for attr in module_attrs)

     # Add module attributes to the graph as "namespace attributes"
     group_node_attrs = {}
     group_node_attrs[module_op.get_namespace()] = module_attrs
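For reference, the nesting this loop reads implies a memory_results.json of roughly the following shape (a hypothetical illustration with made-up values; only the keys actually read are shown, and the real ttrt output may carry more fields):

# Hypothetical shape of memory_results.json, inferred from the keys read above:
memory_trace = {
    "0": {  # one entry per node, keyed by a string ID
        "dram": {"device_0": {"total_bytes_allocated_per_bank": 1024, "total_bytes_per_bank": 4096}},
        "l1": {"device_0": {"total_bytes_allocated_per_bank": 512, "total_bytes_per_bank": 2048}},
    },
}
# build_graph reduces each entry to a utilization fraction, e.g. 1024 / 4096 = 0.25.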
@@ -669,6 +710,7 @@ def build_graph(module, perf_trace=None):
             )

         output_attrs = []
+        node_id_number = operand_index
         if isinstance(operand.type, ir.RankedTensorType):
             output_attrs = [
                 graph_builder.KeyValue(

@@ -681,6 +723,7 @@ def build_graph(module, perf_trace=None):
                     key="rank", value=str(operand.type.rank)
                 ),
             ]
+
             if hasattr(operand.type, "encoding") and operand.type.encoding:
                 if "ttnn_layout" in str(operand.type.encoding):
                     output_attrs.extend(
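(Note: `memory_data` is keyed by the string node IDs taken from the JSON trace, while `node_id_number` is set here to an integer operand index, which is why `parse_ttnn_ttnn_layout` looks entries up via `str(node_id_number)`.)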
@@ -694,7 +737,7 @@ def build_graph(module, perf_trace=None):
             AttrHandler.parse_attr(
                 operand.type.encoding.get_named("tt.layout")
             )
-            )
+        )
         source_node.outputsMetadata.append(
             graph_builder.MetadataItem(
                 id=str(output_connections[source_node.id]),
(second changed file)
@@ -13,6 +13,7 @@
 import pandas as pd
 import threading
 import queue
+import json


 class ExplorerRunException(Exception):
@@ -139,6 +140,19 @@ def get_perf_trace(self, model_path):
             raise FileNotFoundError(f"Performance file {op_perf_file} not found.")

         return pd.read_csv(op_perf_file)

+    def get_memory_usage(self, model_path):
+        mem_file = (
+            f"{self.model_state[model_path].model_output_dir}/run/program_0/memory_results.json"
+        )
+        if not os.path.exists(mem_file):
+            raise FileNotFoundError(f"Memory file {mem_file} not found. Memory file may not have been created. Try running command: ttrt run out.ttnn --memory --save-artifacts")
+
+        with open(mem_file, "r") as file:
+            memory_trace = json.load(file)
+
+        return memory_trace

Review comment: The explorer user will often be unable to directly access the flatbuffer being executed, nor should we expect them to manipulate it. The error message is good, but we don't need the last sentence.

     def run_in_subprocess(self, command):
         self.log(f"Running command:\n{' '.join(command)}\n")
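The call site isn't shown in this diff, but presumably the new accessor gets wired up next to the existing perf trace. A hypothetical sketch (the `runner`, `module`, and `model_path` names are assumptions):

# Sketch: feed both traces to build_graph alongside each other.
perf_trace = runner.get_perf_trace(model_path)
memory_trace = runner.get_memory_usage(model_path)
graph = build_graph(module, perf_trace=perf_trace, memory_trace=memory_trace)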
@@ -299,6 +313,7 @@ def compile_and_run(self, model_path, overrides_string):
             "perf",
             flatbuffer_file,
             f"--artifact-dir={self._explorer_artifacts_dir}",
+            "--memory"
         ]

         ttrt_process = self.run_in_subprocess(ttrt_perf_command)
Review comment: Try to clean up unused code if it's not going to be used in the future.