Skip to content

Commit

Permalink
#7449: allow users to specify compile opt level
Browse files Browse the repository at this point in the history
Add a field for users to set the compiler optimization level in
the config passed to CreateKernel

Default is still O3 for compute and Os for rest
  • Loading branch information
nhuang-tt committed Feb 25, 2025
1 parent 854990f commit ef1f62a
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 63 deletions.
26 changes: 21 additions & 5 deletions tt_metal/api/tt-metalium/build.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include <string_view>
#include <thread>
#include <string>
#include <future>
Expand Down Expand Up @@ -101,14 +102,22 @@ class alignas(CACHE_LINE_ALIGNMENT) JitBuildState {

string link_objs_;

// Default compiler optimization setting
// Used when JitBuildSettings is not provided
string default_compile_opt_level_;

// Default linker optimization setting
// Used when JitBuildSettings is not provided
string default_linker_opt_level_;

void compile(const string& log_file, const string& out_path, const JitBuildSettings* settings) const;
void compile_one(
const string& log_file,
const string& out_path,
const JitBuildSettings* settings,
const string& src,
const string& obj) const;
void link(const string& log_file, const string& out_path) const;
void link(const string& log_file, const string& out_path, const JitBuildSettings* settings) const;
void weaken(const string& log_file, const string& out_path) const;
void copy_kernel(const string& kernel_in_path, const string& op_out_path) const;
void extract_zone_src_locations(const string& log_file) const;
Expand Down Expand Up @@ -169,12 +178,19 @@ class JitBuildIdleEthernet : public JitBuildState {
// (eg, API specified settings)
// Abstract interface for per-build settings. A pointer to it is accepted by jit_build and by
// JitBuildState::compile / compile_one / link; the kernel classes (DataMovementKernel,
// EthernetKernel, ComputeKernel) override the optimization-level getters declared here.
class JitBuildSettings {
public:
// NOTE(review): get_full_kernel_name is declared twice below (`string` and `std::string`
// spellings) — presumably only one of the two declarations is meant to remain; confirm.
virtual const string& get_full_kernel_name() const = 0;
// Returns the full kernel name
virtual const std::string& get_full_kernel_name() const = 0;
// Returns the compiler optimization level
// (the kernel classes return the enumerator name of their config's opt_level)
virtual std::string_view get_compiler_opt_level() const = 0;
// Returns the linker optimization level
// (the kernel classes forward to get_compiler_opt_level())
virtual std::string_view get_linker_opt_level() const = 0;

// Called to process the user defines
// The callback is handed a define name and its value.
virtual void process_defines(const std::function<void(const string& define, const string& value)>) const = 0;
// Called to process the user compile time args
// The callback is handed the argument index `i` and the argument's value.
virtual void process_compile_time_args(const std::function<void(int i, uint32_t value)>) const = 0;

private:
bool use_multi_threaded_compile = true;
// NOTE(review): as listed, this destructor follows `private:`. A private base destructor would
// prevent derived classes from being destroyed — confirm the intended access level.
virtual ~JitBuildSettings() = default;
};

void jit_build(const JitBuildState& build, const JitBuildSettings* settings);
Expand Down
21 changes: 17 additions & 4 deletions tt_metal/api/tt-metalium/kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <string_view>
#include <vector>
#include <map>
#include <variant>
Expand Down Expand Up @@ -176,7 +177,11 @@ class DataMovementKernel : public Kernel {

void process_defines(const std::function<void (const string& define, const string &value)>) const override;

private:
std::string_view get_compiler_opt_level() const override;

std::string_view get_linker_opt_level() const override;

private:
const DataMovementConfig config_;

uint8_t expected_num_binaries() const override;
Expand Down Expand Up @@ -204,7 +209,11 @@ class EthernetKernel : public Kernel {

void process_defines(const std::function<void(const string &define, const string &value)>) const override;

private:
std::string_view get_compiler_opt_level() const override;

std::string_view get_linker_opt_level() const override;

private:
const EthernetConfig config_;

uint8_t expected_num_binaries() const override;
Expand Down Expand Up @@ -233,7 +242,11 @@ class ComputeKernel : public Kernel {

void process_defines(const std::function<void (const string& define, const string &value)>) const override;

private:
std::string_view get_compiler_opt_level() const override;

std::string_view get_linker_opt_level() const override;

private:
const ComputeConfig config_;

uint8_t expected_num_binaries() const override;
Expand Down
19 changes: 9 additions & 10 deletions tt_metal/api/tt-metalium/kernel_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@ namespace tt::tt_metal {

using KernelHandle = std::uint32_t;

// Option that controls compiler optimization level
// Option that controls build optimization level
// The enumerator names double as the textual optimization level: the kernel classes return
// magic_enum::enum_name(config_.opt_level) from get_compiler_opt_level(), so renaming an
// enumerator changes the string they report.
enum class KernelBuildOptLevel : uint8_t {
O1, // Level 1 optimization. Same as O.
O2, // Level 2 optimization. Turns on all flags specified by O1.
O3, // Level 3 optimization. Turns on all flags specified by O2.
O1, // Turns on level 1 optimization. Same as O.
O2, // Turns on level 2 optimization and also all flags specified by O1.
O3, // Turns on level 3 optimization and also all flags specified by O2.
O0, // Reduce compilation time and make debugging produce the expected results.
Os, // Optimize for size. Enables O2 optimizations except for those that increase binary size.
Ofast, // Enable all O3 and non-standard optimizations.
Og, // Optimize for debugging.
Os, // Optimize for size and also O2 optimizations except for those that increase binary size.
Ofast, // Turns on level O3 and also non-standard optimizations.
Oz, // Aggressively optimize for size rather than speed.
};

Expand All @@ -36,7 +35,7 @@ struct DataMovementConfig {
// Each unique combination of defines will produce a unique compiled instantiation
// This file is then automatically included in the generated compiled kernel files
std::map<std::string, std::string> defines;
// Kernel optimization level
// Set the compiler and linker optimization level
KernelBuildOptLevel opt_level = KernelBuildOptLevel::Os;
};

Expand All @@ -60,7 +59,7 @@ struct ComputeConfig {
// Each unique combination of defines will produce a unique compiled instantiation
// This file is then automatically included in the generated compiled kernel files
std::map<std::string, std::string> defines;
// Kernel optimization level
// Set the compiler and linker optimization level
KernelBuildOptLevel opt_level = KernelBuildOptLevel::O3;
};

Expand All @@ -73,7 +72,7 @@ struct EthernetConfig {
// Each unique combination of defines will produce a unique compiled instantiation
// This file is then automatically included in the generated compiled kernel files
std::map<std::string, std::string> defines;
// Kernel optimization level
// Set the compiler and linker optimization level
KernelBuildOptLevel opt_level = KernelBuildOptLevel::Os;
};

Expand Down
25 changes: 23 additions & 2 deletions tt_metal/impl/kernels/kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#include <kernel.hpp>
#include <kernel_types.hpp>

#include <fmt/core.h>
#include <fmt/ranges.h>

#include <magic_enum/magic_enum.hpp>
#include <set>

#include <build.hpp>
#include "llrt.hpp"
#include <string_view>
#include <tt_metal.hpp>
#include "tt_metal/impl/debug/watcher_server.hpp"
#include "tt_metal/kernel.hpp"
Expand Down Expand Up @@ -105,7 +108,7 @@ CoreType Kernel::get_kernel_core_type() const {
return CoreType::WORKER;
}

const string &Kernel::get_full_kernel_name() const { return this->kernel_full_name_; }
const std::string& Kernel::get_full_kernel_name() const { return this->kernel_full_name_; }

void Kernel::add_defines(const std::map<std::string, std::string>& defines) {
this->defines_.insert(defines.begin(), defines.end());
Expand Down Expand Up @@ -141,6 +144,24 @@ void EthernetKernel::process_defines(
callback("NOC_MODE", std::to_string(NOC_MODE::DM_DEDICATED_NOC));
}

std::string_view DataMovementKernel::get_compiler_opt_level() const {
return magic_enum::enum_name(this->config_.opt_level);
}

std::string_view DataMovementKernel::get_linker_opt_level() const { return this->get_compiler_opt_level(); }

std::string_view ComputeKernel::get_compiler_opt_level() const {
return magic_enum::enum_name(this->config_.opt_level);
}

std::string_view ComputeKernel::get_linker_opt_level() const { return this->get_compiler_opt_level(); }

std::string_view EthernetKernel::get_compiler_opt_level() const {
return magic_enum::enum_name(this->config_.opt_level);
}

std::string_view EthernetKernel::get_linker_opt_level() const { return this->get_compiler_opt_level(); }

void Kernel::process_compile_time_args(const std::function<void(int i, uint32_t value)> callback) const {
for (int i = 0; i < this->compile_time_args_.size(); i++) {
callback(i, this->compile_time_args_[i]);
Expand Down
Loading

0 comments on commit ef1f62a

Please sign in to comment.