Merge branch 'master' of https://github.com/NVIDIA/TRTorch into input_type

Signed-off-by: Naren Dasan <[email protected]>
narendasan committed Jul 21, 2021
2 parents c39bf81 + 75e86e8 commit aaddaf1
Show file tree
Hide file tree
Showing 47 changed files with 1,503 additions and 196 deletions.
16 changes: 11 additions & 5 deletions core/compiler.cpp
@@ -31,9 +31,11 @@ void AddEngineToGraph(
torch::jit::script::Module mod,
std::shared_ptr<torch::jit::Graph>& g,
const std::string& serialized_engine,
runtime::CudaDevice& device_info,
std::string engine_id = "",
bool fallback = false) {
auto engine_ptr = c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + engine_id, serialized_engine);
auto engine_ptr =
c10::make_intrusive<runtime::TRTEngine>(mod._ivalue()->name() + engine_id, serialized_engine, device_info);
// Get required metadata about the engine out
auto num_io = engine_ptr->num_io;
auto name = engine_ptr->name;
@@ -220,7 +222,9 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
convert_cfg.inputs = inputs;
auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
auto temp_g = std::make_shared<torch::jit::Graph>();
AddEngineToGraph(new_mod, temp_g, engine, trt_engine_id.str(), true);
auto device_spec = convert_cfg.engine_settings.device;
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
AddEngineToGraph(new_mod, temp_g, engine, cuda_device, trt_engine_id.str(), true);

seg_block.update_graph(temp_g);
AddSegmentedBlockToGraph(new_g, seg_block, old_to_new_g);
@@ -260,7 +264,9 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
if (method.name().compare("forward") == 0) {
auto engine = ConvertGraphToTRTEngine(mod, method.name(), cfg);
auto new_g = std::make_shared<torch::jit::Graph>();
AddEngineToGraph(new_mod, new_g, engine);
auto device_spec = cfg.convert_info.engine_settings.device;
auto cuda_device = runtime::CudaDevice(device_spec.gpu_id, device_spec.device_type);
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
auto new_method = new_mod._ivalue()->compilation_unit()->create_function(method.name(), new_g);
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
new_mod.type()->addMethod(new_method);
@@ -271,12 +277,12 @@ torch::jit::script::Module CompileGraph(const torch::jit::script::Module& mod, C
return new_mod;
}

torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine) {
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device) {
std::ostringstream engine_id;
engine_id << reinterpret_cast<const int*>(&engine);
torch::jit::script::Module new_mod("tensorrt_engine_mod_" + engine_id.str());
auto new_g = std::make_shared<torch::jit::Graph>();
AddEngineToGraph(new_mod, new_g, engine);
AddEngineToGraph(new_mod, new_g, engine, cuda_device);
auto new_method = new_mod._ivalue()->compilation_unit()->create_function("forward", new_g);
auto schema = util::GenerateGraphSchema(new_method->name(), new_g);
new_mod.type()->addMethod(new_method);
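For orientation, a minimal sketch of how a caller might exercise the new signature above, building a runtime::CudaDevice and passing it to EmbedEngineInNewModule. The helper name, namespace qualification, and engine bytes are assumptions for illustration, not part of this diff:

#include "core/compiler.h"
#include "core/runtime/runtime.h"

// Sketch: embed a pre-serialized TensorRT engine into a fresh TorchScript
// module, now tagged with the device it was built for (GPU 0 assumed).
torch::jit::script::Module embed_on_gpu0(const std::string& engine_bytes) {
  auto cuda_device = trtorch::core::runtime::CudaDevice(0, nvinfer1::DeviceType::kGPU);
  return trtorch::core::EmbedEngineInNewModule(engine_bytes, cuda_device);
}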
3 changes: 2 additions & 1 deletion core/compiler.h
@@ -5,6 +5,7 @@
#include "core/conversion/conversion.h"
#include "core/ir/ir.h"
#include "core/partitioning/partitioning.h"
#include "core/runtime/runtime.h"
#include "torch/csrc/jit/api/module.h"

namespace trtorch {
@@ -22,7 +23,7 @@ std::string ConvertGraphToTRTEngine(const torch::jit::script::Module& mod, std::

torch::jit::script::Module CompileGraph(const torch::jit::script::Module& module, CompileSpec cfg);

torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine);
torch::jit::script::Module EmbedEngineInNewModule(const std::string& engine, runtime::CudaDevice cuda_device);

void set_device(const int gpu_id);

2 changes: 1 addition & 1 deletion core/conversion/converters/converter_util.cpp
@@ -53,7 +53,7 @@ nvinfer1::ITensor* addUnpadding(
TRTORCH_CHECK(shuffle_layer, "Unable to create shuffle layer");
shuffle_layer->setReshapeDimensions(newDims);
shuffle_layer->setZeroIsPlaceholder(use_zeros);
shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims)).c_str() + ']');
shuffle_layer->setName((util::node_info(n) + " [Reshape to " + util::toStr(newDims) + "]").c_str());
return shuffle_layer->getOutput(0);
} else {
return tensor;
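This one-line change fixes an operator-precedence bug: in the old expression, .c_str() ran before the + ']', so the closing bracket was added to the returned char* as pointer arithmetic rather than appended to the string. A standalone illustration of the difference (values are illustrative):

#include <string>

// ']' is ASCII 93, so the old form advances the pointer 93 bytes past the
// start of the buffer, far beyond the end of the string: undefined behavior.
std::string name = "node [Reshape to (1, 2, 3";
const char* wrong = name.c_str() + ']'; // pointer + 93, not concatenation
std::string right = name + "]";         // concatenate first, then call .c_str()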
349 changes: 231 additions & 118 deletions core/conversion/converters/impl/lstm_cell.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions core/runtime/BUILD
@@ -10,8 +10,11 @@ config_setting(
cc_library(
name = "runtime",
srcs = [
"CudaDevice.cpp",
"DeviceList.cpp",
"TRTEngine.cpp",
"register_trt_op.cpp",
"runtime.cpp"
],
hdrs = [
"runtime.h",
106 changes: 106 additions & 0 deletions core/runtime/CudaDevice.cpp
@@ -0,0 +1,106 @@
#include "cuda_runtime.h"

#include "core/runtime/runtime.h"
#include "core/util/prelude.h"

namespace trtorch {
namespace core {
namespace runtime {

const std::string DEVICE_INFO_DELIM = "%";

typedef enum { ID_IDX = 0, SM_MAJOR_IDX, SM_MINOR_IDX, DEVICE_TYPE_IDX, DEVICE_NAME_IDX } SerializedDeviceInfoIndex;

CudaDevice::CudaDevice() : id{-1}, major{-1}, minor{-1}, device_type{nvinfer1::DeviceType::kGPU} {}

CudaDevice::CudaDevice(int64_t gpu_id, nvinfer1::DeviceType device_type) {
cudaDeviceProp device_prop;

// Device ID
this->id = gpu_id;

// Get Device Properties
cudaGetDeviceProperties(&device_prop, gpu_id);

// Compute capability major version
this->major = device_prop.major;

// Compute capability minor version
this->minor = device_prop.minor;

std::string device_name(device_prop.name);

// Set Device name
this->device_name = device_name;

// Set Device Type
this->device_type = device_type;
}

// NOTE: Serialization Format for Device Info:
// id%major%minor%(enum)device_type%device_name

CudaDevice::CudaDevice(std::string device_info) {
LOG_DEBUG("Deserializing Device Info: " << device_info);

std::vector<std::string> tokens;
int64_t start = 0;
int64_t end = device_info.find(DEVICE_INFO_DELIM);

while (end != -1) {
tokens.push_back(device_info.substr(start, end - start));
start = end + DEVICE_INFO_DELIM.size();
end = device_info.find(DEVICE_INFO_DELIM, start);
}
tokens.push_back(device_info.substr(start, end - start));

TRTORCH_CHECK(tokens.size() == DEVICE_NAME_IDX + 1, "Unable to deserialize program target device information");

id = std::stoi(tokens[ID_IDX]);
major = std::stoi(tokens[SM_MAJOR_IDX]);
minor = std::stoi(tokens[SM_MINOR_IDX]);
device_type = (nvinfer1::DeviceType)(std::stoi(tokens[DEVICE_TYPE_IDX]));
device_name = tokens[DEVICE_NAME_IDX];

LOG_DEBUG("Deserialized Device Info: " << *this);
}

std::string CudaDevice::serialize() {
std::vector<std::string> content;
content.resize(DEVICE_NAME_IDX + 1);

content[ID_IDX] = std::to_string(id);
content[SM_MAJOR_IDX] = std::to_string(major);
content[SM_MINOR_IDX] = std::to_string(minor);
content[DEVICE_TYPE_IDX] = std::to_string((int64_t)device_type);
content[DEVICE_NAME_IDX] = device_name;

std::stringstream ss;
for (size_t i = 0; i < content.size() - 1; i++) {
ss << content[i] << DEVICE_INFO_DELIM;
}
ss << content[DEVICE_NAME_IDX];

std::string serialized_device_info = ss.str();

LOG_DEBUG("Serialized Device Info: " << serialized_device_info);

return serialized_device_info;
}

std::string CudaDevice::getSMCapability() const {
std::stringstream ss;
ss << major << "." << minor;
return ss.str();
}

std::ostream& operator<<(std::ostream& os, const CudaDevice& device) {
os << "Device(ID: " << device.id << ", Name: " << device.device_name << ", SM Capability: " << device.major << '.'
<< device.minor << ", Type: " << device.device_type << ')';
return os;
}

} // namespace runtime
} // namespace core
} // namespace trtorch
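A brief round-trip of the "%"-delimited format documented in the NOTE above. The sample serialized string in the comment is illustrative, not taken from a real device:

#include <cassert>
#include "core/runtime/runtime.h"

// Sketch: serialize a device record and rebuild it from the string form.
void roundtrip_device_info() {
  trtorch::core::runtime::CudaDevice original(0, nvinfer1::DeviceType::kGPU);
  std::string serialized = original.serialize();
  // e.g. "0%7%0%0%Tesla V100-SXM2-16GB" (id%major%minor%device_type%device_name)
  trtorch::core::runtime::CudaDevice restored(serialized);
  assert(restored.id == original.id);
  assert(restored.getSMCapability() == original.getSMCapability());
}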
48 changes: 48 additions & 0 deletions core/runtime/DeviceList.cpp
@@ -0,0 +1,48 @@
#include "cuda_runtime.h"

#include "core/runtime/runtime.h"
#include "core/util/prelude.h"

namespace trtorch {
namespace core {
namespace runtime {

DeviceList::DeviceList() {
int num_devices = 0;
auto status = cudaGetDeviceCount(&num_devices);
if (status != cudaSuccess) {
LOG_WARNING("Unable to read CUDA capable devices. Return status: " << status);
}

for (int i = 0; i < num_devices; i++) {
device_list[i] = CudaDevice(i, nvinfer1::DeviceType::kGPU);
}

// REVIEW: DO WE CARE ABOUT DLA?

LOG_DEBUG("Runtime:\n Available CUDA Devices: \n" << this->dump_list());
}

void DeviceList::insert(int device_id, CudaDevice cuda_device) {
device_list[device_id] = cuda_device;
}

CudaDevice DeviceList::find(int device_id) {
return device_list[device_id];
}

DeviceList::DeviceMap DeviceList::get_devices() {
return device_list;
}

std::string DeviceList::dump_list() {
std::stringstream ss;
for (auto it = device_list.begin(); it != device_list.end(); ++it) {
ss << " " << it->second << std::endl;
}
return ss.str();
}

} // namespace runtime
} // namespace core
} // namespace trtorch
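A small usage sketch, assuming a DeviceList constructed directly; whether the runtime keeps this in a global singleton is not shown in this diff:

#include "core/runtime/runtime.h"
#include "core/util/prelude.h"

// Sketch: enumerate the visible CUDA devices and look one up by id.
void list_devices() {
  trtorch::core::runtime::DeviceList devices; // enumeration happens in the ctor
  auto dev0 = devices.find(0);                // CudaDevice record for GPU 0
  LOG_DEBUG("Device 0: " << dev0);
  LOG_DEBUG("All devices:\n" << devices.dump_list());
}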
55 changes: 46 additions & 9 deletions core/runtime/TRTEngine.cpp
@@ -1,5 +1,6 @@
#include <algorithm>

#include <cuda_runtime.h>
#include "NvInfer.h"
#include "torch/csrc/jit/frontend/function_schema_parser.h"

@@ -10,30 +11,55 @@ namespace trtorch {
namespace core {
namespace runtime {

typedef enum { ABI_TARGET_IDX = 0, DEVICE_IDX, ENGINE_IDX } SerializedInfoIndex;

std::string slugify(std::string s) {
std::replace(s.begin(), s.end(), '.', '_');
return s;
}

TRTEngine::TRTEngine(std::string serialized_engine)
TRTEngine::TRTEngine(std::string serialized_engine, CudaDevice cuda_device)
: logger(
std::string("[] - "),
util::logging::get_logger().get_reportable_severity(),
util::logging::get_logger().get_is_colored_output_on()) {
std::string _name = "deserialized_trt";
new (this) TRTEngine(_name, serialized_engine);
new (this) TRTEngine(_name, serialized_engine, cuda_device);
}

TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine)
TRTEngine::TRTEngine(std::vector<std::string> serialized_info)
: logger(
std::string("[] = "),
util::logging::get_logger().get_reportable_severity(),
util::logging::get_logger().get_is_colored_output_on()) {
TRTORCH_CHECK(
serialized_info.size() == ENGINE_IDX + 1, "Program to be deserialized targets an incompatible TRTorch ABI");
TRTORCH_CHECK(
serialized_info[ABI_TARGET_IDX] == ABI_VERSION,
"Program to be deserialized targets a different TRTorch ABI Version ("
<< serialized_info[ABI_TARGET_IDX] << ") than the TRTorch Runtime ABI (" << ABI_VERSION << ")");
std::string _name = "deserialized_trt";
std::string engine_info = serialized_info[ENGINE_IDX];

CudaDevice cuda_device = deserialize_device(serialized_info[DEVICE_IDX]);
new (this) TRTEngine(_name, engine_info, cuda_device);
}

TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDevice cuda_device)
: logger(
std::string("[") + mod_name + std::string("_engine] - "),
util::logging::get_logger().get_reportable_severity(),
util::logging::get_logger().get_is_colored_output_on()) {
device_info = cuda_device;
set_cuda_device(device_info);

rt = nvinfer1::createInferRuntime(logger);

name = slugify(mod_name) + "_engine";

cuda_engine = rt->deserializeCudaEngine(serialized_engine.c_str(), serialized_engine.size());
TRTORCH_CHECK((cuda_engine != nullptr), "Unable to deserialize the TensorRT engine");

// Easy way to get a unique name for each engine, maybe there is a more
// descriptive way (using something associated with the graph maybe)
id = reinterpret_cast<EngineID>(cuda_engine);
@@ -63,6 +89,7 @@ TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
id = other.id;
rt = other.rt;
cuda_engine = other.cuda_engine;
device_info = other.device_info;
exec_ctx = other.exec_ctx;
num_io = other.num_io;
return (*this);
@@ -85,18 +112,28 @@ TRTEngine::~TRTEngine() {
namespace {
static auto TRTORCH_UNUSED TRTEngineTSRegistrtion =
torch::class_<TRTEngine>("tensorrt", "Engine")
.def(torch::init<std::string>())
.def(torch::init<std::vector<std::string>>())
// TODO: .def("__call__", &TRTEngine::Run)
// TODO: .def("run", &TRTEngine::Run)
.def_pickle(
[](const c10::intrusive_ptr<TRTEngine>& self) -> std::string {
auto serialized_engine = self->cuda_engine->serialize();
return std::string((const char*)serialized_engine->data(), serialized_engine->size());
[](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
// Serialize TensorRT engine
auto serialized_trt_engine = self->cuda_engine->serialize();

// Adding device info related meta data to the serialized file
auto trt_engine = std::string((const char*)serialized_trt_engine->data(), serialized_trt_engine->size());

std::vector<std::string> serialize_info;
serialize_info.push_back(ABI_VERSION);
serialize_info.push_back(serialize_device(self->device_info));
serialize_info.push_back(trt_engine);
return serialize_info;
},
[](std::string seralized_engine) -> c10::intrusive_ptr<TRTEngine> {
return c10::make_intrusive<TRTEngine>(std::move(seralized_engine));
[](std::vector<std::string> seralized_info) -> c10::intrusive_ptr<TRTEngine> {
return c10::make_intrusive<TRTEngine>(std::move(seralized_info));
});
} // namespace

} // namespace runtime
} // namespace core
} // namespace trtorch
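For reference, the three-element record that def_pickle now emits and that TRTEngine(std::vector<std::string>) validates and consumes on load; cuda_device and trt_engine_bytes below are placeholders:

// Sketch: the ABI-tagged serialization layout (indices follow the
// SerializedInfoIndex enum above).
std::vector<std::string> serialized_info;
serialized_info.push_back(ABI_VERSION);                   // [ABI_TARGET_IDX] checked against the runtime
serialized_info.push_back(serialize_device(cuda_device)); // [DEVICE_IDX] "%"-delimited device info
serialized_info.push_back(trt_engine_bytes);              // [ENGINE_IDX] raw TensorRT engine blob
auto engine = c10::make_intrusive<trtorch::core::runtime::TRTEngine>(serialized_info);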
