Skip to content

Commit

Permalink
Fix memory leak
Browse files Browse the repository at this point in the history
  • Loading branch information
jhalakpatel committed Nov 9, 2024
1 parent 6e647ca commit 7b25b15
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -795,12 +795,6 @@ class AllocTracker {
/// Returns true if the ptr is released internally.
bool isReleasedInternally(uintptr_t ptr) const;

/// Mark the pointer as having been allocated by TensorRT.
void setTensorRTAllocated(uintptr_t ptr);

/// Return whether the pointer was allocated by TensorRT.
bool getTensorRTAllocated(uintptr_t ptr);

private:
struct Metadata {
std::atomic<int32_t> externalReferenceCount = {0};
Expand Down
7 changes: 4 additions & 3 deletions mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "mlir-executor/Runtime/API/API.h"
#include "mlir-executor/Runtime/API/ExecutableFlatbuffer.h"
#include "mlir-executor/Runtime/Backend/Lua/LuaRuntime.h"
#include "mlir-executor/Runtime/Support/Support.h"
#include "mlir-executor/Support/Status.h"
#include "mlir/Support/FileUtilities.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -324,9 +325,9 @@ MTRT_Status mtrtMemRefCreateExternal(

MTRT_Status mtrtMemRefValueDestroyAsync(MTRT_MemRefValue buffer,
MTRT_Stream stream) {

MemRefValue *memref = unwrap(buffer);
llvm::dbgs() << "[MLIR-TRT] Deallocating memref pointer " << memref->getMemory() << "\n";
MTRT_DBGF("destroying memref pointer 0x%lx asynchronously",
memref->getMemory());
Status s = memref->getClient()->deallocate(
std::unique_ptr<MemRefValue>(memref),
mtrtStreamIsNull(stream) ? std::nullopt
Expand All @@ -338,7 +339,7 @@ MTRT_Status mtrtMemRefValueDestroyAsync(MTRT_MemRefValue buffer,

MTRT_Status mtrtMemRefValueDestroy(MTRT_MemRefValue buffer) {
MemRefValue *memref = unwrap(buffer);
llvm::dbgs() << "[MLIR-TRT] Deallocating memref pointer " << memref->getMemory() << "\n";
MTRT_DBGF("destroying memref pointer 0x%lx", memref->getMemory());
Status s =
memref->getClient()->deallocate(std::unique_ptr<MemRefValue>(memref));
if (!s.isOk())
Expand Down
14 changes: 0 additions & 14 deletions mlir-tensorrt/executor/lib/Runtime/API/API.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,20 +396,6 @@ AllocTracker::~AllocTracker() {
MTRT_DBGF("freed %zu bytes of unfreed memory", totalSize);
}

/// Flag the tracked allocation at `ptr` as owned by TensorRT.
/// Precondition: `ptr` must already be tracked in `map`.
void AllocTracker::setTensorRTAllocated(uintptr_t ptr) {
  // Debug-only sanity check: the pointer must be known to the tracker.
  assert(llvm::is_contained(map, ptr) &&
         llvm::formatv("Untracked pointer {0}", ptr).str().c_str());
  map.at(ptr)->tensorrtAllocated = true;
}

/// Query whether the tracked allocation at `ptr` was allocated by TensorRT.
/// Precondition: `ptr` must already be tracked in `map`.
bool AllocTracker::getTensorRTAllocated(uintptr_t ptr) {
  // Debug-only sanity check: the pointer must be known to the tracker.
  assert(llvm::is_contained(map, ptr) &&
         llvm::formatv("Untracked pointer {0}", ptr).str().c_str());
  return map.at(ptr)->tensorrtAllocated;
}

void AllocTracker::markReleasedInternally(uintptr_t ptr) {
assert(llvm::is_contained(map, ptr) &&
llvm::formatv("Untracked pointer {0}", ptr).str().c_str());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,13 +432,9 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
cudaMemcpyDeviceToHost,
stream),
state);
if (allocTracker->getTensorRTAllocated(
reinterpret_cast<uintptr_t>(srcPtr))) {
// Free the TensorRT-allocated source pointer, since it won't be
// released by an external memref.
SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
}
if (allocTracker->get(src).isInternallyManaged() &&
allocTracker->getExternalReferenceCount(src))
allocTracker->markReleasedInternally(src);
};

lua["__cuda_memcpy_host_pinned2device"] =
Expand Down Expand Up @@ -487,13 +483,9 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
cudaMemcpyDeviceToHost,
stream),
state);
if (allocTracker->getTensorRTAllocated(
reinterpret_cast<uintptr_t>(srcPtr))) {
// Free the TensorRT-allocated source pointer, since it won't be
// released by an external memref.
SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
}
if (allocTracker->get(src).isInternallyManaged() &&
allocTracker->getExternalReferenceCount(src))
allocTracker->markReleasedInternally(src);
};
lua["__cuda_memcpy_device2device"] = [allocTracker](
sol::this_state state,
Expand All @@ -518,13 +510,9 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
cudaMemcpyDeviceToDevice,
stream),
state);
if (allocTracker->getTensorRTAllocated(
reinterpret_cast<uintptr_t>(srcPtr))) {
// Free the TensorRT-allocated source pointer, since it won't be
// released by an external memref.
SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
}
if (allocTracker->get(src).isInternallyManaged() &&
allocTracker->getExternalReferenceCount(src))
allocTracker->markReleasedInternally(src);
return;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ class OutputAllocatorImpl : public nvinfer1::IOutputAllocator {
if (memory.isOk()) {
mOutputPtr = (*memory).ptr;
mOutputSize = memory->size;
mTracker->setTensorRTAllocated(memory->ptr);
MTRT_DBGF(
"tensorrt module output allocator allocating %lu bytes at 0x%lx",
mOutputSize, mOutputPtr);
Expand Down
2 changes: 0 additions & 2 deletions mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,6 @@ static std::unique_ptr<PyMemRefValue>
createMemRefViewFromDLPack(PyRuntimeClient &client, py::capsule capsule,
std::optional<bool> assertCanonicalStrides) {

llvm::dbgs() << "Creating a memref view from DL pack tensors\n";

DLManagedTensor *managedTensor = static_cast<DLManagedTensor *>(
PyCapsule_GetPointer(capsule.ptr(), "dltensor"));

Expand Down

0 comments on commit 7b25b15

Please sign in to comment.