Improve memory leak
jhalakpatel committed Nov 9, 2024
1 parent 6e647ca commit 9334481
Showing 9 changed files with 75 additions and 59 deletions.
14 changes: 7 additions & 7 deletions mlir-tensorrt/executor/include/mlir-executor/Runtime/API/API.h
@@ -795,11 +795,11 @@ class AllocTracker {
   /// Returns true if the ptr is released internally.
   bool isReleasedInternally(uintptr_t ptr) const;
 
-  /// Set the pointer is allocated by TensorRT.
-  void setTensorRTAllocated(uintptr_t ptr);
+  /// Mark a pointer for release after consumption.
+  void markForReleaseAfterConsumption(uintptr_t ptr);
 
-  /// Get that pointer is allocated by TensorRT.
-  bool getTensorRTAllocated(uintptr_t ptr);
+  /// Check whether a pointer is marked for release after consumption.
+  bool isMarkedForReleaseAfterConsumption(uintptr_t ptr);
 
 private:
   struct Metadata {
@@ -808,7 +808,7 @@ class AllocTracker {
     // If this is true, then it should be truly released and untracked
     // when decrementExternalCount causes the count to go to zero.
     bool releasedInternally{false};
-    bool tensorrtAllocated{false};
+    bool releaseAfterConsumption{false};
     PointerInfo info;
   };
@@ -877,7 +877,7 @@ class RuntimeSession {
 
   ExecutableView getExecutable() const { return executable; }
 
-  PinnedMemoryAllocator &getPinnedMemorAllocator() {
+  PinnedMemoryAllocator &getPinnedMemoryAllocator() {
     return *pinnedMemoryAllocator;
   }
 
@@ -975,7 +975,7 @@ class RuntimeClient {
   ResourceTracker &getResourceTracker() { return resourceTracker; }
 
   /// Return the PinnedMemoryAllocator.
-  PinnedMemoryAllocator &getPinnedMemorAllocator() {
+  PinnedMemoryAllocator &getPinnedMemoryAllocator() {
     return pinnedMemoryAllocator;
   }
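
The core of the fix is visible in this header: the provenance flag tensorrtAllocated becomes an intent flag, releaseAfterConsumption, describing when a buffer should be freed rather than who allocated it. A minimal sketch of the pattern; SimpleAllocTracker, its map layout, and the assert messages are illustrative stand-ins for the real AllocTracker, not copies of it:

#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>

// Simplified stand-in for the runtime's AllocTracker. Each tracked pointer
// carries a flag saying "free this as soon as the consuming op is done".
class SimpleAllocTracker {
  struct Metadata {
    bool releaseAfterConsumption{false};
  };
  std::unordered_map<uintptr_t, std::unique_ptr<Metadata>> map;

public:
  void track(uintptr_t ptr) { map.emplace(ptr, std::make_unique<Metadata>()); }

  void markForReleaseAfterConsumption(uintptr_t ptr) {
    assert(map.count(ptr) && "untracked pointer");
    map.at(ptr)->releaseAfterConsumption = true;
  }

  bool isMarkedForReleaseAfterConsumption(uintptr_t ptr) const {
    assert(map.count(ptr) && "untracked pointer");
    return map.at(ptr)->releaseAfterConsumption;
  }
};

Naming the flag after the release policy rather than the allocation source is what lets the copy operations further down decide, purely locally, whether they are a buffer's last user.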
@@ -104,7 +104,11 @@ class PinnedMemoryAllocator {
   PinnedMemoryAllocator();
   ~PinnedMemoryAllocator();
 
-  /// Untracks
+  /// Marks a pointer as client-managed, deferring its deallocation.
+  /// This method is used when a pinned memory pointer is returned to the
+  /// client and its lifecycle is no longer managed by the
+  /// PinnedMemoryAllocator. Pointers marked this way will not be
+  /// automatically freed in the allocator's destructor.
   void untrack(uintptr_t ptr);
 
   StatusOr<PinnedMemoryBlock> allocate(size_t size);
@@ -117,8 +121,8 @@ class PinnedMemoryAllocator {
 private:
   EventPool eventPool;
 
-  /// Tracks all the pointers which need not to freed up.
-  static std::vector<uintptr_t> untrackedPtrs;
+  /// Stores pointers to memory blocks that are now managed by the client.
+  static std::vector<uintptr_t> clientManagedPtrs;
 
   /// Tracks all blocks allocated by the allocator.
   struct BlockTracker;
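
A minimal sketch of what untrack() and the clientManagedPtrs list imply for teardown; the bare global vector and the isClientManaged helper here are simplified stand-ins for the class members above:

#include <algorithm>
#include <cstdint>
#include <vector>

// Pointers handed off to the client are recorded so the allocator's
// teardown skips them instead of freeing memory the client still uses.
std::vector<uintptr_t> clientManagedPtrs;

void untrack(uintptr_t ptr) {
  if (std::find(clientManagedPtrs.begin(), clientManagedPtrs.end(), ptr) ==
      clientManagedPtrs.end())
    clientManagedPtrs.push_back(ptr);
}

bool isClientManaged(uintptr_t ptr) {
  return std::find(clientManagedPtrs.begin(), clientManagedPtrs.end(), ptr) !=
         clientManagedPtrs.end();
}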
7 changes: 4 additions & 3 deletions mlir-tensorrt/executor/lib/CAPI/Runtime/Runtime.cpp
@@ -27,6 +27,7 @@
 #include "mlir-executor/Runtime/API/API.h"
 #include "mlir-executor/Runtime/API/ExecutableFlatbuffer.h"
 #include "mlir-executor/Runtime/Backend/Lua/LuaRuntime.h"
+#include "mlir-executor/Runtime/Support/Support.h"
 #include "mlir-executor/Support/Status.h"
 #include "mlir/Support/FileUtilities.h"
 #include "llvm/Support/Debug.h"
@@ -324,9 +325,9 @@ MTRT_Status mtrtMemRefCreateExternal(
 
 MTRT_Status mtrtMemRefValueDestroyAsync(MTRT_MemRefValue buffer,
                                         MTRT_Stream stream) {
-
   MemRefValue *memref = unwrap(buffer);
-  llvm::dbgs() << "[MLIR-TRT] Deallocating memref pointer " << memref->getMemory() << "\n";
+  MTRT_DBGF("destroying memref pointer 0x%lx asynchronously",
+            memref->getMemory());
   Status s = memref->getClient()->deallocate(
       std::unique_ptr<MemRefValue>(memref),
       mtrtStreamIsNull(stream) ? std::nullopt
@@ -338,7 +339,7 @@ MTRT_Status mtrtMemRefValueDestroyAsync(MTRT_MemRefValue buffer,
 
 MTRT_Status mtrtMemRefValueDestroy(MTRT_MemRefValue buffer) {
   MemRefValue *memref = unwrap(buffer);
-  llvm::dbgs() << "[MLIR-TRT] Deallocating memref pointer " << memref->getMemory() << "\n";
+  MTRT_DBGF("destroying memref pointer 0x%lx", memref->getMemory());
   Status s =
       memref->getClient()->deallocate(std::unique_ptr<MemRefValue>(memref));
   if (!s.isOk())
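
The removed llvm::dbgs() calls printed on every deallocation; an MTRT_DBGF-style macro only fires when the relevant debug type is enabled. A rough sketch of such a printf-style gated macro, assuming LLVM's DebugFlag/isCurrentDebugType machinery; the real MTRT_DBGF in Support/Support.h may be defined differently:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>

#define DEBUG_TYPE "runtime"

// printf-style debug logging, emitted only when the "runtime" debug type is
// enabled (e.g. via -debug-only=runtime). Purely illustrative.
#define MY_DBGF(fmt, ...)                                                     \
  do {                                                                        \
    if (llvm::DebugFlag && llvm::isCurrentDebugType(DEBUG_TYPE)) {            \
      char buf[256];                                                          \
      std::snprintf(buf, sizeof(buf), fmt, __VA_ARGS__);                      \
      llvm::dbgs() << "[" DEBUG_TYPE "] " << buf << "\n";                     \
    }                                                                         \
  } while (false)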
14 changes: 7 additions & 7 deletions mlir-tensorrt/executor/lib/Runtime/API/API.cpp
@@ -396,18 +396,18 @@ AllocTracker::~AllocTracker() {
   MTRT_DBGF("freed %zu bytes of unfreed memory", totalSize);
 }
 
-void AllocTracker::setTensorRTAllocated(uintptr_t ptr) {
+void AllocTracker::markForReleaseAfterConsumption(uintptr_t ptr) {
   assert(llvm::is_contained(map, ptr) &&
          llvm::formatv("Untracked pointer {0}", ptr).str().c_str());
   std::unique_ptr<Metadata> const &metadata = map.at(ptr);
-  metadata->tensorrtAllocated = true;
+  metadata->releaseAfterConsumption = true;
 }
 
-bool AllocTracker::getTensorRTAllocated(uintptr_t ptr) {
+bool AllocTracker::isMarkedForReleaseAfterConsumption(uintptr_t ptr) {
   assert(llvm::is_contained(map, ptr) &&
          llvm::formatv("Untracked pointer {0}", ptr).str().c_str());
   std::unique_ptr<Metadata> const &metadata = map.at(ptr);
-  return metadata->tensorrtAllocated;
+  return metadata->releaseAfterConsumption;
 }
 
 void AllocTracker::markReleasedInternally(uintptr_t ptr) {
@@ -486,8 +486,8 @@ void AllocTracker::track(PointerInfo info) {
   auto value = std::make_unique<Metadata>();
   value->externalReferenceCount.store(0);
   value->releasedInternally = false;
+  value->releaseAfterConsumption = false;
   value->info = info;
-  value->tensorrtAllocated = false;
   if (!contains(info.ptr)) {
     map.insert(std::make_pair(info.ptr, std::move(value)));
     return;
@@ -1001,7 +1001,7 @@ RuntimeClient::copyToDevice(const MemRefValue &hostBufferImpl,
   // TODO: Currently, this implementation supports only row major packed
   // canonical layout (no padding).
   StatusOr<mlirtrt::PinnedMemoryBlock> pinnedMemory =
-      this->getPinnedMemorAllocator().allocate(totalBufferSize);
+      this->getPinnedMemoryAllocator().allocate(totalBufferSize);
   if (!pinnedMemory.isOk())
     return pinnedMemory.getStatus();
 
@@ -1020,7 +1020,7 @@ RuntimeClient::copyToDevice(const MemRefValue &hostBufferImpl,
         reinterpret_cast<cudaStream_t>(*cudaStream)));
 
     // Free pinned host memory asynchronously.
-    getPinnedMemorAllocator().freeAsync(pinnedMemory->ptr, *cudaStream);
+    getPinnedMemoryAllocator().freeAsync(pinnedMemory->ptr, *cudaStream);
   } else {
     MTRT_DBG("synchronously copying {0} (host) to {1} (device), size={2} bytes",
              hostBufferImpl.getVoidPtr(), (*deviceMemRef)->getVoidPtr(),
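
For context on the copyToDevice path touched here: host data is staged through a pinned buffer so the device copy can run asynchronously, and the staging block may only be reclaimed after the copy completes. A self-contained sketch using raw CUDA runtime calls; copyToDeviceViaPinned is hypothetical, and the real code recycles blocks through PinnedMemoryAllocator::allocate/freeAsync rather than cudaMallocHost/cudaFreeHost:

#include <cstring>
#include <cuda_runtime_api.h>

// Stage a pageable host buffer through pinned memory for an async H2D copy.
cudaError_t copyToDeviceViaPinned(const void *hostSrc, void *deviceDst,
                                  size_t size, cudaStream_t stream) {
  void *pinned = nullptr;
  cudaError_t err = cudaMallocHost(&pinned, size); // page-locked allocation
  if (err != cudaSuccess)
    return err;

  std::memcpy(pinned, hostSrc, size); // CPU copy into the staging buffer
  err = cudaMemcpyAsync(deviceDst, pinned, size, cudaMemcpyHostToDevice,
                        stream);

  // The staging buffer must outlive the enqueued copy. An event marks the
  // point after which it is safe to reuse or free the block; an allocator's
  // freeAsync() would poll such an event instead of blocking like this.
  cudaEvent_t done;
  cudaEventCreateWithFlags(&done, cudaEventDisableTiming);
  cudaEventRecord(done, stream);
  cudaEventSynchronize(done);
  cudaEventDestroy(done);
  cudaFreeHost(pinned);
  return err;
}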
20 changes: 10 additions & 10 deletions mlir-tensorrt/executor/lib/Runtime/Backend/Lua/LuaRuntime.cpp
@@ -152,7 +152,7 @@ LuaRuntimeSession::create(RuntimeSessionOptions options,
 
   // Register builtin methods.
   registerLuaRuntimeMethods(lua.lua_state(), session->getOptions(),
-                            &session->getPinnedMemorAllocator(),
+                            &session->getPinnedMemoryAllocator(),
                             &session->getAllocTracker(),
                             &session->getResourceTracker());
 
@@ -624,22 +624,22 @@ parseResults(const sol::protected_function_result &pfr,
           "This ptr is registered with the session and will now be tracked "
           "by the client as well.",
           allocPtr, static_cast<void *>(&session.getAllocTracker()),
-          static_cast<void *>(&session.getPinnedMemorAllocator()),
+          static_cast<void *>(&session.getPinnedMemoryAllocator()),
           static_cast<void *>(*client),
           static_cast<void *>(&(*client)->getAllocTracker()));
 
       // What we actually need here is to "release" the pointer from
       // session ownership and have the client assume it.
       PointerInfo info = session.getAllocTracker().get(allocPtr);
       session.getAllocTracker().untrack(info.ptr);
-
-      // It is possible that pinned memory also tracks the memory for
-      // deallocation.
-      session.getPinnedMemorAllocator().untrack(info.ptr);
-
-      AllocTracker &allocator = (*client)->getAllocTracker();
-      // if (!allocator.contains(info.ptr))
-      allocator.track(info);
+      (*client)->getAllocTracker().track(info);
 
+      // Defer deallocation of this pinned memory pointer. It is likely
+      // still in use by the client and should not be immediately freed.
+      // Untracking it here ensures it won't be deallocated in the
+      // PinnedMemoryAllocator's destructor, allowing the client to manage
+      // its lifecycle.
+      session.getPinnedMemoryAllocator().untrack(info.ptr);
 
       // Create a memref so that client now tracks it.
       auto memref = MemRefValue::create(
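
The reordered logic in parseResults is an ownership handoff between two trackers. A condensed sketch of the sequence; transferOwnership and the template parameters are stand-ins, not runtime API:

#include <cstdint>

struct PointerInfo { uintptr_t ptr; /* size, address space, ... */ };

// Hand a session-owned result buffer to the client without double-freeing
// it and without leaking it. Tracker/allocator types are simplified.
template <typename Tracker, typename PinnedAllocator>
void transferOwnership(Tracker &sessionTracker, PinnedAllocator &sessionPinned,
                       Tracker &clientTracker, uintptr_t allocPtr) {
  // 1. Snapshot the metadata, then drop it from the session tracker so
  //    session teardown no longer frees this pointer.
  PointerInfo info = sessionTracker.get(allocPtr);
  sessionTracker.untrack(info.ptr);

  // 2. The client tracker assumes ownership; the client-side deallocate()
  //    is now responsible for the final free.
  clientTracker.track(info);

  // 3. If the pinned-memory allocator also knows this block, mark it
  //    client-managed so the allocator's destructor skips it.
  sessionPinned.untrack(info.ptr);
}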
@@ -432,12 +432,14 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
                                                    cudaMemcpyDeviceToHost,
                                                    stream),
                                    state);
-    if (allocTracker->getTensorRTAllocated(
-            reinterpret_cast<uintptr_t>(srcPtr))) {
-      // Free tensorrt allocate source pointer, since there it won't be
-      // released by external memref.
-      SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
-      allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
+    // Check if the source pointer is marked for release after consumption.
+    if (allocTracker->isMarkedForReleaseAfterConsumption(src)) {
+      // This pointer was allocated by TensorRT and used in a device-device
+      // or device-host copy operation. It is not wrapped in a memref, so
+      // it won't be released by external memref destruction; we need to
+      // free it explicitly.
+      SET_LUA_ERROR_IF_ERROR(runtime::safeDeallocate(*allocTracker, src),
+                             state);
     }
   };
 
@@ -487,12 +489,14 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
                                                    cudaMemcpyDeviceToHost,
                                                    stream),
                                    state);
-    if (allocTracker->getTensorRTAllocated(
-            reinterpret_cast<uintptr_t>(srcPtr))) {
-      // Free tensorrt allocate source pointer, since there it won't be
-      // released by external memref.
-      SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
-      allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
+    // Check if the source pointer is marked for release after consumption.
+    if (allocTracker->isMarkedForReleaseAfterConsumption(src)) {
+      // This pointer was allocated by TensorRT and used in a device-device
+      // or device-host copy operation. It is not wrapped in a memref, so
+      // it won't be released by external memref destruction; we need to
+      // free it explicitly.
+      SET_LUA_ERROR_IF_ERROR(runtime::safeDeallocate(*allocTracker, src),
+                             state);
     }
   };
   lua["__cuda_memcpy_device2device"] = [allocTracker](
@@ -518,12 +522,14 @@ registerCudaMemoryManagementOps(sol::state_view &lua,
                                                    cudaMemcpyDeviceToDevice,
                                                    stream),
                                    state);
-    if (allocTracker->getTensorRTAllocated(
-            reinterpret_cast<uintptr_t>(srcPtr))) {
-      // Free tensorrt allocate source pointer, since there it won't be
-      // released by external memref.
-      SET_LUA_ERROR_IF_CUDART_ERROR(cudaFreeAsync(srcPtr, stream), state);
-      allocTracker->untrack(reinterpret_cast<uintptr_t>(srcPtr));
+    // Check if the source pointer is marked for release after consumption.
+    if (allocTracker->isMarkedForReleaseAfterConsumption(src)) {
+      // This pointer was allocated by TensorRT and used in a device-device
+      // or device-host copy operation. It is not wrapped in a memref, so
+      // it won't be released by external memref destruction; we need to
+      // free it explicitly.
+      SET_LUA_ERROR_IF_ERROR(runtime::safeDeallocate(*allocTracker, src),
+                             state);
    }
     return;
   };
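
All three copy wrappers now share one consume-then-free shape: perform the async copy, then release the source if and only if it was flagged at allocation time. A generic sketch; memcpyAndMaybeRelease and the tracker's member safeDeallocate are illustrative stand-ins for the runtime's free function runtime::safeDeallocate:

#include <cstdint>
#include <cuda_runtime_api.h>

// Illustrative consume-then-free: copy from src, then free src immediately
// if it was marked release-after-consumption. Tracker type is a stand-in.
template <typename Tracker>
cudaError_t memcpyAndMaybeRelease(Tracker &tracker, uintptr_t src, void *dst,
                                  size_t size, cudaMemcpyKind kind,
                                  cudaStream_t stream) {
  cudaError_t err = cudaMemcpyAsync(dst, reinterpret_cast<void *>(src), size,
                                    kind, stream);
  if (err != cudaSuccess)
    return err;

  // The copy enqueued above is this pointer's last use: release it through
  // the tracker so the bookkeeping and the actual free stay in sync.
  if (tracker.isMarkedForReleaseAfterConsumption(src))
    tracker.safeDeallocate(src); // stream-ordered free in the real runtime
  return cudaSuccess;
}

Routing the free through safeDeallocate, instead of the old bare cudaFreeAsync plus untrack, keeps the deallocation and the tracker's state in one place.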
@@ -155,7 +155,12 @@ class OutputAllocatorImpl : public nvinfer1::IOutputAllocator {
     if (memory.isOk()) {
       mOutputPtr = (*memory).ptr;
       mOutputSize = memory->size;
-      mTracker->setTensorRTAllocated(memory->ptr);
+      // Mark the output pointer for release after consumption. This is
+      // necessary because TensorRT-allocated pointers used in device-device
+      // or device-host copies may not be wrapped in a memref and tracked
+      // by the client. Marking it here ensures it is explicitly freed
+      // after it is consumed in copy operations, preventing memory leaks.
+      mTracker->markForReleaseAfterConsumption(mOutputPtr);
       MTRT_DBGF(
           "tensorrt module output allocator allocating %lu bytes at 0x%lx",
           mOutputSize, mOutputPtr);
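
This allocator is the producer half of the handshake: buffers are flagged when created, and the copy wrappers above act as the consumers that free them. A schematic of the producer side; MarkingOutputAllocator and its allocate()/tracker types are assumptions, not the real OutputAllocatorImpl or the nvinfer1::IOutputAllocator interface:

#include <cstdint>

// Producer side of the release-after-consumption handshake: a TensorRT
// output allocator flags each buffer it allocates, and the copy that later
// consumes the buffer performs the free. All types are simplified stand-ins.
template <typename Tracker, typename Allocator>
class MarkingOutputAllocator {
  Tracker *mTracker;
  Allocator *mAllocator;
  uintptr_t mOutputPtr{0};
  uint64_t mOutputSize{0};

public:
  MarkingOutputAllocator(Tracker *t, Allocator *a)
      : mTracker(t), mAllocator(a) {}

  // Called when the engine needs storage for a dynamically shaped output.
  void *reallocateOutput(uint64_t size) {
    auto memory = mAllocator->allocate(size);
    mOutputPtr = memory.ptr;
    mOutputSize = memory.size;
    // Without a memref wrapper, nothing else would ever free this buffer;
    // flag it so the consuming copy releases it.
    mTracker->markForReleaseAfterConsumption(mOutputPtr);
    return reinterpret_cast<void *>(mOutputPtr);
  }
};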
18 changes: 10 additions & 8 deletions mlir-tensorrt/executor/lib/Support/Allocators.cpp
@@ -206,7 +206,7 @@ static void cudaFreeHostWrapper(uintptr_t ptr) {
 #endif
 }
 
-std::vector<uintptr_t> PinnedMemoryAllocator::untrackedPtrs;
+std::vector<uintptr_t> PinnedMemoryAllocator::clientManagedPtrs;
 
 struct PinnedMemoryAllocator::BlockTracker {
   std::set<Block *, BlockComparison> blocks;
@@ -218,13 +218,14 @@ struct PinnedMemoryAllocator::BlockTracker {
         "[PinnedMemoryAllocator] Releasing block tracker that has %lu blocks",
         blocks.size());
     for (Block *block : blocks) {
-      if (std::find(PinnedMemoryAllocator::untrackedPtrs.begin(),
-                    PinnedMemoryAllocator::untrackedPtrs.end(),
-                    block->ptr) == PinnedMemoryAllocator::untrackedPtrs.end()) {
+      if (std::find(clientManagedPtrs.begin(), clientManagedPtrs.end(),
+                    block->ptr) == clientManagedPtrs.end()) {
         ALLOC_DBGF("[PinnedMemoryAllocator] releasing block %lu of size %lu",
                    block->ptr, block->size);
         cudaFreeHostWrapper(block->ptr);
       }
+      // Blocks found in clientManagedPtrs are not freed here, as they are
+      // now managed by the client.
     }
   }
 };
@@ -251,7 +252,7 @@ StatusOr<PinnedMemoryBlock> PinnedMemoryAllocator::allocate(size_t size) {
   if (lowerBound != freeBlocks->set.end()) {
     Block *result = *lowerBound;
     freeBlocks->set.erase(result);
-    ALLOC_DBGF("re-using block %lu of size %lu", result->ptr, result->size);
+    ALLOC_DBGF("re-using block %lx of size %lu", result->ptr, result->size);
     return PinnedMemoryBlock{result->ptr, result->size};
   }
 
@@ -275,10 +276,11 @@ StatusOr<PinnedMemoryBlock> PinnedMemoryAllocator::allocate(size_t size) {
 #endif
 }
 
-// Free the given block.
+std::vector<uintptr_t> clientManagedPtrs;
 
 void PinnedMemoryAllocator::untrack(uintptr_t ptr) {
-  if (!llvm::is_contained(untrackedPtrs, ptr)) {
-    untrackedPtrs.emplace_back(ptr);
+  if (!llvm::is_contained(clientManagedPtrs, ptr)) {
+    clientManagedPtrs.emplace_back(ptr);
   }
 }
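
For reference, the allocate() fast path shown above is a best-fit lookup over a size-ordered set of free blocks. A self-contained sketch of that structure; Block and BlockComparison are modeled on the names in the diff, not copied from the source:

#include <cstdint>
#include <set>

struct Block {
  uintptr_t ptr;
  size_t size;
};

// Order free blocks by size so lower_bound finds the smallest block that
// still fits the request (classic best-fit).
struct BlockComparison {
  bool operator()(const Block *a, const Block *b) const {
    return a->size < b->size || (a->size == b->size && a->ptr < b->ptr);
  }
};

std::set<Block *, BlockComparison> freeBlocks;

Block *takeBestFit(size_t size) {
  Block probe{0, size};
  auto it = freeBlocks.lower_bound(&probe); // first block with size >= size
  if (it == freeBlocks.end())
    return nullptr; // caller falls back to a fresh pinned allocation
  Block *result = *it;
  freeBlocks.erase(it);
  return result;
}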
2 changes: 0 additions & 2 deletions mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp
@@ -346,8 +346,6 @@ static std::unique_ptr<PyMemRefValue>
 createMemRefViewFromDLPack(PyRuntimeClient &client, py::capsule capsule,
                            std::optional<bool> assertCanonicalStrides) {
 
-  llvm::dbgs() << "Creating a memref view from DL pack tensors\n";
-
   DLManagedTensor *managedTensor = static_cast<DLManagedTensor *>(
       PyCapsule_GetPointer(capsule.ptr(), "dltensor"));
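
The function touched here consumes a DLPack capsule. The standard protocol is to fetch the DLManagedTensor, take ownership, and rename the capsule so the producer's deleter does not run twice. A minimal pybind11 sketch; consumeDLPack is hypothetical, and whether the real code renames the capsule is not shown in this diff:

#include <dlpack/dlpack.h>
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Standard DLPack consumption: the capsule is named "dltensor" until a
// consumer takes ownership, after which it must be renamed to
// "used_dltensor" so the producer's destructor does not free it again.
DLManagedTensor *consumeDLPack(py::capsule capsule) {
  auto *managed = static_cast<DLManagedTensor *>(
      PyCapsule_GetPointer(capsule.ptr(), "dltensor"));
  if (!managed)
    throw py::error_already_set();
  PyCapsule_SetName(capsule.ptr(), "used_dltensor");
  return managed; // caller later invokes managed->deleter(managed)
}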
