[MNN:Bugfix] Fix resize optimization bug, support Llama3 8B
xiaying committed Apr 23, 2024
1 parent b0565d3 commit e00553c
Showing 10 changed files with 75 additions and 40 deletions.
13 changes: 12 additions & 1 deletion llm/include/llm.hpp
@@ -228,7 +228,18 @@ class Llama2_7b : public Llm {
     virtual VARP gen_position_ids(int seq_len) override;
     virtual bool is_stop(int token_id) override;
 };
-
+class Llama3_8b : public Llama2_7b {
+public:
+    Llama3_8b() {
+        model_name_ = "Llama3_8b";
+        layer_nums_ = 32;
+        key_value_shape_ = {2, 1, 8, 0, 128};
+        hidden_size_ = 4096;
+    }
+private:
+    virtual std::vector<int> tokenizer(const std::string& query) override;
+    virtual bool is_stop(int token_id) override;
+};
 class Qwen2 : public Llama2_7b {
 public:
     Qwen2() {
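A note on the configuration above: key_value_shape_ = {2, 1, 8, 0, 128} is consistent with Llama 3 8B's grouped-query attention (a K/V pair, batch 1, 8 KV heads of dimension 128, with 0 standing in for the dynamic sequence axis), though the diff itself does not spell out the axis meanings. Under that reading, a minimal sketch of the per-token KV-cache footprint, assuming fp16 storage:

#include <cstdio>

// Hedged illustration: the axis meanings of key_value_shape_ are assumed
// ({K/V pair, batch, KV heads, sequence, head dim}), fp16 storage assumed.
int main() {
    const long long kvPair = 2, batch = 1, kvHeads = 8, headDim = 128;
    const long long layers = 32;      // layer_nums_ above
    const long long bytesPerElem = 2; // fp16
    long long perToken = kvPair * batch * kvHeads * headDim * layers * bytesPerElem;
    std::printf("KV cache per token: %lld KiB\n", perToken / 1024); // 128 KiB
    return 0;
}

At 128 KiB per token, an 8192-token context would hold about 1 GiB of KV cache.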
17 changes: 17 additions & 0 deletions llm/src/llm.cpp
@@ -13,6 +13,7 @@
 
 #include <MNN/expr/ExecutorScope.hpp>
 #include <MNN/AutoTime.hpp>
+#include "cpp/ExprDebug.hpp"
 #include "llm.hpp"
 #include "tokenizer.hpp"
 
@@ -86,6 +87,9 @@ Llm* Llm::createLLM(const std::string& path, std::string model_type, int forward
     } else if (model_type.find("yi") != std::string::npos) {
         llm = new Yi_6b;
         llm->model_name_ = "Yi_6b";
+    } else if (model_type.find("llama3") != std::string::npos) {
+        llm = new Llama3_8b;
+        llm->model_name_ = "Llama3_8b";
     }
     if (!llm) {
         std::cerr << "model type can't judge!" << std::endl;
@@ -229,6 +233,8 @@ void Llm::load(const std::string& model_dir) {
     config.backendConfig = &cpuBackendConfig;
     runtime_manager_.reset(Executor::RuntimeManager::createRuntimeManager(config));
     runtime_manager_->setHint(MNN::Interpreter::MEM_ALLOCATOR_TYPE, 0);
+    // runtime_manager_->setMode(MNN::Interpreter::Session_Debug);
+    // _initTensorStatic();
     {
         runtime_manager_->setCache(".tempcache");
     }
@@ -801,6 +807,17 @@ std::vector<int> Yi_6b::tokenizer(const std::string& query) {
 bool Yi_6b::is_stop(int token_id) {
     return token_id == 7 || token_id == 64001;
 }
+std::vector<int> Llama3_8b::tokenizer(const std::string& query) {
+    // <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n+query+<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n
+    auto ids = tokenizer_encode(query);
+    ids.insert(ids.begin(), {128000, 128006, 882, 128007, 271});
+    ids.insert(ids.end(), {128009, 128006, 78191, 128007, 271});
+    return ids;
+}
+
+bool Llama3_8b::is_stop(int token_id) {
+    return token_id == 128001 || token_id == 128009;
+}
 // Llm end
 
 // Embedding start
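The prefix ids inserted above encode the Llama 3 chat-template header from the comment (<|begin_of_text|><|start_header_id|>user<|end_header_id|> plus newlines) and the suffix ids encode the <|eot_id|><|start_header_id|>assistant<|end_header_id|> footer; is_stop then stops on what are, in the Llama 3 vocabulary, <|end_of_text|> (128001) and <|eot_id|> (128009). A standalone sketch of the same wrapping with tokenizer_encode stubbed out (the real encoder comes from tokenizer.hpp; the ids 9906 and 11 are made-up placeholders for the query text):

#include <cstdio>
#include <string>
#include <vector>

// Stub standing in for MNN's real BPE encoder; the returned ids are
// hypothetical, for illustration only.
static std::vector<int> tokenizer_encode(const std::string&) {
    return {9906, 11};
}

int main() {
    auto ids = tokenizer_encode("hello");
    // Same wrapping as Llama3_8b::tokenizer above:
    ids.insert(ids.begin(), {128000, 128006, 882, 128007, 271}); // user header
    ids.insert(ids.end(), {128009, 128006, 78191, 128007, 271}); // assistant header
    for (int id : ids) {
        std::printf("%d ", id);
    }
    std::printf("\n");
    return 0;
}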
32 changes: 21 additions & 11 deletions source/core/Pipeline.cpp
@@ -706,16 +706,26 @@ static void _makeCopyOp(std::shared_ptr<BufferStorage>& copyOp) {
         copyOp->storage = builder.ReleaseRaw(copyOp->allocated_size, copyOp->offset);
     }
 }
-static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>>& shapeFixConstCache, bool ownInput, bool permitCodegen) {
+static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, std::shared_ptr<Tensor>>& mCacheConstTensors, Pipeline::WrapTensorCache& shapeFixConstCache, bool ownInput, bool permitCodegen) {
     std::shared_ptr<BufferStorage> copyOp;
-    for (auto& iter : shapeFixConstCache) {
-        auto des = TensorUtils::getDescribe(iter.second.get());
-        if (des->usage == Tensor::InsideDescribe::CONSTANT && des->stageMask == 0) {
-            // If the tensor is not compute in shape-geometry stage, needn't recopy it
+    for (auto iterP = shapeFixConstCache.begin(); iterP != shapeFixConstCache.end();) {
+        auto& iter = *iterP;
+        if (iter.second.first.lock() == nullptr) {
+            // Has released, remove cache
+            iterP = shapeFixConstCache.erase(iterP);
             continue;
         }
-        TensorUtils::getDescribeOrigin(iter.second.get())->setBackend(nullptr);
-        TensorUtils::getDescribeOrigin(iter.second.get())->mem = nullptr;
+        auto des = iter.first.first;
+        bool needReset = true;
+        if (des->usage == Tensor::InsideDescribe::CONSTANT && ((des->stageMask & Tensor::InsideDescribe::CONTENT_NOT_CHANGE) != 0)) {
+            // If the tensor is not compute in shape-geometry stage, needn't recopy it
+            needReset = false;
+        }
+        if (needReset) {
+            TensorUtils::getDescribeOrigin(iter.second.second.get())->setBackend(nullptr);
+            TensorUtils::getDescribeOrigin(iter.second.second.get())->mem = nullptr;
+        }
+        iterP++;
     }
     for (auto& info : mInfo.second) {
         if (info.type == Schedule::CONSTANT) {
@@ -778,12 +788,12 @@ static ErrorCode _InsertCopy(Schedule::PipelineInfo& mInfo, std::map<Tensor*, st
             }
         }
         {
-            auto titer = shapeFixConstCache.find(std::make_pair(t, curBackend));
+            auto titer = shapeFixConstCache.find(std::make_pair(des, curBackend));
             if (titer != shapeFixConstCache.end()) {
-                newTensor = titer->second.get();
+                newTensor = titer->second.second.get();
             } else {
                 std::shared_ptr<MNN::Tensor> tensor(new Tensor);
-                shapeFixConstCache.insert(std::make_pair(std::make_pair(t, curBackend), tensor));
+                shapeFixConstCache.insert(std::make_pair(std::make_pair(des, curBackend), std::make_pair(std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>(TensorUtils::getDescribeOrigin(t)->mContent), tensor)));
                 newTensor = tensor.get();
             }
             iter.workInputs[v] = newTensor;
@@ -1067,7 +1077,7 @@ ErrorCode Pipeline::allocMemory(bool firstMalloc, bool forbidReplace) {
         }
         auto des = TensorUtils::getDescribe(t);
         auto usage = des->usage;
-        if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
+        if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1 && usage != Tensor::InsideDescribe::CONSTANT) {
             TensorUtils::getDescribeOrigin(t)->mem = nullptr;
             auto res = TensorUtils::getDescribeOrigin(t)->getBackend()->onAcquireBuffer(t, Backend::STATIC);
             if (!res) {
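This file's first hunk is the heart of the resize fix: the wrap cache is now keyed by the tensor's descriptor, and each entry carries a weak_ptr to that descriptor, so entries whose owning tensor has been released are erased during the sweep instead of having their backend and memory reset against stale state. A minimal standalone sketch of that erase-while-sweeping pattern (simplified types, not MNN's real ones):

#include <cstdio>
#include <map>
#include <memory>
#include <utility>

struct Desc { int usage = 0; }; // stand-in for NativeInsideDescribe

int main() {
    // Key: raw descriptor pointer; value: (liveness probe, cached payload).
    using Cache = std::map<Desc*, std::pair<std::weak_ptr<Desc>, int>>;
    Cache cache;
    auto alive = std::make_shared<Desc>();
    auto dead  = std::make_shared<Desc>();
    cache.emplace(alive.get(), std::make_pair(std::weak_ptr<Desc>(alive), 1));
    cache.emplace(dead.get(),  std::make_pair(std::weak_ptr<Desc>(dead), 2));
    dead.reset(); // owner released; its cache entry is now stale

    for (auto it = cache.begin(); it != cache.end();) {
        if (it->second.first.lock() == nullptr) {
            it = cache.erase(it); // same erase-while-iterating idiom as _InsertCopy
            continue;
        }
        ++it; // live entry: _InsertCopy would decide here whether to reset it
    }
    std::printf("live entries: %zu\n", cache.size()); // prints 1
    return 0;
}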
3 changes: 2 additions & 1 deletion source/core/Pipeline.hpp
@@ -62,6 +62,7 @@ class Pipeline : public NonCopyable {
     MNNForwardType getMainForwardType() const {
         return mInfo.first.cache.first->type();
     }
+    typedef std::map<std::pair<Tensor::InsideDescribe::NativeInsideDescribe*, Backend*>, std::pair<std::weak_ptr<Tensor::InsideDescribe::NativeInsideDescribe>, std::shared_ptr<Tensor>>> WrapTensorCache;
 private:
     ErrorCode _allocForTensor(int index, bool allocInput);
     void _copyInputs();
@@ -76,7 +77,7 @@
 
     // For gpu or other backend
     std::map<Tensor*, std::shared_ptr<Tensor>> mCacheConstTensors;
-    std::map<std::pair<Tensor*, Backend*>, std::shared_ptr<Tensor>> mWrapTensors;
+    WrapTensorCache mWrapTensors;
 #ifndef MNN_BUILD_MINI
     GeometryComputer::Context mContext;
     Runtime::CompilerType mUseGeometry;
3 changes: 3 additions & 0 deletions source/core/Schedule.cpp
@@ -81,6 +81,9 @@ bool Schedule::OpResizeCache::match(const std::vector<Tensor*>& inputs) {
 void Schedule::OpResizeCache::open() {
     mCanCache = true;
 }
+void Schedule::OpResizeCache::copyImmutable(const OpResizeCache& cache) {
+    mNeedCompareContent = cache.mNeedCompareContent;
+}
 
 void Schedule::OpResizeCache::insert(const std::vector<Tensor*>& inputs) {
     if (!mCanCache) {
1 change: 1 addition & 0 deletions source/core/Schedule.hpp
@@ -42,6 +42,7 @@ class MNN_PUBLIC Schedule {
         bool needComputeShape = true;
         bool needExecuteConst = false;
         void addContentIndex(int index);
+        void copyImmutable(const OpResizeCache& cache);
     private:
         struct ShapeInfo {
             int order;
1 change: 1 addition & 0 deletions source/core/Session.cpp
@@ -427,6 +427,7 @@ Session* Session::clone(RuntimeInfo&& runtime, std::shared_ptr<Schedule::Schedul
         auto& opInfo = oplists[i];
         opInfo.op = opCaches[i].op;
         opInfo.type = srcOpInfo.type;
+        opInfo.computeCache.copyImmutable(srcOpInfo.computeCache);
         auto op = opInfo.op;
         if (nullptr != op->outputIndexes()) {
             auto data = op->outputIndexes()->data();
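With copyImmutable, a cloned session inherits the resize cache's configuration (whether op inputs must be compared by content) without inheriting the source session's per-session cache entries. A simplified sketch of that split (mNeedCompareContent mirrors the real member; cachedShapes is a hypothetical stand-in for the mutable state):

#include <vector>

struct OpResizeCache {
    bool mNeedCompareContent = false; // immutable policy, safe to copy on clone
    std::vector<int> cachedShapes;    // hypothetical per-session state, not copied
    void copyImmutable(const OpResizeCache& cache) {
        mNeedCompareContent = cache.mNeedCompareContent;
    }
};

// Mirrors the call site in Session::clone: the clone starts with the same
// policy but an empty cache, to be refilled by its own resize passes.
OpResizeCache cloneComputeCache(const OpResizeCache& src) {
    OpResizeCache dst;
    dst.copyImmutable(src);
    return dst;
}

int main() {
    OpResizeCache src;
    src.mNeedCompareContent = true;
    src.cachedShapes = {1, 224, 224};
    auto dst = cloneComputeCache(src);
    return (dst.mNeedCompareContent && dst.cachedShapes.empty()) ? 0 : 1;
}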
4 changes: 2 additions & 2 deletions source/core/Tensor.cpp
@@ -20,7 +20,7 @@ namespace MNN {
 Tensor::Tensor(int dimSize, DimensionType type) {
     MNN_ASSERT(dimSize <= MNN_MAX_TENSOR_DIM);
     mDescribe = new InsideDescribe;
-    mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
+    mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
     auto nativeDescribe = mDescribe->mContent.get();
     mBuffer.dimensions = dimSize;
     mBuffer.type = halide_type_of<float>();
@@ -49,7 +49,7 @@ Tensor::Tensor(const Tensor* tensor, DimensionType type, bool allocMemory) {
 
     auto buffer = tensor->buffer();
     mDescribe = new InsideDescribe;
-    mDescribe->mContent = new InsideDescribe::NativeInsideDescribe;
+    mDescribe->mContent.reset(new InsideDescribe::NativeInsideDescribe);
     auto nativeDescribe = mDescribe->mContent.get();
     mBuffer.dimensions = buffer.dimensions;
     mBuffer.type = buffer.type;
6 changes: 3 additions & 3 deletions source/core/TensorUtils.hpp
@@ -79,10 +79,10 @@ struct Tensor::InsideDescribe {
         GEOMETRY_STAGE = 1,
         CONVERTED_STAGE = 1 << 1,
         COMPUTE_SHAPE_STAGE = 1 << 2,
-        COMPUTE_CONTENT_STAGE = 1 << 3,
+        CONTENT_NOT_CHANGE = 1 << 3,
     };
     /** extra tensor info container */
-    struct NativeInsideDescribe : public RefCount {
+    struct NativeInsideDescribe {
     public:
         /** dimension format */
         MNN_DATA_FORMAT dimensionFormat = MNN_DATA_FORMAT_NC4HW4;
@@ -115,7 +115,7 @@ struct Tensor::InsideDescribe {
         // For isMutable = false Tensor , determine whether the content can be convert to main backend
         uint32_t stageMask = 0;
     };
-    SharedPtr<NativeInsideDescribe> mContent;
+    std::shared_ptr<NativeInsideDescribe> mContent;
     SharedPtr<Backend::MemObj> mem;
     inline Backend* getBackend() const {
         return backend;
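Dropping the RefCount base and moving mContent to std::shared_ptr is what lets Pipeline::allocMemory and GeometryComputerUtils replace mContent->count() with mContent.use_count() when deciding whether a descriptor is shared and must be detached before mutation. A minimal sketch of that copy-on-write check with simplified types (the real code resets to a fresh default-constructed descriptor; the copy construction here is only to make the effect visible):

#include <cassert>
#include <memory>

struct NativeDesc { int dims = 0; }; // stand-in for NativeInsideDescribe

void mutateExclusively(std::shared_ptr<NativeDesc>& content) {
    if (content.use_count() > 1) {               // same check as the diff above
        content.reset(new NativeDesc(*content)); // detach from other owners
    }
    content->dims = 4;
}

int main() {
    auto a = std::make_shared<NativeDesc>();
    auto b = a;           // shared: use_count() == 2
    mutateExclusively(a); // a detaches and mutates its own copy
    assert(a->dims == 4 && b->dims == 0); // b is unaffected
    return 0;
}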
35 changes: 13 additions & 22 deletions source/geometry/GeometryComputerUtils.cpp
@@ -164,8 +164,8 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
             auto type = des->memoryType;
             MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_OUTSIDE);
             MNN_ASSERT(type != Tensor::InsideDescribe::MEMORY_HOST);
-            if (TensorUtils::getDescribeOrigin(t)->mContent->count() > 1) {
-                TensorUtils::getDescribeOrigin(t)->mContent = new Tensor::InsideDescribe::NativeInsideDescribe;
+            if (TensorUtils::getDescribeOrigin(t)->mContent.use_count() > 1) {
+                TensorUtils::getDescribeOrigin(t)->mContent.reset(new Tensor::InsideDescribe::NativeInsideDescribe);
                 t->buffer().dim = TensorUtils::getDescribe(t)->dims;
                 TensorUtils::getDescribeOrigin(t)->setBackend(nullptr);
                 TensorUtils::getDescribeOrigin(t)->mem = nullptr;
@@ -210,13 +210,18 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                 TensorUtils::getDescribe(t)->rasterCommand.reset();
                 TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_SHAPE_STAGE;
                 // The content may be computed by geometry computer, which will not make execution
-                TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
+                TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
             }
         }
         info.computeCache.needComputeShape = needCompute;
         if (info.type != Schedule::CONSTANT) {
             continue;
         }
+        if (!needCompute) {
+            for (auto t : info.outputs) {
+                TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
+            }
+        }
         if (_hasZeroShapeOutput(info)) {
             continue;
         }
@@ -292,7 +297,7 @@ ErrorCode GeometryComputerUtils::shapeComputeAndGeometryTransform(
                         dirty = true;
                         break;
                     }
-                    if ((des->stageMask & Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE) == 0) {
+                    if ((des->stageMask & Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE) == 0) {
                         dirty = true;
                         break;
                     }
@@ -305,26 +310,12 @@
                     return NOT_SUPPORT;
                 }
                 for (auto t : c.outputs) {
-                    TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE);
+                    TensorUtils::getDescribe(t)->stageMask &= (~Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE);
                 }
             }
         }
     }
-    for (int i=0; i<infos.size(); ++i) {
-        auto& info = infos[i];
-        if (info.type != Schedule::CONSTANT) {
-            continue;
-        }
-        auto& cmdBufferVir = info.executeBuffer;
-        for (auto& cp : cmdBufferVir.command) {
-            auto& c = *cp;
-            bool dirty = false;
-            for (auto t : c.inputs) {
-                auto des = TensorUtils::getDescribe(t);
-                if ((!des->isMutable) || des->group) {
-                    continue;
-                } else {
+                    for (auto t : c.outputs) {
+                        TensorUtils::getDescribe(t)->stageMask |= Tensor::InsideDescribe::StageInfo::CONTENT_NOT_CHANGE;
+                    }
-                    des->stageMask |= Tensor::InsideDescribe::StageInfo::COMPUTE_CONTENT_STAGE;
-                }
-            }
         }
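The rename from COMPUTE_CONTENT_STAGE to CONTENT_NOT_CHANGE matches the new flow: outputs of a constant op that needed no shape recompute keep the bit, a mutable input missing the bit marks the command dirty, and successful re-execution clears the bit on the outputs so downstream consumers recompute too. A standalone sketch of that dirty check (simplified descriptors, not MNN's types):

#include <cstdint>
#include <cstdio>
#include <vector>

// Minimal sketch of the dirty check used for pre-computed constant ops: a
// mutable input that lost the CONTENT_NOT_CHANGE bit means something upstream
// was recomputed, so the constant output must be evaluated again.
enum StageInfo : uint32_t { CONTENT_NOT_CHANGE = 1u << 3 };

struct TensorDesc {
    bool isMutable = true;
    uint32_t stageMask = 0;
};

bool needReexecute(const std::vector<TensorDesc*>& inputs) {
    for (auto* des : inputs) {
        if (!des->isMutable) {
            continue; // immutable inputs can never dirty the command
        }
        if ((des->stageMask & CONTENT_NOT_CHANGE) == 0) {
            return true; // content changed upstream
        }
    }
    return false;
}

int main() {
    TensorDesc in; // mutable input, freshly recomputed: bit not set
    if (needReexecute({&in})) {
        std::printf("run const op again\n");
    }
    in.stageMask |= CONTENT_NOT_CHANGE; // upstream content known stable now
    if (!needReexecute({&in})) {
        std::printf("skip const op\n");
    }
    return 0;
}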
