feat: support auto optimize demo
bitzyz committed Nov 14, 2023
1 parent 65f9c5e commit 6bc538e
Showing 13 changed files with 401 additions and 1 deletion.
31 changes: 30 additions & 1 deletion src/01graph_topo/include/graph_topo/linked_graph.hpp
@@ -1,8 +1,8 @@
#ifndef GRAPH_TOPO_LINKED_GRAPH_H
#define GRAPH_TOPO_LINKED_GRAPH_H

#include "container.h"
#include "common.h"
#include "container.h"
#include <algorithm>
#include <sstream>
#include <unordered_map>
@@ -30,6 +30,7 @@ namespace refactor::graph_topo {
static auto shareEdge(TE) -> Rc<Edge>;

std::string toString() const;
std::string toString(std::string func(TN const &)) const;
Graph<TN, TE> intoGraph() const;
std::vector<Rc<Node>> const &nodes() const;
std::vector<Rc<Edge>> const &inputs() const;
@@ -127,6 +128,34 @@ namespace refactor::graph_topo {
return ss.str();
}

LINKED_GRAPH_FN toString(std::string func(TN const &)) const->std::string {
std::unordered_map<void *, size_t> indices;
std::stringstream ss;
auto f = [&indices, &ss](Rc<Edge> const &e) {
if (e) {
auto [it, ok] = indices.try_emplace(e.get(), indices.size());
ss << it->second << ' ';
} else {
ss << "? ";
}
};
ss << "*. -> ( ";
for (auto const &e : _inputs) { f(e); }
ss << ')' << std::endl;
for (auto i : range0_(_nodes.size())) {
auto n = _nodes[i];
ss << i << ". ( ";
for (auto const &e : n->_inputs) { f(e); }
ss << ") -> ( ";
for (auto const &e : n->_outputs) { f(e); }
ss << ')' << func(n->_info) << std::endl;
}
ss << "*. <- ( ";
for (auto const &e : _outputs) { f(e); }
ss << ')' << std::endl;
return ss.str();
}

LINKED_GRAPH_FN nodes() const->std::vector<Rc<Node>> const & {
return _nodes;
}
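A minimal usage sketch of the new `toString` overload, mirroring the call that appears in `mutant_generator.cc` further down in this commit. The parameter is declared as a plain function type, so a capture-less lambda converts to it; `curGraph` and the `Node` info type are the ones used in that file:

// Render each node's info through a capture-less lambda, which decays to
// the `std::string (*)(TN const &)` this overload expects.
fmt::println("{}", curGraph.internal().linked().toString(
    [](Node const &o) -> std::string { return std::string(o.op->name()); }));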
1 change: 1 addition & 0 deletions src/02mem_manager/include/mem_manager/blob.hh
@@ -19,6 +19,7 @@ namespace refactor::mem_manager {

static std::pair<std::shared_ptr<Blob>, void *> share(size_t);
operator void const *() const noexcept;
operator void *() noexcept;
template<class T> T const *get() const noexcept {
return reinterpret_cast<T const *>(_ptr);
}
1 change: 1 addition & 0 deletions src/02mem_manager/src/blob.cc
@@ -14,5 +14,6 @@ namespace refactor::mem_manager {
return {std::move(blob), ptr};
}
Blob::operator void const *() const noexcept { return _ptr; }
Blob::operator void *() noexcept { return _ptr; }

}// namespace refactor::mem_manager
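The new non-const conversion mirrors the existing const one, so a blob can now be passed directly as a writable kernel output as well as a read-only input. A minimal sketch inside some function, assuming `mem_manager/blob.hh` and `<utility>` are included (the 1024-byte size is an arbitrary example); it mirrors how `mutant_generator.cc` builds kernel argument arrays from blobs:

auto [blob, ptr] = refactor::mem_manager::Blob::share(1024);// 1024 bytes, arbitrary
void *writable = *blob;                     // new non-const conversion
void const *readonly = std::as_const(*blob);// existing const conversion
void const *inputs[]{readonly};
void *outputs[]{writable};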
1 change: 1 addition & 0 deletions src/05computation/include/computation/graph.h
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ namespace refactor::computation {

kernel::Graph lower(Target) const;
auto internal() const -> decltype(_internal) const &;
auto internal() -> decltype(_internal) &;
};

}// namespace refactor::computation
44 changes: 44 additions & 0 deletions src/05computation/include/computation/mutant_generator.h
@@ -0,0 +1,44 @@
#ifndef COMPUTATION_MUTANT_GENERATOR_H
#define COMPUTATION_MUTANT_GENERATOR_H

#include "graph.h"
#include "operator.h"

namespace refactor::computation {

using OpVec = std::vector<Arc<MyOperator>>;
using TensorVec = std::vector<Rc<refactor::graph_topo::LinkedGraph<Node, Edge>::Edge>>;

inline uint64_t hashAppend(uint64_t a, uint64_t b) {
return (a * 10000019 + b * 10000079) % 2147483647;
}

template<typename T> inline uint64_t hashVector(const std::vector<T> &vec) {
uint64_t ret = 0;
for (auto v : vec)
ret = hashAppend(ret, v);
return ret;
}

class MutantGenerator {
float equalThreshold;
size_t maxDepth;
size_t numValidTensors = 0;
OpVec opList;
std::vector<OpVec> opStorage;
OpVec opFinger;
TensorVec validTensors;
std::set<uint64_t> opHashMaps;

public:
void init(float, size_t, OpVec) noexcept;
void run(Graph const &, std::vector<Graph> &) noexcept;
void dfs(size_t, Graph const &, Graph &, std::vector<Graph> &) noexcept;
bool is_mutant(Graph &, Graph const &) noexcept;
bool approx_equal(const Tensor &, const Tensor &) const noexcept;
bool have_same_op(Arc<MyOperator> const &, size_t, size_t) noexcept;
void delete_hash_op(Arc<MyOperator> const &, size_t, size_t) noexcept;
};
}// namespace refactor::computation

#endif// COMPUTATION_MUTANT_GENERATOR_H
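`hashAppend`/`hashVector` implement a small rolling hash over `{opTypeId, inputA, inputB}` triples: `have_same_op` records a triple in `opHashMaps` before descending and `delete_hash_op` erases it when the DFS backtracks, so the same operator is never applied to the same input pair twice on one search path. A standalone sketch of that bookkeeping, with this header included (the opTypeId value 7 is made up for illustration):

std::set<uint64_t> seen;// plays the role of opHashMaps
auto tryOp = [&seen](size_t opTypeId, size_t a, size_t b) -> bool {
    auto h = hashVector(std::vector<size_t>{opTypeId, a, b});
    return seen.insert(h).second;// false if this (op, a, b) was already tried
};
tryOp(7, 0, 1);// first time: true, the dfs may proceed
tryOp(7, 0, 1);// duplicate: false, the dfs skips this combination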
14 changes: 14 additions & 0 deletions src/05computation/include/computation/operator.h
@@ -7,6 +7,7 @@
namespace refactor::computation {
using kernel::LayoutType;
using kernel::Target;
using kernel::Tensor;

class Operator {
public:
@@ -42,6 +43,19 @@ namespace refactor::computation {

using OpBox = std::unique_ptr<Operator>;

struct MyOperator {
size_t numInputs = 2;
Arc<Operator> base;

MyOperator() : numInputs(2) {}
MyOperator(size_t num) : numInputs(num) {}
//MyOperator(const MyOperator &other) : numInputs(other.numInputs) {}
//MyOperator(MyOperator &&other) : numInputs(other.numInputs) {}
// virtual std::unique_ptr<MyOperator> create() const = 0;
virtual ~MyOperator() = default;// polymorphic base: needs a virtual destructor
virtual std::unique_ptr<Operator> clone() const = 0;
virtual bool compute(Tensor const &, Tensor const &, Tensor &) const = 0;
};

}// namespace refactor::computation

#endif// COMPUTATION_OPERATOR_H
10 changes: 10 additions & 0 deletions src/05computation/include/computation/operators/concat.h
@@ -15,6 +15,16 @@ namespace refactor::computation {
kernel::CollectorBox candidateKernels(Target) const noexcept final;
};

using refactor::kernel::Tensor;
struct ConcatBox final : public MyOperator {
// Arc<Concat> base;

ConcatBox() noexcept : MyOperator() {
base = std::make_shared<Concat>(1, 2);
}
std::unique_ptr<Operator> clone() const final;
bool compute(Tensor const &, Tensor const &, Tensor &) const noexcept final;
};
}// namespace refactor::computation

#endif// COMPUTATION_CONCAT_H
11 changes: 11 additions & 0 deletions src/05computation/include/computation/operators/mat_mul.h
@@ -22,6 +22,17 @@ namespace refactor::computation {
kernel::CollectorBox candidateKernels(Target) const noexcept final;
};

using refactor::kernel::Tensor;
struct MatMulBox final : public MyOperator {
// Arc<MatMul> base;

MatMulBox() noexcept : MyOperator() {
base = std::make_shared<MatMul>(1.0, 1.0, false, false);
}
std::unique_ptr<Operator> clone() const final;
bool compute(Tensor const &, Tensor const &, Tensor &) const noexcept final;
};

}// namespace refactor::computation

#endif// COMPUTATION_MAT_MUL_H
1 change: 1 addition & 0 deletions src/05computation/src/graph.cc
@@ -67,5 +67,6 @@ namespace refactor::computation {
}

auto Graph::internal() const -> decltype(_internal) const & { return _internal; }
auto Graph::internal() -> decltype(_internal) & { return _internal; }

}// namespace refactor::computation
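The non-const `internal()` accessor is what lets the mutant generator edit the graph in place during the search. A condensed sketch of the grow/backtrack pattern from `mutant_generator.cc` below (`curGraph`, `op`, `out`, `validTensors`, `i`, and `j` are the names used in that file):

auto &linked = curGraph.internal().linked();// mutable view via the new accessor
auto newEdge = linked.shareEdge({out, "tensor_0"});
auto newNode = linked.pushNode({op->clone(), "op_0"}, {newEdge});
newNode->connect(0, validTensors[i]);// wire both inputs
newNode->connect(1, validTensors[j]);
// ... recurse one level deeper ...
linked.eraseNode(newNode);// backtrack: undo the insertion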
163 changes: 163 additions & 0 deletions src/05computation/src/mutant_generator.cc
@@ -0,0 +1,163 @@
#include "computation/mutant_generator.h"
#define MAX_SIZE (1024 * 1024)

namespace refactor::computation {
using K = MutantGenerator;

void K::init(float equalThreshold_, size_t maxDepth_, OpVec opList_) noexcept {
equalThreshold = equalThreshold_;
maxDepth = maxDepth_;
opList = opList_;
opFinger.clear();
opStorage.clear();
validTensors.clear();
opHashMaps.clear();
for (size_t i = 0; i < maxDepth; ++i) {
opStorage.push_back(opList);
}
}

void K::run(Graph const &inGraph, std::vector<Graph> &outGraphs) noexcept {
using namespace refactor::graph_topo;

// init global inputs
std::unordered_map<size_t, Edge> edges;
auto edgeIndex = std::vector<size_t>{};
auto inputs = inGraph.internal().linked().inputs();
auto outputs = inGraph.internal().linked().outputs();
ASSERT(outputs.size() == 1, "Only a single output is supported.");
numValidTensors = inputs.size();
for (size_t i = 0; i < numValidTensors; ++i) {
edgeIndex.emplace_back(i);
edges.insert({i, inputs[i]->info()});
}
// init graph
Builder<size_t, Node, size_t, Edge>
builder = {{}, edgeIndex, {}, {}, edges};
Graph curGraph(std::move(builder.build()));
for (size_t i = 0; i < numValidTensors; ++i) {
validTensors.emplace_back(curGraph.internal().linked().inputs()[i]);
}
dfs(0, inGraph, curGraph, outGraphs);
}

void K::dfs(size_t depth, Graph const &inGraph, Graph &curGraph, std::vector<Graph> &outGraphs) noexcept {
if (is_mutant(curGraph, inGraph)) {
outGraphs.emplace_back(curGraph);
return;
}
if (depth >= maxDepth) {
return;
}
//auto g_ = curGraph.internal().linked();
for (size_t index = 0; index < opStorage[depth].size(); ++index) {
auto op = opStorage[depth][index];
if (op->numInputs == 2) {
for (size_t i = 0; i < numValidTensors; ++i) {
for (size_t j = 0; j < numValidTensors; ++j) {
if (i == j) {
continue;
}
auto x = validTensors[i]->info().tensor;
auto y = validTensors[j]->info().tensor;
//fmt::println("{},{}, {}, {}", i, j, reinterpret_cast<void *>(x.get()), reinterpret_cast<void *>(y.get()));
auto out = Tensor::share(x->dataType, {1024, 1024}, LayoutType::Others);
out->malloc();
if (!op->compute(*x, *y, *out) || have_same_op(op, i, j)) {
out->free();
continue;
}
numValidTensors++;
opFinger.push_back(op);
auto name = fmt::format("{}", depth);
auto newEdge = curGraph.internal().linked().shareEdge({out, "tensor_" + name});
auto newNode = curGraph.internal().linked().pushNode({op->clone(), "op_" + name},
{newEdge});
newNode->connect(0, validTensors[i]);
newNode->connect(1, validTensors[j]);
validTensors.push_back(newEdge);
//fmt::println("{}", curGraph.internal().linked().toString([](Node const &o) -> std::string { return std::string(o.op->name()); }));
//fmt::println("{}", reinterpret_cast<void *>(validTensors[j]->info().tensor.get()));
dfs(depth + 1, inGraph, curGraph, outGraphs);
curGraph.internal().linked().eraseNode(newNode);
//curGraph.internal().linked();
validTensors.pop_back();
opFinger.pop_back();
delete_hash_op(op, i, j);
numValidTensors--;
}
}
}
}
}

bool K::is_mutant(Graph &curGraph, Graph const &inGraph) noexcept {
fmt::println("=======================output graph =================");
fmt::println("{}", curGraph.internal().linked().toString([](Node const &o) -> std::string { return std::string(o.op->name()); }));
fmt::println("Edges info :");
for (size_t i = 0; i < numValidTensors; ++i) {
fmt::println("{}. \"{}\" Shape is {}", i, validTensors[i]->info().name,
vec2str(validTensors[i]->info().tensor->shape));
}
auto inputs = inGraph.internal().linked().inputs();
auto outputs = inGraph.internal().linked().outputs();
std::vector<refactor::Rc<refactor::graph_topo::LinkedGraph<Node, Edge>::Edge>> outEdges;
for (auto output : outputs) {
int found = -1;
auto &tensor = *output->info().tensor;
for (size_t i = inputs.size(); i < validTensors.size(); ++i) {
if (approx_equal(tensor, *(validTensors[i]->info().tensor))) {
found = i;
break;
}
}
if (found == -1) {
fmt::println("!!!!!!!compare false ");
return false;
}
outEdges.emplace_back(validTensors[found]);
}
curGraph.internal().linked().setOutputs(outEdges);
fmt::println("=======================compare true =================");
return true;
}

bool K::approx_equal(const Tensor &a, const Tensor &b) const noexcept {
if (a.shape != b.shape) {
return false;
}
size_t equal = 0, total = 0;
auto dataA = a.data->get<float>();
auto dataB = b.data->get<float>();
for (size_t i = 0; i < a.elementsSize(); ++i) {
if (dataA[i] == dataB[i]) {
equal++;
}
total++;
}
return float(equal) / total >= equalThreshold;
}

bool K::have_same_op(Arc<MyOperator> const &op, size_t a, size_t b) noexcept {
//fmt::println("{}", reinterpret_cast<void *>(op->base.get()));
std::vector<size_t> hashInfo = {op->base->opTypeId(), a, b};
auto res = hashVector(hashInfo);
if (opHashMaps.find(res) != opHashMaps.end()) {
return true;
}
opHashMaps.insert(std::move(res));
return false;
}

void K::delete_hash_op(Arc<MyOperator> const &op, size_t a, size_t b) noexcept {
std::vector<size_t> hashInfo = {op->base->opTypeId(), a, b};
auto res = hashVector(hashInfo);
if (auto it = opHashMaps.find(res); it != opHashMaps.end()) {
opHashMaps.erase(it);
}
}
}// namespace refactor::computation
29 changes: 29 additions & 0 deletions src/05computation/src/operators/concat.cc
@@ -14,4 +14,33 @@ namespace refactor::computation {
using Collector_ = kernel::ConcatCollector;
return std::make_unique<Collector_>(target, axis);
}

bool ConcatBox::compute(Tensor const &a, Tensor const &b, Tensor &out) const noexcept {
if (a.rank() != 2 || b.rank() != 2) {
return false;
}
if (a.shape[0] != b.shape[0]) {
return false;
}
if (a.dataType != b.dataType) {
return false;
}
if (a.data == nullptr || b.data == nullptr) {
return false;
}
out.shape = {a.shape[0], a.shape[1] + b.shape[1]};
//compute
auto kernels = this->base->candidateKernels(Target::Cpu)->filter({a, b}, {out});
ASSERT(kernels.size() != 0, "no candidate kernel supports this case");
runtime::Resources res;
auto rou = kernels[0]->lower(res);
void const *inputs[]{*a.data, *b.data};
void *outputs[]{*out.data};
rou(res, inputs, outputs);
return true;
}

std::unique_ptr<Operator> ConcatBox::clone() const {
return std::make_unique<Concat>(*dynamic_cast<Concat const *>(base.get()));
}
}// namespace refactor::computation
29 changes: 29 additions & 0 deletions src/05computation/src/operators/mat_mul.cc
@@ -1,5 +1,6 @@
#include "computation/operators/mat_mul.h"
#include "kernel/collectors/mat_mul.h"
#include "runtime/resource.h"

namespace refactor::computation {
using Op = MatMul;
@@ -14,4 +15,32 @@ namespace refactor::computation {
return std::make_unique<kernel::MatMulCollector>(target, alpha, beta, transA, transB);
}

bool MatMulBox::compute(Tensor const &a, Tensor const &b, Tensor &out) const noexcept {
if (a.rank() != 2 || b.rank() != 2) {
return false;
}
if (a.shape[1] != b.shape[0]) {
return false;
}
if (a.dataType != b.dataType) {
return false;
}
if (a.data == nullptr || b.data == nullptr) {
return false;
}
out.shape = {a.shape[0], b.shape[1]};
//compute
auto kernels = this->base->candidateKernels(Target::Cpu)->filter({a, b}, {out});
ASSERT(kernels.size() != 0, "no candidate kernel supports this case");
runtime::Resources res;
auto rou = kernels[0]->lower(res);
void const *inputs[]{*a.data, *b.data};
void *outputs[]{*out.data};
rou(res, inputs, outputs);
return true;
}

std::unique_ptr<Operator> MatMulBox::clone() const {
return std::make_unique<MatMul>(*dynamic_cast<MatMul const *>(base.get()));
}
}// namespace refactor::computation
67 changes: 67 additions & 0 deletions src/05computation/test/test_mutant_generator.cpp
@@ -0,0 +1,67 @@
#include "computation/graph.h"
#include "computation/mutant_generator.h"
#include "computation/operators/concat.h"
#include "computation/operators/mat_mul.h"
#include <gtest/gtest.h>
#include <numeric>

namespace refactor::computation {

refactor::graph_topo::Builder<size_t, Node, size_t, Edge> TestInGraphBuild() {
auto nodes = std::unordered_map<size_t, Node>{};
nodes[0] = Node{std::make_unique<MatMul>(1.0, 1.0, false, false), "matmul_1"};
nodes[1] = Node{std::make_unique<MatMul>(1.0, 1.0, false, false), "matmul_2"};
nodes[2] = Node{std::make_unique<Concat>(1, 2), "concat"};

auto tensor0 = Tensor::share(DataType::F32, {5, 6}, LayoutType::Others);
auto tensor1 = Tensor::share(DataType::F32, {4, 5}, LayoutType::Others);
auto tensor2 = Tensor::share(DataType::F32, {5, 7}, LayoutType::Others);
auto tensor3 = Tensor::share(DataType::F32, {4, 6}, LayoutType::Others);
auto tensor4 = Tensor::share(DataType::F32, {4, 7}, LayoutType::Others);
auto tensor5 = Tensor::share(DataType::F32, {4, 13}, LayoutType::Others);
// initialize inputs data
auto data0 = reinterpret_cast<float *>(tensor0->malloc());
auto data1 = reinterpret_cast<float *>(tensor1->malloc());
auto data2 = reinterpret_cast<float *>(tensor2->malloc());
std::iota(data0, data0 + tensor0->elementsSize(), 1.0);
std::iota(data1, data1 + tensor1->elementsSize(), 1.0);
std::iota(data2, data2 + tensor2->elementsSize(), 1.0);
// initialize outputs data
float outputData[]{255.0, 270.0, 285.0, 300.0, 315.0, 330.0, 295.0, 310.0, 325.0, 340.0, 355.0, 370.0, 385.0, 580.0, 620.0, 660.0, 700.0,
740.0, 780.0, 670.0, 710.0, 750.0, 790.0, 830.0, 870.0, 910.0, 905.0, 970.0, 1035.0, 1100.0, 1165.0, 1230.0, 1045.0, 1110.0, 1175.0, 1240.0,
1305.0, 1370.0, 1435.0, 1230.0, 1320.0, 1410.0, 1500.0, 1590.0, 1680.0, 1420.0, 1510.0, 1600.0, 1690.0, 1780.0, 1870.0, 1960.};
std::memcpy(tensor5->malloc(), outputData, tensor5->bytesSize());

return {
{{0, {{1, 0}, {3}}},
{1, {{1, 2}, {4}}},
{2, {{3, 4}, {5}}}},
{0, 1, 2},
{5},
std::move(nodes),
{
{0, {tensor0, "input_tensor_0"}},
{1, {tensor1, "input_tensor_1"}},
{2, {tensor2, "input_tensor_2"}},
{3, {tensor3, "matmul0_output"}},
{4, {tensor4, "matmul1_output"}},
{5, {tensor5, "output"}},
},
};
}

TEST(Graph, MutantGenerator) {
auto graphTopo = TestInGraphBuild().build();
fmt::println("{}", graphTopo.topology.toString());
Graph g(std::move(graphTopo));
// create mutant generator
MutantGenerator mutant;
OpVec oplist = {std::make_shared<MatMulBox>(), std::make_shared<ConcatBox>()};
mutant.init(1.0, 3, oplist);
std::vector<Graph> outGraph = {};
mutant.run(g, outGraph);
for (size_t i = 0; i < outGraph.size(); ++i) {
fmt::println("{}", outGraph[i].internal().linked().toString());
}
}
}// namespace refactor::computation
