Skip to content

Latest commit

 

History

History
81 lines (63 loc) · 1.8 KB

File metadata and controls

81 lines (63 loc) · 1.8 KB

Baby CUDA

Description

GPU reverse engineering might be easier than you think :)

Solution

The binary is a CUDA program (CUDA is NVIDIA's platform and C++ language extension for GPU programming). Reading the IDA decompilation, we can recognize that it is a matrix multiplication program. It is quite simple; I did not add any tweaks.

Main logic is in kernel.cu, commented below.

int main()
{
    // Flag checker: the 16-character input is laid out row-major as a 4x4
    // matrix A, multiplied on the GPU by a fixed 4x4 matrix B, and the product
    // C = A*B is compared element-wise against a hard-coded expected matrix.
    // Returns 0 and prints a success message when every element matches;
    // otherwise terminates with exit status 1.
    const int N = 4;
    const int SIZE = N * N;

    // Report a failed CUDA runtime call and terminate. Without this, a CUDA
    // failure would leave h_C zero-filled and be indistinguishable from a
    // wrong flag.
    auto checkCuda = [](cudaError_t status, const char* what) {
        if (status != cudaSuccess) {
            cerr << "CUDA error (" << what << "): "
                 << cudaGetErrorString(status) << "\n";
            exit(1);
        }
    };

    // Allocate memory on the host
    vector<float> h_A(SIZE);
    vector<float> h_B(SIZE);
    vector<float> h_C(SIZE);

    // Read flag; it must contain exactly SIZE characters to fill A.
    string flag;
    cout << "Enter flag: ";
    cin >> flag;
    if (flag.length() != static_cast<size_t>(SIZE)) {
        exit(1);
    }

    // Initialize matrices on the host:
    //   A[k] = character code of flag[k], B[k] = k   (row-major, k = 0..15)
    for (int k = 0; k < SIZE; k++) {
        h_A[k] = static_cast<float>(flag[k]);
        h_B[k] = static_cast<float>(k);
    }

    // Perturb selected entries of B so it is no longer the plain 0..15 ramp.
    h_B[0]++;
    h_B[3] += 2;
    h_B[5] += 2;
    h_B[6] += 3;
    h_B[10] += 1;
    h_B[14] += 5;

    // Allocate memory on the device
    // NOTE(review): dev_array is defined elsewhere; presumably an owning
    // device-buffer wrapper whose set()/get() copy host<->device — confirm.
    dev_array<float> d_A(SIZE);
    dev_array<float> d_B(SIZE);
    dev_array<float> d_C(SIZE);

    d_A.set(&h_A[0], SIZE);
    d_B.set(&h_B[0], SIZE);

    // Compute C = A*B on the device, then surface launch and execution errors.
    matrixMultiplication(d_A.getData(), d_B.getData(), d_C.getData(), N);
    checkCuda(cudaGetLastError(), "matrixMultiplication launch");
    checkCuda(cudaDeviceSynchronize(), "matrixMultiplication execution");

    // Copy the product back to the host.
    d_C.get(&h_C[0], SIZE);
    checkCuda(cudaDeviceSynchronize(), "result copy");

    // Expected product matrix for the correct flag.
    const vector<vector<int>> res{
        {2755,3324,4553,4150},
        {2534,3087,4271,3863},
        {1828,2145,2903,2739},
        {2436,2830,3926,3490} };

    // cvctf{CuD4_B@@M}
    // Every element, truncated to int, must match the expected matrix.
    for (int ROW = 0; ROW < N; ROW++) {
        for (int COL = 0; COL < N; COL++) {
            if ((int)h_C[ROW * N + COL] != res[ROW][COL]) exit(1);
        }
    }

    cout << "Congratulations!\n";

    return 0;
}