From e13ef6e4093496e20c14694426ff3e156b42f842 Mon Sep 17 00:00:00 2001 From: MrSpike <68044923+MrSpike63@users.noreply.github.com> Date: Wed, 7 Feb 2024 13:34:05 +1300 Subject: [PATCH] feat: Add support for proxy-based contract deployment --- README.md | 18 +++--- src/contract_address3.h | 45 ++++++++++++++ src/cpu_keccak.h | 30 +++++++++- src/keccak.h | 30 +++++++++- src/main.cu | 130 ++++++++++++++++++++++++++++++++++++---- 5 files changed, 231 insertions(+), 22 deletions(-) create mode 100644 src/contract_address3.h diff --git a/README.md b/README.md index fafdcf4..4fd52b1 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,18 @@ Vanity Eth Address is a tool to generate Ethereum addresses that match certain c ``` ./vanity-eth-addresss [PARAMETERS] Scoring methods - (-lz) --leading-zeros Count zero bytes at the start of the address - (-z) --zeros Count zero bytes anywhere in the address + (-lz) --leading-zeros Count zero bytes at the start of the address + (-z) --zeros Count zero bytes anywhere in the address Modes (normal addresses by default) - (-c) --contract Search for addresses and score the contract address generated using nonce=0 - (-c2) --contract2 Search for contract addresses using the CREATE2 opcode + (-c) --contract Search for addresses and score the contract address generated using nonce=0 + (-c2) --contract2 Search for contract addresses using the CREATE2 opcode + (-c3) --contract3 Search for contract addresses using a CREATE3 proxy deployer Other: - (-d) --device Use device (Add one for each device for multi-gpu) - (-b) --bytecode File containing contract bytecode (only needed when using --contract2) - (-a) --address
Sender contract address (only needed when using --contract2) - (-w) --work-scale Defaults to 15. Scales the work done in each kernel. If your GPU finishes kernels within a few seconds, you may benefit from increasing this number. + (-d) --device Use device (Add one for each device for multi-gpu) + (-b) --bytecode File containing contract bytecode (only needed when using --contract2 or --contract3) + (-a) --address
Sender contract address (only needed when using --contract2 or --contract3) + (-da) --deployer-address
Deployer contract address (only needed when using --contract3) + (-w) --work-scale Defaults to 15. Scales the work done in each kernel. If your GPU finishes kernels within a few seconds, you may benefit from increasing this number. Examples: ./vanity-eth-address --zeros --device 0 --device 2 --work-scale 17 diff --git a/src/contract_address3.h b/src/contract_address3.h new file mode 100644 index 0000000..fdff2fe --- /dev/null +++ b/src/contract_address3.h @@ -0,0 +1,45 @@ +/* + Copyright (C) 2023 MrSpike63 + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, version 3. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/ + +#pragma once +#include "curve_math.h" +#include "keccak.h" +#include "math.h" + + +__global__ void __launch_bounds__(BLOCK_SIZE, 2) gpu_contract3_address_work(int score_method, Address origin, Address deployer, _uint256 base_key, _uint256 proxy_bytecode) { + uint64_t thread_id = (uint64_t)threadIdx.x + (uint64_t)blockIdx.x * (uint64_t)BLOCK_SIZE; + uint64_t key_offset = (uint64_t)THREAD_WORK * thread_id; + + _uint256 key = base_key; + asm( + "add.cc.u32 %0, %0, %8; \n\t" + "addc.cc.u32 %1, %1, %9; \n\t" + "addc.cc.u32 %2, %2, 0x0; \n\t" + "addc.cc.u32 %3, %3, 0x0; \n\t" + "addc.cc.u32 %4, %4, 0x0; \n\t" + "addc.cc.u32 %5, %5, 0x0; \n\t" + "addc.cc.u32 %6, %6, 0x0; \n\t" + "addc.u32 %7, %7, 0x0; \n\t" + : "+r"(key.h), "+r"(key.g), "+r"(key.f), "+r"(key.e), "+r"(key.d), "+r"(key.c), "+r"(key.b), "+r"(key.a) : "r"((uint32_t)(key_offset & 0xFFFFFFFF)), "r"((uint32_t)(key_offset >> 32)) + ); + for (int i = 0; i < THREAD_WORK; i++) { + _uint256 salt = calculate_create3_salt(origin, key); + Address proxy = calculate_contract_address2(deployer, salt, proxy_bytecode); + handle_output2(score_method, calculate_contract_address(proxy, 1), key_offset + i); + key.h += 1; + } +} \ No newline at end of file diff --git a/src/cpu_keccak.h b/src/cpu_keccak.h index 306bff5..c40bdf5 100644 --- a/src/cpu_keccak.h +++ b/src/cpu_keccak.h @@ -158,7 +158,7 @@ Address cpu_calculate_address(_uint256 x, _uint256 y) { } -Address cpu_calculate_contract_address(Address a) { +Address cpu_calculate_contract_address(Address a, uint8_t nonce = 0x80) { uint64_t block[25]; for (int i = 0; i < 25; i++) { block[i] = 0; @@ -166,7 +166,7 @@ Address cpu_calculate_contract_address(Address a) { block[0] = cpu_swap_endianness((0xD694ULL << 48) | ((uint64_t)a.a << 16) | (a.b >> 16)); block[5] = cpu_swap_endianness(((uint64_t)a.b << 48) | ((uint64_t)a.c << 16) | (a.d >> 16)); - block[10] = cpu_swap_endianness(((uint64_t)a.d << 48) | ((uint64_t)a.e << 16) | (0x80ULL << 8) | 1); + block[10] = 
cpu_swap_endianness(((uint64_t)a.d << 48) | ((uint64_t)a.e << 16) | ((uint64_t)nonce << 8) | 1); block[8] = 0x8000000000000000; @@ -254,4 +254,30 @@ Address cpu_calculate_contract_address2(Address a, _uint256 salt, _uint256 bytec uint64_t d = cpu_swap_endianness(block[15]); return {(uint32_t)(b & 0xFFFFFFFF), (uint32_t)(c >> 32), (uint32_t)(c & 0xFFFFFFFF), (uint32_t)(d >> 32), (uint32_t)(d & 0xFFFFFFFF)}; +} + +_uint256 cpu_calculate_create3_salt(Address origin, _uint256 salt) { + uint64_t block[25]; + for (int i = 0; i < 25; i++) { + block[i] = 0; + } + + block[0] = cpu_swap_endianness(((uint64_t)origin.a << 32) | (uint64_t)origin.b); + block[5] = cpu_swap_endianness(((uint64_t)origin.c << 32) | (uint64_t)origin.d); + block[10] = cpu_swap_endianness(((uint64_t)origin.e << 32) | (uint64_t)salt.a); + block[15] = cpu_swap_endianness(((uint64_t)salt.b << 32) | (uint64_t)salt.c); + block[20] = cpu_swap_endianness(((uint64_t)salt.d << 32) | (uint64_t)salt.e); + block[1] = cpu_swap_endianness(((uint64_t)salt.f << 32) | (uint64_t)salt.g); + block[6] = cpu_swap_endianness(((uint64_t)salt.h << 32) | (1ULL << 24)); + + block[8] = 0x8000000000000000; + + cpu_block_permute(block); + + uint64_t a = cpu_swap_endianness(block[0]); + uint64_t b = cpu_swap_endianness(block[5]); + uint64_t c = cpu_swap_endianness(block[10]); + uint64_t d = cpu_swap_endianness(block[15]); + + return {(uint32_t)(a >> 32), (uint32_t)(a & 0xFFFFFFFF), (uint32_t)(b >> 32), (uint32_t)(b & 0xFFFFFFFF), (uint32_t)(c >> 32), (uint32_t)(c & 0xFFFFFFFF), (uint32_t)(d >> 32), (uint32_t)(d & 0xFFFFFFFF)}; } \ No newline at end of file diff --git a/src/keccak.h b/src/keccak.h index 694afaf..eb0c345 100644 --- a/src/keccak.h +++ b/src/keccak.h @@ -152,7 +152,7 @@ __device__ Address calculate_address(_uint256 x, _uint256 y) { } -__device__ Address calculate_contract_address(Address a) { +__device__ Address calculate_contract_address(Address a, uint8_t nonce = 0x80) { uint64_t block[25]; for (int i = 0; i < 25; 
i++) { block[i] = 0; @@ -160,7 +160,7 @@ __device__ Address calculate_contract_address(Address a) { block[0] = swap_endianness((0xD694ULL << 48) | ((uint64_t)a.a << 16) | (a.b >> 16)); block[5] = swap_endianness(((uint64_t)a.b << 48) | ((uint64_t)a.c << 16) | (a.d >> 16)); - block[10] = swap_endianness(((uint64_t)a.d << 48) | ((uint64_t)a.e << 16) | (0x80ULL << 8) | 1); + block[10] = swap_endianness(((uint64_t)a.d << 48) | ((uint64_t)a.e << 16) | ((uint64_t)nonce << 8) | 1); block[8] = 0x8000000000000000; @@ -201,4 +201,30 @@ __device__ Address calculate_contract_address2(Address a, _uint256 salt, _uint25 uint64_t d = swap_endianness(block[15]); return {(uint32_t)(b & 0xFFFFFFFF), (uint32_t)(c >> 32), (uint32_t)(c & 0xFFFFFFFF), (uint32_t)(d >> 32), (uint32_t)(d & 0xFFFFFFFF)}; +} + +__device__ _uint256 calculate_create3_salt(Address origin, _uint256 salt) { + uint64_t block[25]; + for (int i = 0; i < 25; i++) { + block[i] = 0; + } + + block[0] = swap_endianness(((uint64_t)origin.a << 32) | (uint64_t)origin.b); + block[5] = swap_endianness(((uint64_t)origin.c << 32) | (uint64_t)origin.d); + block[10] = swap_endianness(((uint64_t)origin.e << 32) | (uint64_t)salt.a); + block[15] = swap_endianness(((uint64_t)salt.b << 32) | (uint64_t)salt.c); + block[20] = swap_endianness(((uint64_t)salt.d << 32) | (uint64_t)salt.e); + block[1] = swap_endianness(((uint64_t)salt.f << 32) | (uint64_t)salt.g); + block[6] = swap_endianness(((uint64_t)salt.h << 32) | (1ULL << 24)); + + block[8] = 0x8000000000000000; + + block_permute(block); + + uint64_t a = swap_endianness(block[0]); + uint64_t b = swap_endianness(block[5]); + uint64_t c = swap_endianness(block[10]); + uint64_t d = swap_endianness(block[15]); + + return {(uint32_t)(a >> 32), (uint32_t)(a & 0xFFFFFFFF), (uint32_t)(b >> 32), (uint32_t)(b & 0xFFFFFFFF), (uint32_t)(c >> 32), (uint32_t)(c & 0xFFFFFFFF), (uint32_t)(d >> 32), (uint32_t)(d & 0xFFFFFFFF)}; } \ No newline at end of file diff --git a/src/main.cu b/src/main.cu index 
63f1509..f0eb3c4 100644 --- a/src/main.cu +++ b/src/main.cu @@ -135,6 +135,7 @@ __device__ void handle_output2(int score_method, Address a, uint64_t key) { #include "address.h" #include "contract_address.h" #include "contract_address2.h" +#include "contract_address3.h" int global_max_score = 0; @@ -176,7 +177,7 @@ uint64_t milliseconds() { } -void host_thread(int device, int device_index, int score_method, int mode, Address origin_address, _uint256 bytecode) { +void host_thread(int device, int device_index, int score_method, int mode, Address origin_address, Address deployer_address, _uint256 bytecode) { uint64_t GRID_WORK = ((uint64_t)BLOCK_SIZE * (uint64_t)GRID_SIZE * (uint64_t)THREAD_WORK); CurvePoint* block_offsets = 0; @@ -219,7 +220,7 @@ void host_thread(int device, int device_index, int score_method, int mode, Addre max_key = cpu_sub_256(max_key, GRID_WORK); max_key = cpu_sub_256(max_key, _uint256{0, 0, 0, 0, 0, 0, 0, THREAD_WORK}); max_key = cpu_add_256(max_key, _uint256{0, 0, 0, 0, 0, 0, 0, 2}); - } else if (mode == 2) { + } else if (mode == 2 || mode == 3) { max_key = _uint256{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; } @@ -229,7 +230,7 @@ void host_thread(int device, int device_index, int score_method, int mode, Addre if (mode == 0 || mode == 1) { status = generate_secure_random_key(base_random_key, max_key, 255); random_key_increment = cpu_mul_256_mod_p(cpu_mul_256_mod_p(uint32_to_uint256(BLOCK_SIZE), uint32_to_uint256(GRID_SIZE)), uint32_to_uint256(THREAD_WORK)); - } else if (mode == 2) { + } else if (mode == 2 || mode == 3) { status = generate_secure_random_key(base_random_key, max_key, 256); random_key_increment = cpu_mul_256_mod_p(cpu_mul_256_mod_p(uint32_to_uint256(BLOCK_SIZE), uint32_to_uint256(GRID_SIZE)), uint32_to_uint256(THREAD_WORK)); base_random_key.h &= ~(THREAD_WORK - 1); @@ -443,6 +444,73 @@ void host_thread(int device, int device_index, int score_method, int mode, Addre 
gpu_assert(cudaMemcpyToSymbol(device_memory, device_memory_host, sizeof(uint64_t))); } } + + if (mode == 3) { + while (true) { + uint64_t start_time = milliseconds(); + gpu_contract3_address_work<<>>(score_method, origin_address, deployer_address, random_key, bytecode); + + gpu_assert(cudaDeviceSynchronize()) + gpu_assert(cudaMemcpyFromSymbol(device_memory_host, device_memory, (2 + OUTPUT_BUFFER_SIZE * 3) * sizeof(uint64_t))) + + uint64_t end_time = milliseconds(); + double elapsed = (end_time - start_time) / 1000.0; + + global_max_score_mutex.lock(); + if (output_counter_host[0] != 0) { + if (max_score_host[0] > global_max_score) { + global_max_score = max_score_host[0]; + } else { + max_score_host[0] = global_max_score; + } + } + global_max_score_mutex.unlock(); + + double speed = GRID_WORK / elapsed / 1000000.0; + if (output_counter_host[0] != 0) { + int valid_results = 0; + + for (int i = 0; i < output_counter_host[0]; i++) { + if (output_buffer2_host[i] < max_score_host[0]) { continue; } + valid_results++; + } + + if (valid_results > 0) { + _uint256* results = new _uint256[valid_results]; + int* scores = new int[valid_results]; + valid_results = 0; + + for (int i = 0; i < output_counter_host[0]; i++) { + if (output_buffer2_host[i] < max_score_host[0]) { continue; } + + uint64_t k_offset = output_buffer_host[i]; + _uint256 k = cpu_add_256(random_key, _uint256{0, 0, 0, 0, 0, 0, (uint32_t)(k_offset >> 32), (uint32_t)(k_offset & 0xFFFFFFFF)}); + + int idx = valid_results++; + results[idx] = k; + scores[idx] = output_buffer2_host[i]; + } + + message_queue_mutex.lock(); + message_queue.push(Message{end_time, 0, device_index, cudaSuccess, speed, valid_results, results, scores}); + message_queue_mutex.unlock(); + } else { + message_queue_mutex.lock(); + message_queue.push(Message{end_time, 0, device_index, cudaSuccess, speed, 0}); + message_queue_mutex.unlock(); + } + } else { + message_queue_mutex.lock(); + message_queue.push(Message{end_time, 0, device_index, 
cudaSuccess, speed, 0}); + message_queue_mutex.unlock(); + } + + random_key = cpu_add_256(random_key, random_key_increment); + + output_counter_host[0] = 0; + gpu_assert(cudaMemcpyToSymbol(device_memory, device_memory_host, sizeof(uint64_t))); + } + } } @@ -461,9 +529,10 @@ void print_speeds(int num_devices, int* device_ids, double* speeds) { int main(int argc, char *argv[]) { int score_method = -1; // 0 = leading zeroes, 1 = zeros - int mode = 0; // 0 = address, 1 = contract, 2 = create2 contract + int mode = 0; // 0 = address, 1 = contract, 2 = create2 contract, 3 = create3 proxy contract char* input_file = 0; char* input_address = 0; + char* input_deployer_address = 0; int num_devices = 0; int device_ids[10]; @@ -484,12 +553,18 @@ int main(int argc, char *argv[]) { } else if (strcmp(argv[i], "--contract2") == 0 || strcmp(argv[i], "-c2") == 0) { mode = 2; i++; + } else if (strcmp(argv[i], "--contract3") == 0 || strcmp(argv[i], "-c3") == 0) { + mode = 3; + i++; } else if (strcmp(argv[i], "--bytecode") == 0 || strcmp(argv[i], "-b") == 0) { input_file = argv[i + 1]; i += 2; } else if (strcmp(argv[i], "--address") == 0 || strcmp(argv[i], "-a") == 0) { input_address = argv[i + 1]; i += 2; + } else if (strcmp(argv[i], "--deployer-address") == 0 || strcmp(argv[i], "-da") == 0) { + input_deployer_address = argv[i + 1]; + i += 2; } else if (strcmp(argv[i], "--work-scale") == 0 || strcmp(argv[i], "-w") == 0) { GRID_SIZE = 1U << atoi(argv[i + 1]); i += 2; @@ -513,14 +588,21 @@ int main(int argc, char *argv[]) { return 1; } - if (mode == 2 && !input_address) { + if ((mode == 2 || mode == 3) && !input_address) { printf("You must specify an origin address when using --contract2\n"); return 1; - } else if (mode == 2 && strlen(input_address) != 40 && strlen(input_address) != 42) { + } else if ((mode == 2 || mode == 3) && strlen(input_address) != 40 && strlen(input_address) != 42) { printf("The origin address must be 40 characters long\n"); return 1; } + if ((mode == 2 || mode == 
3) && !input_deployer_address) { + printf("You must specify a deployer address when using --contract3\n"); + return 1; + } + + + for (int i = 0; i < num_devices; i++) { cudaError_t e = cudaSetDevice(device_ids[i]); if (e != cudaSuccess) { @@ -531,7 +613,7 @@ int main(int argc, char *argv[]) { #define nothex(n) ((n < 48 || n > 57) && (n < 65 || n > 70) && (n < 97 || n > 102)) _uint256 bytecode_hash; - if (mode == 2) { + if (mode == 2 || mode == 3) { std::ifstream infile(input_file, std::ios::binary); if (!infile.is_open()) { printf("Failed to open the bytecode file.\n"); @@ -584,7 +666,7 @@ int main(int argc, char *argv[]) { } Address origin_address; - if (mode == 2) { + if (mode == 2 || mode == 3) { if (strlen(input_address) == 42) { input_address += 2; } @@ -606,13 +688,37 @@ int main(int argc, char *argv[]) { #undef round } + + Address deployer_address; + if (mode == 3) { + if (strlen(input_deployer_address) == 42) { + input_deployer_address += 2; + } + char substr[9]; + + #define round(i, offset) \ + strncpy(substr, input_deployer_address + offset * 8, 8); \ + if (nothex(substr[0]) || nothex(substr[1]) || nothex(substr[2]) || nothex(substr[3]) || nothex(substr[4]) || nothex(substr[5]) || nothex(substr[6]) || nothex(substr[7])) { \ + printf("Invalid deployer address.\n"); \ + return 1; \ + } \ + deployer_address.i = strtoull(substr, 0, 16); + + round(a, 0) + round(b, 1) + round(c, 2) + round(d, 3) + round(e, 4) + + #undef round + } #undef nothex std::vector threads; uint64_t global_start_time = milliseconds(); for (int i = 0; i < num_devices; i++) { - std::thread th(host_thread, device_ids[i], i, score_method, mode, origin_address, bytecode_hash); + std::thread th(host_thread, device_ids[i], i, score_method, mode, origin_address, deployer_address, bytecode_hash); threads.push_back(move(th)); } @@ -644,6 +750,10 @@ int main(int argc, char *argv[]) { addresses[i] = cpu_calculate_contract_address(cpu_calculate_address(p.x, p.y)); } else if (mode == 2) { addresses[i] 
= cpu_calculate_contract_address2(origin_address, m.results[i], bytecode_hash); + } else if (mode == 3) { + _uint256 salt = cpu_calculate_create3_salt(origin_address, m.results[i]); + Address proxy = cpu_calculate_contract_address2(deployer_address, salt, bytecode_hash); + addresses[i] = cpu_calculate_contract_address(proxy, 1); } } @@ -655,7 +765,7 @@ int main(int argc, char *argv[]) { if (mode == 0 || mode == 1) { printf("Elapsed: %06u Score: %02u Private Key: 0x%08x%08x%08x%08x%08x%08x%08x%08x Address: 0x%08x%08x%08x%08x%08x\n", (uint32_t)time, score, k.a, k.b, k.c, k.d, k.e, k.f, k.g, k.h, a.a, a.b, a.c, a.d, a.e); - } else if (mode == 2) { + } else if (mode == 2 || mode == 3) { printf("Elapsed: %06u Score: %02u Salt: 0x%08x%08x%08x%08x%08x%08x%08x%08x Address: 0x%08x%08x%08x%08x%08x\n", (uint32_t)time, score, k.a, k.b, k.c, k.d, k.e, k.f, k.g, k.h, a.a, a.b, a.c, a.d, a.e); } }