Implement x14 (cuda + cpu functions)

Project was updated for VS2013 and CUDA SDK 6.5. Also adds a --cputest function to dump CPU hash results. TODO: x15 is not fully functional, but the first loop seems OK. Signed-off-by: Tanguy Pruvot <[email protected]>
SilentMajortity · Aug 12, 2014 · 06763c2 · 06763c2
1 parent df840b7
commit 06763c2
Show file tree

Hide file tree

Showing 32 changed files with 8,179 additions and 436 deletions.
diff --git a/Makefile.am b/Makefile.am
@@ -29,16 +29,18 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  groestlcoin.cpp cuda_groestlcoin.cu cuda_groestlcoin.h \
 			  myriadgroestl.cpp cuda_myriadgroestl.cu \
 			  JHA/jackpotcoin.cu JHA/cuda_jha_keccak512.cu \
-			  JHA/cuda_jha_compactionTest.cu quark/cuda_quark_checkhash.cu \
+			  JHA/cuda_jha_compactionTest.cu quark/cuda_checkhash.cu \
 			  quark/cuda_jh512.cu quark/cuda_quark_blake512.cu quark/cuda_quark_groestl512.cu quark/cuda_skein512.cu \
 			  quark/cuda_bmw512.cu quark/cuda_quark_keccak512.cu quark/quarkcoin.cu quark/animecoin.cu \
 			  quark/cuda_quark_compactionTest.cu \
 			  cuda_nist5.cu \
 			  sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
+			  sph/shabal.c sph/whirlpool.c \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
+			  x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu
 
 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@

diff --git a/README.md b/README.md
@@ -2,3 +2,6 @@ ccminer
 =======
 
 Christian Buchner's &amp; Christian H.'s CUDA miner project
+
+Fork by tpruvot@github with X14 support
+   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
diff --git a/README.txt b/README.txt
@@ -19,6 +19,10 @@ If you find this tool useful and like to support its continued
    SFR donation address: SR4b87aEnPfTs77bo9NnnaV21fiF6jQpAp
    MNC donation address: MShgNUSYwybEbXLvJUtdNg1a7rUeiNgooK
    BTQ donation address: 13GFwLiZL2DaA9XeE733PNrQX5QYLFsonS
+
+X14/X15 (tpruvot@github)
+   BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo
+
 ***************************************************************
 
 >>> Introduction <<<
@@ -88,6 +92,7 @@ its command line interface and options.
   -P, --protocol-dump   verbose dump of protocol-level activities
   -B, --background      run the miner in the background
       --benchmark       run in offline benchmark mode
+      --cputest         debug hashes from cpu algorithms
   -c, --config=FILE     load a JSON-format configuration file
   -V, --version         display version information and exit
   -h, --help            display this help text and exit

diff --git a/ccminer.sln b/ccminer.sln
@@ -1,6 +1,7 @@
-
-Microsoft Visual Studio Solution File, Format Version 11.00
-# Visual Studio 2010
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.30723.0
+MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ccminer", "ccminer.vcxproj", "{36DC07F9-A4A6-4877-A146-1B960083CF6F}"
 EndProject
 Global

diff --git a/ccminer.vcxproj b/ccminer.vcxproj
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
@@ -58,6 +58,9 @@
     <Filter Include="Source Files\CUDA\x13">
       <UniqueIdentifier>{d67a2af7-4851-4d21-910e-87791bc8ee35}</UniqueIdentifier>
     </Filter>
+    <Filter Include="Source Files\CUDA\x15">
+      <UniqueIdentifier>{a2403c22-6777-46ab-a55a-3fcc7386c974}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="compat\jansson\dump.c">
@@ -153,6 +156,15 @@
     <ClCompile Include="sph\hamsi_helper.c">
       <Filter>Source Files\sph</Filter>
     </ClCompile>
+    <ClCompile Include="sph\shabal.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\whirlpool.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
+    <ClCompile Include="sph\x15_helper.c">
+      <Filter>Source Files\sph</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="compat.h">
@@ -254,6 +266,12 @@
     <ClInclude Include="sph\sph_hamsi.h">
       <Filter>Header Files\sph</Filter>
     </ClInclude>
+    <ClInclude Include="sph\sph_shabal.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
+    <ClInclude Include="sph\sph_whirlpool.h">
+      <Filter>Header Files\sph</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <CudaCompile Include="cuda_fugue256.cu">
@@ -268,9 +286,6 @@
     <CudaCompile Include="JHA\jackpotcoin.cu">
       <Filter>Source Files\CUDA\JHA</Filter>
     </CudaCompile>
-    <CudaCompile Include="quark\cuda_quark_checkhash.cu">
-      <Filter>Source Files\CUDA\quark</Filter>
-    </CudaCompile>
     <CudaCompile Include="cuda_myriadgroestl.cu">
       <Filter>Source Files\CUDA</Filter>
     </CudaCompile>
@@ -361,5 +376,20 @@
     <CudaCompile Include="x13\x13.cu">
       <Filter>Source Files\CUDA\x13</Filter>
     </CudaCompile>
+    <CudaCompile Include="quark\cuda_checkhash.cu">
+      <Filter>Source Files\CUDA</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\x14.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x14_shabal512.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\x15.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
+    <CudaCompile Include="x15\cuda_x15_whirlpool.cu">
+      <Filter>Source Files\CUDA\x15</Filter>
+    </CudaCompile>
   </ItemGroup>
 </Project>
diff --git a/compat/thrust/system/detail/generic/find.inl b/compat/thrust/system/detail/generic/find.inl
@@ -86,7 +86,7 @@ InputIterator find_if(thrust::execution_policy<DerivedPolicy> &exec,
 
     // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32
     const difference_type interval_threshold = 1 << 20;
-    const difference_type interval_size = (std::min)(interval_threshold, n);
+    const difference_type interval_size = min(interval_threshold, n);
 
     // force transform_iterator output to bool
     typedef thrust::transform_iterator<Predicate, InputIterator, bool> XfrmIterator;

diff --git a/cpu-miner.c b/cpu-miner.c
@@ -21,6 +21,7 @@
 #include <time.h>
 #ifdef WIN32
 #include <windows.h>
+#include <stdint.h>
 #else
 #include <errno.h>
 #include <signal.h>
@@ -133,6 +134,8 @@ typedef enum {
 	ALGO_NIST5,
 	ALGO_X11,
 	ALGO_X13,
+	ALGO_X14,
+	ALGO_X15,
 	ALGO_DMD_GR,
 } sha256_algos;
 
@@ -148,6 +151,8 @@ static const char *algo_names[] = {
 	"nist5",
 	"x11",
 	"x13",
+	"x14",
+	"x15",
 	"dmd-gr",
 };
 
@@ -222,6 +227,8 @@ Options:\n\
                         nist5     NIST5 (TalkCoin) hash\n\
                         x11       X11 (DarkCoin) hash\n\
                         x13       X13 (MaruCoin) hash\n\
+                        x14       X14 hash\n\
+                        x15       X15 hash\n\
                         dmd-gr    Diamond-Groestl hash\n\
   -d, --devices         takes a comma separated list of CUDA devices to use.\n\
                         Device IDs start counting from 0! Alternatively takes\n\
@@ -258,6 +265,7 @@ Options:\n\
 #endif
 "\
       --benchmark       run in offline benchmark mode\n\
+      --cputest         debug hashes from cpu algorithms\n\
   -c, --config=FILE     load a JSON-format configuration file\n\
   -V, --version         display version information and exit\n\
   -h, --help            display this help text and exit\n\
@@ -278,6 +286,7 @@ static struct option const options[] = {
 	{ "background", 0, NULL, 'B' },
 #endif
 	{ "benchmark", 0, NULL, 1005 },
+	{ "cputest", 0, NULL, 1006 },
 	{ "cert", 1, NULL, 1001 },
 	{ "config", 1, NULL, 'c' },
 	{ "debug", 0, NULL, 'D' },
@@ -924,6 +933,17 @@ static void *miner_thread(void *userdata)
 			                      max_nonce, &hashes_done);
 			break;
 
+		case ALGO_X14:
+			rc = scanhash_x14(thr_id, work.data, work.target,
+				max_nonce, &hashes_done);
+			break;
+
+		case ALGO_X15:
+			rc = scanhash_x15(thr_id, work.data, work.target,
+				max_nonce, &hashes_done);
+			exit(0);
+			break;
+
 		default:
 			/* should never happen */
 			goto out;
@@ -1345,6 +1365,10 @@ static void parse_arg (int key, char *arg)
 		want_stratum = false;
 		have_stratum = false;
 		break;
+	case 1006:
+		print_hash_tests();
+		exit(0);
+		break;
 	case 1003:
 		want_longpoll = false;
 		break;
@@ -1481,26 +1505,26 @@ static void signal_handler(int sig)
 }
 #endif
 
-#define PROGRAM_VERSION "1.2"
+#define PROGRAM_VERSION "1.2-VC12"
 int main(int argc, char *argv[])
 {
 	struct thr_info *thr;
 	long flags;
 	int i;
 
+	printf("*** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
+	printf("\t This is version "PROGRAM_VERSION" (tpruvot@github)\n");
 #ifdef WIN32
-	SYSTEM_INFO sysinfo;
+	printf("\t  Built with VC++ 2013 and nVidia CUDA SDK 6.5 RC (DC 5.0)\n\n");
+#else
+	printf("\t  Built with the nVidia CUDA SDK 6.5 RC\n\n");
 #endif
-
-	printf("     *** ccMiner for nVidia GPUs by Christian Buchner and Christian H. ***\n");
-	printf("\t             This is version "PROGRAM_VERSION" (beta)\n");
 	printf("\t  based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n");
-	printf("\t  based on pooler-cpuminer extension for HVC from\n\t       https://github.com/heavycoin/cpuminer-heavycoin\n");
-	printf("\t\t\tand\n\t       http://hvc.1gh.com/\n");
+	printf("\t  based on pooler-cpuminer extension for HVC from http://hvc.1gh.com/" "\n\n");
 	printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n");
-	printf("\t  LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n");
 	printf("\t  BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n");
-	printf("\t  YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n");
+	printf("\tCuda X14 and X15 added by Tanguy Pruvot (also in cpuminer-multi)\n");
+	printf("\t  BTC donation address: 1AJdfCpLWPNoAMDfHF1wD5y8VgKSSTHxPo\n\n");
 
 	rpc_user = strdup("");
 	rpc_pass = strdup("");

diff --git a/cpuminer-config.h b/cpuminer-config.h
@@ -152,7 +152,7 @@
 #define PACKAGE_NAME "ccminer"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.06.15"
+#define PACKAGE_STRING "ccminer 2014.08.12"
 
 /* Define to the one symbol short name of this package. */
 #undef PACKAGE_TARNAME
@@ -161,7 +161,7 @@
 #undef PACKAGE_URL
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.06.15"
+#define PACKAGE_VERSION "2014.08.12-VC12"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be

diff --git a/heavy/heavy.cu b/heavy/heavy.cu
@@ -1,10 +1,15 @@
-#include <string.h>
-#include <openssl/sha.h>
 #include <cuda.h>
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+
 #include <map>
 
+#include <openssl/sha.h>
+
 #ifndef _WIN32
 #include <unistd.h>
 #endif
@@ -337,7 +342,7 @@ int scanhash_heavy_cpp(int thr_id, uint32_t *pdata,
     blake512_cpu_setBlock(pdata, blocklen);
 
     do {
-        int i;
+        uint32_t i;
 
         ////// Compaction init
         thrust::device_ptr<uint32_t> devNoncePtr(d_nonceVector[thr_id]);

diff --git a/miner.h b/miner.h
@@ -243,6 +243,14 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done);
 
+extern int scanhash_x14(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
+extern int scanhash_x15(int thr_id, uint32_t *pdata,
+	const uint32_t *ptarget, uint32_t max_nonce,
+	unsigned long *hashes_done);
+
 extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len);
 extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int len);
 extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len);
@@ -342,6 +350,12 @@ extern void *tq_pop(struct thread_q *tq, const struct timespec *abstime);
 extern void tq_freeze(struct thread_q *tq);
 extern void tq_thaw(struct thread_q *tq);
 
+void print_hash_tests(void);
+void x11hash(void *output, const void *input);
+void x13hash(void *output, const void *input);
+void x14hash(void *output, const void *input);
+void x15hash(void *output, const void *input);
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu
@@ -23,6 +23,13 @@ static __device__ uint32_t cuda_swab32(uint32_t x)
 {
 	return __byte_perm(x, 0, 0x0123);
 }
+
+// Replace the high 32-bit word of a 64-bit value: keeps the low 32 bits of x and puts y in bits 32..63
+static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
+	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
+}
+
+#if 0
 // Endian Drehung für 64 Bit Typen
 static __device__ unsigned long long cuda_swab64(unsigned long long x) {
     uint32_t h = (x >> 32);
@@ -39,11 +46,6 @@ static __device__ uint32_t HIWORD(const unsigned long long &x) {
 #endif
 }
 
-// das Hi Word in einem 64 Bit Typen ersetzen
-static __device__ unsigned long long REPLACE_HIWORD(const unsigned long long &x, const uint32_t &y) {
-	return (x & 0xFFFFFFFFULL) | (((unsigned long long)y) << 32ULL);
-}
-
 // das Lo Word aus einem 64 Bit Typen extrahieren
 static __device__ uint32_t LOWORD(const unsigned long long &x) {
 #if __CUDA_ARCH__ >= 130
@@ -66,6 +68,7 @@ static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI)
 static __device__ unsigned long long REPLACE_LOWORD(const unsigned long long &x, const uint32_t &y) {
 	return (x & 0xFFFFFFFF00000000ULL) | ((unsigned long long)y);
 }
+#endif
 
 // der Versuch, einen Wrapper für einen aus 32 Bit Registern zusammengesetzten uin64_t Typen zu entferfen...
 #if 1

diff --git a/quark/cuda_quark_checkhash.cu → quark/cuda_checkhash.cu b/quark/cuda_quark_checkhash.cu → quark/cuda_checkhash.cu
@@ -3,14 +3,10 @@
 #include "device_launch_parameters.h"
 
 #include <stdio.h>
+#include <stdint.h>
 #include <memory.h>
 
-// Folgende Definitionen später durch header ersetzen
-typedef unsigned char uint8_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long uint64_t;
-
-// das Hash Target gegen das wir testen sollen
+// Hash Target gegen das wir testen sollen
 __constant__ uint32_t pTarget[8];
 
 uint32_t *d_resNounce[8];
@@ -19,7 +15,7 @@ uint32_t *h_resNounce[8];
 // aus heavy.cu
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 
-__global__ void quark_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
+__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
 {
 	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@@ -89,7 +85,7 @@ __host__ uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t star
 	// Größe des dynamischen Shared Memory Bereichs
 	size_t shared_size = 0;
 
-	quark_check_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
+	cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);
 
 	// Strategisches Sleep Kommando zur Senkung der CPU Last
 	MyStreamSynchronize(NULL, order, thr_id);