makeandparams.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

diff --unified --recursive --text mfaktc-0.21/src/Makefile mfaktc-0.21-new/src/Makefile
--- mfaktc-0.21/src/Makefile	2015-02-17 15:12:49.305144447 -0500
+++ mfaktc-0.21-new/src/Makefile	2020-12-06 16:39:26.876217215 -0500
@@ -1,5 +1,5 @@
 # where is the CUDA Toolkit installed?
-CUDA_DIR = /usr/local/cuda
+CUDA_DIR = /opt/cuda
 CUDA_INCLUDE = -I$(CUDA_DIR)/include/
 CUDA_LIB = -L$(CUDA_DIR)/lib64/
 
@@ -13,11 +13,15 @@
 NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
 
 # generate code for various compute capabilities
-NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
-NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code 
-NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+# note, CUDA 11.1 supports CC 3.5+ (some Kepler, Maxwell, Pascal, Volta, Turing and Ampere)
+NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # CC 3.5 and 3.7 cards will use this code (some Kepler cards)
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code (Maxwell)
+NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
+NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 # CC 6.x GPUs will use this code (Pascal)
+NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.0 GPUs will use this code (Volta, e.g. Titan V and Tesla V100)
+NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 # CC 7.5 GPUs will use this code (Turing)
+NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.0 GPUs will use this code (Ampere, e.g. A100)
+NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 # CC 8.6 GPUs will use this code (Ampere)
 
 # pass some options to the C host compiler (e.g. gcc on Linux)
 NVCCFLAGS += --compiler-options=-Wall
diff --unified --recursive --text mfaktc-0.21/src/params.h mfaktc-0.21-new/src/params.h
--- mfaktc-0.21/src/params.h	2015-02-17 15:12:49.322145383 -0500
+++ mfaktc-0.21-new/src/params.h	2020-12-06 16:51:24.334385208 -0500
@@ -175,7 +175,7 @@
 
 #define GPU_SIEVE_SIZE_MIN                   4 /* A 4M bit sieve seems like a reasonable minimum */
 #define GPU_SIEVE_SIZE_DEFAULT              64 /* Default is a 16M bit sieve */
-#define GPU_SIEVE_SIZE_MAX                 128 /* We've only tested up to 128M bits.  The GPU sieve code may be able to go higher. */
+#define GPU_SIEVE_SIZE_MAX                 2047 /* Raised from the original limit of 128, which was the largest size tested (128M bits).  The GPU sieve code may be able to go higher. */
 
 #define GPU_SIEVE_PROCESS_SIZE_MIN           8 /* Processing 8K bits in each block is minimum (256 threads * 1 word of 32 bits) */
 #define GPU_SIEVE_PROCESS_SIZE_DEFAULT      16 /* Default is processing 8K bits */