1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
diff --unified --recursive --text mfaktc-0.21/src/Makefile mfaktc-0.21-new/src/Makefile
--- mfaktc-0.21/src/Makefile 2015-02-17 15:12:49.305144447 -0500
+++ mfaktc-0.21-new/src/Makefile 2020-12-06 16:39:26.876217215 -0500
@@ -1,5 +1,5 @@
# where is the CUDA Toolkit installed?
-CUDA_DIR = /usr/local/cuda
+CUDA_DIR = /opt/cuda
CUDA_INCLUDE = -I$(CUDA_DIR)/include/
CUDA_LIB = -L$(CUDA_DIR)/lib64/
@@ -13,11 +13,15 @@
NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
# generate code for various compute capabilities
-NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
-NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
-NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
-NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+# note, CUDA 11.1 supports CC 3.5+ (some Kepler, Maxwell, Pascal, Volta, Turing and Ampere)
+NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # CC 3.5 and 3.7 cards will use this code (some Kepler cards)
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code (Maxwell)
+NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
+NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 # CC 6.x GPUs will use this code (Pascal)
+NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.0 GPUs will use this code (Volta, e.g. Titan V and Tesla V100)
+NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 # CC 7.5 GPUs will use this code (Turing)
+NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.0 GPUs will use this code (Ampere, e.g. A100)
+NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 # CC 8.6 GPUs will use this code (Ampere)
# pass some options to the C host compiler (e.g. gcc on Linux)
NVCCFLAGS += --compiler-options=-Wall
diff --unified --recursive --text mfaktc-0.21/src/params.h mfaktc-0.21-new/src/params.h
--- mfaktc-0.21/src/params.h 2015-02-17 15:12:49.322145383 -0500
+++ mfaktc-0.21-new/src/params.h 2020-12-06 16:51:24.334385208 -0500
@@ -175,7 +175,7 @@
#define GPU_SIEVE_SIZE_MIN 4 /* A 4M bit sieve seems like a reasonable minimum */
#define GPU_SIEVE_SIZE_DEFAULT 64 /* Default is a 16M bit sieve */
-#define GPU_SIEVE_SIZE_MAX 128 /* We've only tested up to 128M bits. The GPU sieve code may be able to go higher. */
+#define GPU_SIEVE_SIZE_MAX 2047 /* Raised from 128. Upstream only tested up to 128M bits, so larger sieve sizes are untested; the GPU sieve code may be able to go higher. */
#define GPU_SIEVE_PROCESS_SIZE_MIN 8 /* Processing 8K bits in each block is minimum (256 threads * 1 word of 32 bits) */
#define GPU_SIEVE_PROCESS_SIZE_DEFAULT 16 /* Default is processing 8K bits */
|