1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
|
# SievePrimes defines how far the factor candidates (FCs) are presieved on
# the CPU. The first <SievePrimes> odd primes are used to sieve the FCs.
#
# Minimum: SievePrimes=2000 (check SievePrimesMin)
# Maximum: SievePrimes=200000
#
# Default: SievePrimes=25000
SievePrimes=25000
# Set this to 1 to enable automatic adjustment of SievePrimes during
# runtime based on the "average wait times".
#
# Default: SievePrimesAdjust=1
SievePrimesAdjust=1
# Set the minimum and maximum limit for SievePrimes, this is needed if
# SievePrimesAdjust is enabled. These are soft limits, there are hard limits
# in the code which should never ever be changed.
#
# Current limits are:
# 2000 <= SievePrimesMin <= SievePrimes <= SievePrimesMax <= 200000
#
# Default: SievePrimesMin=5000
# SievePrimesMax=100000
SievePrimesMin=5000
SievePrimesMax=100000
# Set the number of CUDA streams / data sets used by mfaktc.
# NumStreams must be >= 1. In this case mfaktc can process one stream /
# data set on the GPU while the CPU can preprocess the other one. When
# NumStreams is >= 2 then the time needed to upload (CPU->GPU transfer)
# the data sets can be hidden (if the hardware supports it (most GPUs
# support this)).
# On Linux systems 2 or 3 seem to be good values. There are comments that
# Windows systems need a greater number of streams.
# A greater number increases the memory consumed by mfaktc (host and GPU
# memory). The current limit for the number of streams is 10!
# Don't be too greedy with this value, high values are usually just a waste
# of memory.
#
# Minimum: NumStreams=1
# Maximum: NumStreams=10
#
# Default: NumStreams=3
NumStreams=3
# Set the number of data sets which can be preprocessed on CPU. This allows
# mfaktc to tolerate more jitter in the preprocessing runtime and the GPU
# stream runtime.
# Don't be too greedy with this value, high values are usually just a waste
# of memory.
#
# Minimum: CPUStreams=1
# Maximum: CPUStreams=5
#
# Default: CPUStreams=3
CPUStreams=3
# The GridSize affects the number of threads per grid.
# Depending also on the number of multiprocessors of your GPU, the
# automatically determined number of threads per grid is set to:
# GridSize = 0: 65536 < threads per grid <= 131072
# GridSize = 1: 131072 < threads per grid <= 262144
# GridSize = 2: 262144 < threads per grid <= 524288
# GridSize = 3: 524288 < threads per grid <= 1048576 (default)
# A smaller GridSize has more overhead than a bigger GridSize for long
# running jobs. For really small jobs there can be a small benefit on
# computation time if the GridSize is small. A smaller GridSize directly
# reduces the runtime per kernel launch and might result in a better
# interactivity on older GPUs.
#
# Default: GridSize=3
GridSize=3
# Checkpoints = 0: disable checkpoints
# Checkpoints = 1: enable checkpoints
# Checkpoints are needed for resume capability, after a class is finished a
# checkpoint file is written. When mfaktc is interrupted during the run and
# restarted later it will begin at the last processed class.
#
# Default: Checkpoints=1
Checkpoints=1
# CheckpointDelay is the time in seconds between two checkpoint writes.
# Allowed values are 0 <= CheckpointDelay <= 900.
#
# Minimum: CheckpointDelay=0
# Maximum: CheckpointDelay=900
#
# Default: CheckpointDelay=30
CheckpointDelay=30
# WorkFileAddDelay defines how often mfaktc should check for worktodo.add.
# When a worktodo.add is detected it will wait WorkFileAddDelay seconds
# before processing it.
#
# Minimum: WorkFileAddDelay=30 (WorkFileAddDelay=0 disables this feature)
# Maximum: WorkFileAddDelay=3600
#
# Default: WorkFileAddDelay=600
WorkFileAddDelay=600
# Allows an assignment to be split into multiple bit ranges.
# 0 = disabled
# 1 = enabled
# Enabling Stages only makes sense when StopAfterFactor is 1 or 2.
# Do not change this in the middle of a run which spans multiple
# bitlevels; in that case mfaktc will ignore the checkpoint file and
# restart from the beginning.
#
# Default: Stages=1
Stages=1
# possible values for StopAfterFactor:
# 0: Do not stop the current assignment after a factor was found.
# 1: When a factor was found for the current assignment stop after the
# current bitlevel. This only makes sense when Stages is enabled.
# 2: When a factor was found for the current assignment stop after the
# current class.
#
# Default: StopAfterFactor=1
StopAfterFactor=1
# possible values for PrintMode:
# 0: print a new line for each finished class
# 1: overwrite the current line (more compact output)
#
# Default: PrintMode=0
PrintMode=0
# allow the CPU to sleep if nothing can be preprocessed?
# 0: Do not sleep if the CPU must wait for the GPU
# 1: The CPU can sleep for a short time if it has to wait for the GPU
#
# Default: AllowSleep=0
AllowSleep=0
# if V5UserID and ComputerID are specified, then the result lines in the
# results file will have the prefix "UID: user/host, " - the same way as
# prime95 does it.
#
# Default: none (unset)
#
#V5UserID=TheJudger
#ComputerID=Ananke
# TimeStampInResults controls whether each output line in the results
# file is preceded by a date-and-time stamp (similar to prime95).
#
# Default: TimeStampInResults=0
TimeStampInResults=0
# PrintFormat allows to customize the progress output of mfaktc. You can use
# any combination of the following format specifications, which will be
# replaced correspondingly in the progress line:
#
# %C - class ID (n/420) or (n/4620) "%4d"
# %c - class number (n/96) or (n/960) "%3d"
# %p - percent complete (%) "%5.1f"
# %g - GHz-days/day (GHz) "%7.2f"
# %t - time per class (s) "%6.0f" / "%6.1f" / "%6.2f" / "%6.3f"
# %e - eta (d/h/m/s) "%2dm%02ds" / "%2dh%02dm" / "%2dd%02dh"
# %n - number of candidates (M/G) "%6.2fM"/"%6.2fG"
# %r - rate (M/s) "%6.2f" / "%6.1f"
# %s - SievePrimes "%7d"
# %w - CPU wait time for GPU (us) n.a. (mfakto only!)
# %W - CPU wait % (%) "%6.2f"
# %d - date (Mon nn) "%b %d" (strftime format)
# %T - time (HH:MM) "%H:%M" (strftime format)
# %U - username (as configured) "%s" no fixed width
# %H - hostname (as configured) "%s" no fixed width
# %M - the exponent being worked on "%-10u"
# %l - the lower bit-limit "%2d"
# %u - the upper bit-limit "%2d"
#
# Using the ProgressHeader you can specify a fixed string that is displayed
# as a header to the progress output without any modification.
# Keep in mind that "number of candidates (M/G)" and "rate (M/s)" are NOT
# comparable between CPU- and GPU-sieving. When sieving is done on the GPU
# those numbers count all factor candidates prior to sieving, while CPU
# sieving counts the candidates after the sieving process.
#
# mfaktc 0.20+ (default)
ProgressHeader=Date Time | class Pct | time ETA | GHz-d/day Sieve Wait
ProgressFormat=%d %T | %C %p%% | %t %e | %g %s %W%%
# old mfaktc 0.18/0.19 style
#
#ProgressHeader= class | candidates | time | ETA | avg. rate | SievePrimes | CPU wait
#ProgressFormat=%C/4620 | %n | %ts | %e | %rM/s | %s | %W%%
# print everything
#ProgressHeader=[date time] exponent [TF bits]: percent class #, seq | GHZ | time | ETA | #FCs | rate | SieveP. | CPU wait | V5UserID@ComputerID
#ProgressFormat=[%d %T] M%M [%l-%u]: %p%% %C/4620,%c/960 | %g | %ts | %e | %n | %rM/s | %s | %W%% | %U@%H
# enable or disable GPU sieving
# SieveOnGPU=0: use the CPU for sieving factor candidates (high CPU usage)
# SieveOnGPU=1: use the GPU for sieving factor candidates (very low CPU usage)
# GPU sieving is supported on GPUs with compute capability 2.0 or higher.
# (e.g. Geforce 400 series or newer)
#
# Default: SieveOnGPU=1
SieveOnGPU=1
# GPUSievePrimes defines how far we sieve the factor candidates on the GPU.
# The first <GPUSievePrimes> primes are sieved.
#
# Minimum: GPUSievePrimes=0
# Maximum: GPUSievePrimes=1075000
#
# Default: GPUSievePrimes=82486
GPUSievePrimes=82486
# GPUSieveSize defines how big of a GPU sieve we use (in M bits).
#
# Minimum: GPUSieveSize=4
# Maximum: GPUSieveSize=2047
#
# Default: GPUSieveSize=64
# (the value set below is the documented maximum, not the default)
GPUSieveSize=2047
# GPUSieveProcessSize defines how many bits of the sieve each TF block
# processes (in K bits). Larger values may lead to fewer wasted cycles by
# reducing the number of times all threads in a warp are not TFing a
# candidate. However, more shared memory is used which may reduce occupancy.
# Smaller values should lead to a more responsive system (each kernel takes
# less time to execute). GPUSieveProcessSize must be a multiple of 8.
#
# Minimum: GPUSieveProcessSize=8
# Maximum: GPUSieveProcessSize=32
#
# Default: GPUSieveProcessSize=16
# (the value set below is the documented maximum, not the default)
GPUSieveProcessSize=32
|