-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathargon2.nim
555 lines (437 loc) · 19.8 KB
/
argon2.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
import std/[base64, sequtils, strformat, strutils]
import blake2b
#[
General:
+ Argon2 RFC: https://www.rfc-editor.org/rfc/rfc9106.txt
+ This implementation is based heavily on https://cs.opensource.google/go/x/crypto/+/master:argon2/
+ Thoroughly commented for algorithm flow and comprehension
Modes:
+ Argon2d (Data-Dependent Memory Access):
Argon2d uses data-dependent memory access, which means the sequence of references to memory blocks depends on the block contents.
Faster but more vulnerable to side-channel attacks.
+ Argon2i (Data-Independent Memory Access):
Argon2i uses data-independent memory access, meaning the sequence of references to the memory blocks is precomputed and independent of the actual block contents.
Provides resistence to side-channel attacks.
+ Argon2id (Hybrid):
Argon2id combines both approaches: it starts like Argon2i (data-independent) and then switches to Argon2d (data-dependent).
This mode aims to provide a balance between resistance to side-channel attacks and computational efficiency (GPU attacks).
Parameters: (via RFC)
+ Message string P, which is a password for password hashing
applications. It MUST have a length not greater than 2^(32)-1
bytes.
+ Nonce S, which is a salt for password hashing applications. It
MUST have a length not greater than 2^(32)-1 bytes. 16 bytes is
RECOMMENDED for password hashing. The salt SHOULD be unique for
each password.
+ Degree of parallelism p determines how many independent (but
synchronizing) computational chains (lanes) can be run. It MUST
be an integer value from 1 to 2^(24)-1.
+ Tag length T MUST be an integer number of bytes from 4 to 2^(32)-
1.
+ Memory size m MUST be an integer number of kibibytes from 8*p to
2^(32)-1. The actual number of blocks is m', which is m rounded
down to the nearest multiple of 4*p.
+ Number of passes t (used to tune the running time independently of
the memory size) MUST be an integer number from 1 to 2^(32)-1.
+ Version number v MUST be one byte 0x13.
Secret value K is OPTIONAL. If used, it MUST have a length not
greater than 2^(32)-1 bytes.
Associated data X is OPTIONAL. If used, it MUST have a length not
greater than 2^(32)-1 bytes.
Type y MUST be 0 for Argon2d, 1 for Argon2i, or 2 for Argon2id.
Configuration:
OWASP recommends Argon2id with these settings: (https://raw.githubusercontent.com/OWASP/CheatSheetSeries/master/cheatsheets/Password_Storage_Cheat_Sheet.md)
m=47104 (46 MiB), t=1, p=1 (Do not use with Argon2i)
m=19456 (19 MiB), t=2, p=1 (Do not use with Argon2i)
m=12288 (12 MiB), t=3, p=1
m=9216 (9 MiB), t=4, p=1
m=7168 (7 MiB), t=5, p=1
"These configuration settings provide an equal level of defense, and the only difference is a trade off between CPU and RAM usage."
TODO:
+ add memory clearing (clear/reset proc?)
+ optimizations and inlining
]#
const
blockSize: int = 128
blockSizeInBytes: int = 1024
blake2bMaxDigestSize: int = 64
# NOTE: synchronization points are specific positions in the algorithm's execution where the memory
# state is synchronized across different computational threads or lanes.
syncPoints: uint32 = 4
Argon2_Version_1_3: int = 0x13 # 19 in decimal
Argon2_Version_1_2_1: int = 0x10 # 16 in decimal
type
Word = uint64
Block = array[blockSize, Word] # 128(Words per Block) × 8(bytes per Word) = 1024 byte(block size)
MemoryArray = seq[Block]
Mode = enum
ARGON2D, ARGON2I, ARGON2ID # 0, 1, 2 in decimal
Argon2Params = object
password: seq[byte]
salt: seq[byte]
secret: seq[byte]
assocData: seq[byte]
memoryCost: uint32 # memory cost (in kibibytes)
timeCost: uint32 # time cost (number of iterations)
parallelism: uint32 # degree of parallelism (number of threads)
digestSize: uint32 # desired length of the final hash
version: int
mode: Mode
Argon2Ctx = object
params: Argon2Params
mArray: MemoryArray
memory: ptr MemoryArray
include blamka
##########################################################################
proc validateArgon2Params(params: Argon2Params) =
if not (params.parallelism > 0 and params.parallelism <= (1 shl 24) - 1):
raise newException(ValueError, "Parallelism must be between 1 and 2^24 - 1")
# NOTE: must be a power of 2 greater than 1
if not (params.memoryCost >= 8 * params.parallelism and params.memoryCost <= (1 shl 32) - 1):
raise newException(ValueError, "Memory cost must be between 8 * parallelism and 2^32 - 1")
if not (params.timeCost > 0 and params.timeCost <= (1 shl 32) - 1):
raise newException(ValueError, "Time cost must be between 1 and 2^32 - 1")
if not (params.digestSize >= 4 and params.digestSize <= (1 shl 32) - 1):
raise newException(ValueError, "Hash length must be between 4 and 2^32 - 1")
if not (params.version in {Argon2_Version_1_3, Argon2_Version_1_2_1}):
raise newException(ValueError, "Version must be either 0x13 (19) or 0x10 (16)")
if not (params.mode in {ARGON2D, ARGON2I, ARGON2ID}):
raise newException(ValueError, "Mode must be one of ARGON2D (0), ARGON2I (1), or ARGON2ID (2)")
proc toBytesLE(value: uint32): seq[byte] =
## convert a 32-bit unsigned integer to a sequence of bytes in little-endian order
result = newSeq[byte](sizeof(value))
for i in 0 ..< sizeof(value):
result[i] = byte((value shr (8 * i)) and 0xFF)
proc bytesToBlock(bytes: seq[byte]): Block =
# NOTE: input byte sequence must be 1024 bytes long for Block conversion
var word: Word
for i in 0 ..< blockSize: # 128 Words (uint64) in a Block
word = 0
for j in 0 ..< 8: # 8 bytes in a Word
word = word or (Word(bytes[i * 8 + j]) shl (j * 8))
result[i] = word
return result
proc blockToBytes(blk: Block): seq[byte] =
result = newSeq[byte](blockSizeInBytes)
for i, word in blk:
for j in 0 ..< 8: # 8 bytes in a Word
result[i * 8 + j] = byte((word shr (j * 8)) and 0xFF)
##########################################################################
proc HPrime(input: seq[byte], digestSize: uint32): seq[byte] =
## turns BLAKE2 into a variable-length hash function
result = newSeq[byte](digestSize)
var V: seq[byte]
if digestSize <= 64:
# NOTE: digest size (tag length) is within BLAKE2B's maximum of 64 -> hash input directly
var blake2bCtx = newBlake2bCtx(digestSize = int(digestSize))
blake2bCtx.update(toBytesLE(digestSize) & input)
V = blake2bCtx.digest()
copyMem(addr result[0], addr V[0], digestSize)
else:
# NOTE: digest size (tag length) is larger than BLAKE2B's maximum of 64 -> extend hash artificially
# NOTE: initial hash
var blake2bCtx = newBlake2bCtx(digestSize = 64)
blake2bCtx.update(toBytesLE(digestSize) & input)
V = blake2bCtx.digest()
copyMem(addr result[0], addr V[0], 32)
# NOTE: intermediate hashing (extending)
var remaining = digestSize - 32
var offset: uint32 = 32
while remaining > 64:
blake2bCtx = newBlake2bCtx(digestSize = 64)
blake2bCtx.update(V)
V = blake2bCtx.digest()
let copySize = min(remaining, 32)
copyMem(addr result[offset], addr V[0], copySize)
remaining -= copySize
offset += copySize
# NOTE: final hash
blake2bCtx = newBlake2bCtx(digestSize = int(remaining))
blake2bCtx.update(V)
V = blake2bCtx.digest()
copyMem(addr result[offset], addr V[0], remaining)
return result
##########################################################################
proc initArgon2Params(
password, salt, secret, assocData: openArray[byte],
timeCost, memoryCost, parallelism, digestSize, version: int,
mode: Mode
): Argon2Params =
result.password = toSeq(password)
result.salt = toSeq(salt)
result.secret = toSeq(secret)
result.assocData = toSeq(assocData)
result.parallelism = uint32(parallelism)
result.memoryCost = uint32(memoryCost)
result.timeCost = uint32(timeCost)
result.digestSize = uint32(digestSize)
result.version = version
result.mode = mode
proc initHash(params: Argon2Params): seq[byte] =
## create initial hash based on input parameters
var blake2bCtx = newBlake2bCtx(digestSize = 64)
# NOTE: convert tuning parameters to bytes in little-endian order and update BLAKE2b state
blake2bCtx.update(toBytesLE(params.parallelism))
blake2bCtx.update(toBytesLE(params.digestSize))
blake2bCtx.update(toBytesLE(params.memoryCost))
blake2bCtx.update(toBytesLE(params.timeCost))
blake2bCtx.update(toBytesLE(uint32(params.version)))
blake2bCtx.update(toBytesLE(uint32(params.mode)))
# NOTE: update BLAKE2b state with password, salt, secret, and associated data
blake2bCtx.update(toBytesLE(uint32(params.password.len)))
blake2bCtx.update(params.password)
blake2bCtx.update(toBytesLE(uint32(params.salt.len)))
blake2bCtx.update(params.salt)
blake2bCtx.update(toBytesLE(uint32(params.secret.len)))
if params.secret.len > 0:
blake2bCtx.update(params.secret)
blake2bCtx.update(toBytesLE(uint32(params.assocData.len)))
if params.assocData.len > 0:
blake2bCtx.update(params.assocData)
return blake2bCtx.digest()
proc initMemoryArray(params: var Argon2Params): MemoryArray =
var mPrime = params.memoryCost
# NOTE: adjust memory cost to be a multiple of syncPoints * parallelism
mPrime = mPrime div (syncPoints * params.parallelism) * (syncPoints * params.parallelism)
if mPrime < 2 * syncPoints * params.parallelism:
mPrime = 2 * syncPoints * params.parallelism
params.memoryCost = mPrime
# NOTE: initialize memory array with adjusted number of blocks (cost)
result = newSeq[Block](params.memoryCost)
# NOTE: zeroing the memory is not necessary as Nim initializes with zeros by default
proc initBlocks(ctx: var Argon2Ctx, h0: seq[byte]) =
#[
initializes the first two blocks in each lane
h0: initial hash value
]#
var blk: Block
var bts: seq[byte]
var modifiedH0: seq[byte]
for lane in 0 ..< ctx.params.parallelism:
let laneOffset = lane * (ctx.params.memoryCost div ctx.params.parallelism)
# NOTE: make a copy of the initial hash and extend it
modifiedH0 = h0
# QUESTION: should we set length when the hash is generated?
modifiedH0.setLen(h0.len + 8)
# NOTE: convert the lane number to bytes in little-endian format and add to extended hash
bts = toBytesLE(lane)
copyMem(addr modifiedH0[blake2bMaxDigestSize + 4], addr bts[0], bts.len)
# NOTE: add block index 0 to the extended hash -> compute the block -> update memory
bts = toBytesLE(uint32(0))
copyMem(addr modifiedH0[blake2bMaxDigestSize], addr bts[0], bts.len)
blk = bytesToBlock(HPrime(modifiedH0, uint32(blockSizeInBytes)))
ctx.memory[][laneOffset + 0] = blk
# NOTE: repeat for block index 1
bts = toBytesLE(uint32(1))
copyMem(addr modifiedH0[blake2bMaxDigestSize], addr bts[0], bts.len)
blk = bytesToBlock(HPrime(modifiedH0, uint32(blockSizeInBytes)))
ctx.memory[][laneOffset + 1] = blk
##########################################################################
proc getSyncPoints(mode: Mode): int =
# NOTE: unused as we hardcode to 4 for all modes in line with Go's implementation.
case mode
of ARGON2D:
return 1
of ARGON2I, ARGON2ID:
return 4
proc phi(rand, m, s: uint64; lane, lanes: uint32): uint32 =
#[
calculates a pseudo-random index for memory segment processing
rand: 64-bit pseudo-random value used as the base for index calculation
m: modifier value derived from the number of segments and other parameters
s: start point for the segment calculation, influenced by the slice and segment count
lane: current processing lane
lanes: total number of lanes
returns: uint32 index
]#
# NOTE: extract the lower 32 bits of the random value
var p = rand and 0xFFFFFFFF'u64
# NOTE: square the value and shift right to fit 32-bit
p = (p * p) shr 32
# NOTE: multiply by modifier m and adjust to 32-bit
p = (p * m) shr 32
# NOTE: final index calculation
return lane * lanes + uint32((s + m - (p + 1)) mod lanes)
proc indexAlpha(rand: uint64; lanes, segments, threads, n, slice, lane, index: uint32): uint32 =
#[
computes a reference index within a lane
rand: 64-bit pseudo-random value
lanes: total number of lanes
segments: number of segments per lane
threads: number of threads (parallelism)
n: current pass number
slice: current slice number
lane: current processing lane
index: current index within the lane
returns: uint32 index
]#
# NOTE: determine reference lane
var refLane = uint32(rand shr 32) mod threads
# NOTE: first slice of the first pass
if n == 0 and slice == 0:
refLane = lane
# NOTE: calculate base modifier
var m = 3 * segments
# NOTE: calculate start point
var s = ((slice + 1) mod syncPoints) * segments
# NOTE: adjust m if the current lane is the reference lane
if lane == refLane:
m += index
# NOTE: first pass
if n == 0:
m = slice * segments
s = 0
if slice == 0 or lane == refLane:
m += index
# NOTE: adjust m for the first index or if in the reference lane
if index == 0 or lane == refLane:
m -= 1
return phi(rand, uint64(m), uint64(s), refLane, lanes)
proc processSegment(params: tuple[memory: ptr MemoryArray, n, slice, lane, lanes, segments, parallelism, memoryCost, timeCost: uint32, mode: Mode]) {.thread.} =
## processes a single segment of a lane in the memory array
var addresses, input, zero: Block
# NOTE: initialize the input block for Argon2i or first half of Argon2id
if params.mode == ARGON2I or (params.mode == ARGON2ID and params.n == 0 and params.slice < syncPoints div 2):
input[0] = uint64(params.n)
input[1] = uint64(params.lane)
input[2] = uint64(params.slice)
input[3] = uint64(params.memoryCost)
input[4] = uint64(params.timeCost)
input[5] = uint64(params.mode)
var index: uint32 = 0
if params.n == 0 and params.slice == 0:
# NOTE: skip first two blocks that are already initialized
index = 2
# NOTE: for Argon2i and first half of Argon2id, initialize the address block
if params.mode == ARGON2I or params.mode == ARGON2ID:
inc input[6]
processBlock(addresses, input, zero)
processBlock(addresses, addresses, zero)
# NOTE: calculate the starting offset in the memory array for this segment
var offset = params.lane * params.lanes + params.slice * params.segments + index
var random: uint64
while index < params.segments:
var prev = offset.pred
# NOTE: first block of the first slice
if index == 0 and params.slice == 0:
# NOTE: select last block in lane
prev += params.lanes
# NOTE: generate pseudo-random values for indexing (mode dependant)
if params.mode == ARGON2I or (params.mode == ARGON2ID and params.n == 0 and params.slice < syncPoints div 2):
if int(index) mod blockSize == 0:
# NOTE: refresh the address block periodically
inc input[6]
processBlock(addresses, input, zero)
processBlock(addresses, addresses, zero)
random = addresses[int(index) mod blockSize]
else:
# NOTE: for Argon2d and second half of Argon2id, use the previous block's first word
random = params.memory[][prev][0]
# NOTE: calculate the index of the new block to reference
let newOffset = indexAlpha(random, params.lanes, params.segments, params.parallelism, params.n, params.slice, params.lane, index)
# NOTE: XOR the current block with the referenced block and the previous block
processBlockXOR(params.memory[][offset], params.memory[][prev], params.memory[][newOffset])
inc index
inc offset
proc processBlocks(ctx: Argon2Ctx) =
## spawn threads for parallel computation across different lanes and slices
let lanes = ctx.params.memoryCost div ctx.params.parallelism
let segments = lanes div syncPoints
var threads = newSeq[Thread[tuple[memory: ptr MemoryArray, n, slice, lane, lanes, segments, parallelism, memoryCost, timeCost: uint32, mode: Mode]]](ctx.params.parallelism)
for n in 0 ..< ctx.params.timeCost:
for slice in 0 ..< syncPoints:
for lane in 0 ..< ctx.params.parallelism:
createThread(threads[lane], processSegment, (ctx.memory, n, slice, lane, lanes, segments, ctx.params.parallelism, ctx.params.memoryCost, ctx.params.timeCost, ctx.params.mode))
joinThreads(threads)
##########################################################################
proc base64Encode[T](data: openarray[T]): string =
## convenience proc
return data.encode().strip(leading=false, chars={'='})
proc digest*(ctx: Argon2Ctx): seq[byte] =
## generate final hash from the last block of each lane in memory array
# NOTE: number of blocks in each lane
let lanes = ctx.params.memoryCost div ctx.params.parallelism
var xorFinalBlock: Block
for lane in 0 ..< ctx.params.parallelism:
let blockIndex = lane * lanes + lanes.pred
# NOTE: first lane -> init xorFinalBlock
if lane == 0:
xorFinalBlock = ctx.memory[][blockIndex]
# NOTE: XOR each word of the lane's final block with the corresponding word in xorFinalBlock
else:
for j in 0 ..< blockSize:
xorFinalBlock[j] = xorFinalBlock[j] xor ctx.memory[][blockIndex][j]
# NOTE: convert the XOR-ed final block to bytes
let finalBlockBytes = blockToBytes(xorFinalBlock)
# NOTE: use HPrime to construct the final digest
return HPrime(finalBlockBytes, ctx.params.digestSize)
proc hexDigest*(ctx: Argon2Ctx): string =
## generate hex string of length digestSize * 2
let digest = ctx.digest()
result = newStringOfCap(digest.len + digest.len)
for b in digest:
result.add(b.toHex(2).toLowerAscii())
return result
proc encoded*(ctx: Argon2Ctx): string =
## form encoded string with parameters and base64 encoded salt and digest
let digest = ctx.digest()
let strMode = ($ctx.params.mode).toLowerAscii()
return fmt"${strMode}$v={ctx.params.version}$m={ctx.params.memoryCost},t={ctx.params.timeCost},p={ctx.params.parallelism}${base64Encode(ctx.params.salt)}${base64Encode(digest)}"
##########################################################################
proc newArgon2Ctx*(
password, salt: openArray[byte],
secret, assocData: openArray[byte] = @[],
timeCost: Positive = 2,
memoryCost: Positive = 16,
parallelism: Positive = 1,
digestSize: Positive = 32,
version: Positive = Argon2_Version_1_3,
mode: Mode = ARGON2ID
): Argon2Ctx =
result.params = initArgon2Params(
password, salt, secret, assocData,
timeCost, memoryCost, parallelism,
digestSize,
version,
mode
)
validateArgon2Params(result.params)
# NOTE: create initial hash based on input parameters
let h0 = initHash(result.params)
result.mArray = initMemoryArray(result.params)
result.memory = addr result.mArray
# NOTE: initialize first two blocks in each lane
result.initBlocks(h0)
# NOTE: fill the memory array in parallel
result.processBlocks()
proc newArgon2Ctx*(
password, salt: string,
secret, assocData: string = "",
timeCost: Positive = 2,
memoryCost: Positive = 16,
parallelism: Positive = 1,
digestSize: Positive = 32,
version: Positive = Argon2_Version_1_3,
mode: Mode = ARGON2ID
): Argon2Ctx =
return newArgon2Ctx(
password.toOpenArrayByte(0, password.len.pred),
salt.toOpenArrayByte(0, salt.len.pred),
secret.toOpenArrayByte(0, secret.len.pred),
assocData.toOpenArrayByte(0, assocData.len.pred),
timeCost, memoryCost, parallelism,
digestSize,
version,
mode
)
##########################################################################
when isMainModule:
include testing
proc runTestVectors() =
for v in testVectors:
let ctx = newArgon2Ctx("password", "somesalt",
timeCost=v.time, memoryCost=v.memory, parallelism=v.threads,
digestSize=24,
mode=v.mode)
doAssert ctx.hexDigest() == v.hash
runTestVectors()