-
+ 2D36C51A50632358041E360E0BE17F698BC87DD8CA2BDFED48731991905444B0B687F11930D612A9DEBB3BDFC49439292E6695F2DB05C04B92299BA6791B0C7F
keccak.c
(0 . 0)(1 . 312)
144 /* KECCAK permutation and sponge construction
145 * J. Welsh, May 2019
146 *
147 * Based on Guido Bertoni, Joan Daemen, Michael Peeters and Gilles van Assche
148 * (2011): The KECCAK reference, Version 3.0.
149 */
150
151 #include <stdint.h>
152 #include "io.h"
153
154 /* State is arranged in a 5 x 5 x W bit array, indexed as (x,y,z).
155 * The 1D section for a given (x,y) is called a lane and packed in a lane_t.
156 * The 1D section for a given (x,z) is called a column.
157 * The 2D section for a given z is called a slice.
158 */
159
160 typedef uint64_t lane_t;
161 #define L 6
162 #define W (1 << L) /* lane size = 64 bits */
163 #define B 25*W /* permutation width = 1600 bits */
164 /* we require L be at least 3 (W=8, B=200) to avoid sub-byte lane size */
165
166 #define ROUNDS (12 + 2*L)
167
168 #define mask(x) (((unsigned) (x)) & (W-1))
169 #define rot(v,r) (v = v<<mask(r) | v>>mask(W-mask(r)))
170
171 static lane_t state[5][5];
172 /* physical storage order can be swapped as a performance tweak */
173 #define a(x,y) (state[x][y])
174
175 /* Galois linear feedback shift register, output in low bit:
176 * (x^t mod x^8 + x^6 + x^5 + x^4 + 1) mod x in GF(2)(x)
177 * (I'd forgotten my grade-school algebra but the Handbook of Applied
178 * Cryptography plus http://archive.is/hhvKKJ (nayuki.io) and some written
179 * exercises cleared this up for me.) */
180 static uint8_t lfsr_step(uint8_t rstate) {
181 /* polynomial xor'd in when high (overflowing) bit is set */
182 uint8_t gate = ((int8_t) rstate) >> 7;
183 return (rstate << 1) ^ (gate & 0x71); /* bits 6,5,4,0 */
184 }
185
186 /* Column parity diffusion */
187 static void theta(void) {
188 /* LUT for x+1 mod 5 (not used on secret bits!) */
189 uint8_t const inc_mod5[8] = {1,2,3,4,0,1,2,3};
190 unsigned x, y;
191 lane_t c[5], d;
192
193 for (x=0; x<5; ++x)
194 c[x] = a(x,0) ^ a(x,1) ^ a(x,2) ^ a(x,3) ^ a(x,4);
195
196 for (x=0; x<5; ++x) {
197 d = c[inc_mod5[x]];
198 rot(d,1);
199 d ^= c[inc_mod5[x+3]]; /* x+3+1 == x-1 mod 5 */
200 for (y=0; y<5; ++y)
201 a(x,y) ^= d;
202 }
203 }
204
205 /* Inter-slice dispersion */
206 static void rho(void) {
207 /* precomputed offsets from Table 2.1 */
208 rot(a(0,1), 36);
209 rot(a(0,2), 3);
210 rot(a(0,3), 105);
211 rot(a(0,4), 210);
212
213 rot(a(1,0), 1);
214 rot(a(1,1), 300);
215 rot(a(1,2), 10);
216 rot(a(1,3), 45);
217 rot(a(1,4), 66);
218
219 rot(a(2,0), 190);
220 rot(a(2,1), 6);
221 rot(a(2,2), 171);
222 rot(a(2,3), 15);
223 rot(a(2,4), 253);
224
225 rot(a(3,0), 28);
226 rot(a(3,1), 55);
227 rot(a(3,2), 153);
228 rot(a(3,3), 21);
229 rot(a(3,4), 120);
230
231 rot(a(4,0), 91);
232 rot(a(4,1), 276);
233 rot(a(4,2), 231);
234 rot(a(4,3), 136);
235 rot(a(4,4), 78);
236 }
237
238 /* Slicewise transposition by matrix
239 * [ 0 1 ]
240 * [ 2 3 ]
241 * For all indices x, y, z:
242 * a'(y, 2x+3y mod 5, z) = a(x, y, z)
243 */
244 static void pi(void) {
245 /* shuffle in-place by swapping alternating temporaries with successive
246 * destinations */
247 lane_t c, d;
248 #define even(x,y) (c=a(x,y), a(x,y)=d)
249 #define odd(x,y) (d=a(x,y), a(x,y)=c)
250 /* a[0][0] doesn't move */
251 d=a(0,1); /* 2*0 + 3*1 = 3 -> 3 */
252 even(1,3); /* 2+9=11 -> 1 */
253 odd (3,1); /* 6+3=9 -> 4 */
254 even(1,4); /* 2+12=14 */
255 odd (4,4); /* 8+12=20 */
256 even(4,0); /* 8+0=8 */
257 odd (0,3); /* 0+9=9 */
258 even(3,4); /* 6+12=18 */
259 odd (4,3); /* 8+9=17 */
260 even(3,2); /* 6+6=12 */
261 odd (2,2); /* 4+6=10 */
262 even(2,0); /* 4+0=4 */
263 odd (0,4); /* 0+12=12 */
264 even(4,2); /* 8+6=14 */
265 odd (2,4); /* 4+12=16 */
266 even(4,1); /* 8+3=11 */
267 odd (1,1); /* 2+3=5 */
268 even(1,0); /* 2+0=2 */
269 odd (0,2); /* 0+6=6 */
270 even(2,1); /* 4+3=7 */
271 odd (1,2); /* 2+6=8 */
272 even(2,3); /* 4+9=13 */
273 odd (3,3); /* 6+9=15 */
274 even(3,0); /* 6+0=6 */
275 a(0,1)=c; /* And we're back home in 24 steps, lovely! */
276 }
277
278 /* Nonlinear row mapping */
279 static void chi(void) {
280 unsigned y;
281 for (y=0; y<5; ++y) {
282 lane_t c0=a(0,y), c1=a(1,y), c2=a(2,y), c3=a(3,y), c4=a(4,y);
283
284 a(0,y) = c0 ^ (~c1 & c2);
285 a(1,y) = c1 ^ (~c2 & c3);
286 a(2,y) = c2 ^ (~c3 & c4);
287 a(3,y) = c3 ^ (~c4 & c0);
288 a(4,y) = c4 ^ (~c0 & c1);
289 }
290 }
291
292 /* Round constants for disrupting z symmetry
293 *
294 * Mix LFSR bitstream sequentially into first lane with z index given by
295 * 2^j - 1
296 * LFSR advances 7 steps per round regardless of how many "j" are needed to
297 * fill the lane, corresponding to the maximum 64-bit lane size.
298 */
299 static uint8_t iota(uint8_t rstate) {
300 unsigned j;
301 for (j=0; j<=L; ++j) {
302 a(0,0) ^= ((lane_t) (rstate & 1)) << ((1 << j) - 1);
303 rstate = lfsr_step(rstate);
304 }
305 for (; j<7; ++j)
306 rstate = lfsr_step(rstate);
307 return rstate;
308 }
309
310 #ifdef TEST
311
312 #include <stdio.h>
313 #include "testvectors.h"
314
315 static int test_fail;
316 static unsigned test, test_step;
317
318 static void checkpoint(unsigned round, char const *step) {
319 int fail = 0;
320 unsigned x, y, i=0;
321 printf("test=%u round=%u %s ", test, round, step);
322 for (y=0; y<5; ++y)
323 for (x=0; x<5; ++x, ++i) {
324 assert(i<25);
325 assert(test_step<TEST_STEPS);
326 if (a(x,y) != tests[test][test_step][i]) {
327 test_fail = fail = 1;
328 printf("FAIL x=%u y=%u (0x%lx != 0x%lx)\n",
329 x, y, a(x,y),
330 tests[test][test_step][i]);
331 }
332 }
333 if (!fail) puts("PASS");
334 ++test_step;
335 }
336 #else
337 #define checkpoint(r,s)
338 #endif
339
340 static void keccakf(void) {
341 unsigned round;
342 uint8_t rstate=1;
343 for (round=0; round<ROUNDS; ++round) {
344 theta(); checkpoint(round,"theta");
345 rho(); checkpoint(round,"rho");
346 pi(); checkpoint(round,"pi");
347 chi(); checkpoint(round,"chi");
348 rstate = iota(rstate); checkpoint(round,"iota");
349 }
350 }
351
352 static void reset(void) {
353 unsigned x, y;
354 for (x=0; x<5; ++x)
355 for (y=0; y<5; ++y)
356 a(x,y) = 0;
357 }
358
359 static void load(uint8_t const *block, unsigned len) {
360 /* Keeping bytes in lane little-endian, independent of machine. This
361 * harmonizes with (rumored) Keccak convention for interpretation of
362 * octet stream as bitstream. For a big-endian interpretation we could
363 * flip the byte load/store order and the direction of rotations.
364 *
365 * Ordering of lanes is x-major due to the specified mapping of
366 * bitstring to state coordinates: s[w(5y+x)+z] = a[x][y][z] */
367 unsigned x, y, z, i=0;
368 for (y=0; y<5; ++y)
369 for (x=0; x<5; ++x)
370 for (z=0; z!=W; z+=8, ++i)
371 if (i==len) return;
372 else a(x,y) ^= ((lane_t) block[i]) << z;
373 }
374
375 static void store_hex(char *buf, unsigned block_len) {
376 static char const hex[] = "0123456789abcdef";
377 unsigned x, y, z, len = 2*block_len, i = 0;
378 for (y=0; y<5; ++y)
379 for (x=0; x<5; ++x)
380 for (z=0; z!=W; z+=8) {
381 uint8_t byte;
382 if (i==len) return;
383 byte = a(x,y) >> z;
384 /* Digits within the byte rendered big-endian,
385 * matching common practice but unfortunately
386 * not bitstream order. */
387 buf[i++] = hex[byte>>4];
388 buf[i++] = hex[byte&15];
389 }
390 }
391
392 static void extract(unsigned len) {
393 static char buf[2*B/8];
394 assert(2*len <= sizeof buf);
395 store_hex(buf,len);
396 write_all(1,buf,2*len);
397 }
398
399 /* Stream octets from stdin until EOF, then write hex to stdout. */
400 void sponge(unsigned capacity, size_t out_bits) {
401 static uint8_t buf[B/8];
402 unsigned rate = B - capacity,
403 len = rate/8,
404 n;
405 size_t out_len = out_bits/8;
406 assert(W == 8*sizeof(lane_t));
407 assert(0 < rate && rate < B);
408 assert(!(rate%8));
409 assert(len <= sizeof buf);
410
411 /* absorb */
412 reset();
413 while ((n = read_all(0,buf,len)) == len) {
414 load(buf,len);
415 keccakf();
416 }
417
418 /* pad (1000...1) */
419 if (n < len-1) {
420 buf[n] = 0x01; /* first bit (little-endian) */
421 for (++n; n < len-1; ++n)
422 buf[n] = 0;
423 buf[n] = 0x80; /* last bit (little-endian) */
424 }
425 else {
426 assert(n == len-1);
427 buf[n] = 0x81; /* first and last bits */
428 }
429 load(buf,len);
430 keccakf();
431
432 /* squeeze */
433 for (; out_len > len; out_len -= len) {
434 extract(len);
435 keccakf();
436 }
437 extract(out_len);
438 }
439
440 #ifdef TEST
441 int main() {
442 for (test=0; test<NTESTS; ++test) {
443 unsigned x, y, i=0;
444 /* initial state */
445 for (y=0; y<5; ++y)
446 for (x=0; x<5; ++x, ++i) {
447 assert(i<25);
448 a(x,y) = tests[test][0][i];
449 }
450 test_step = 1;
451 keccakf();
452 }
453 return test_fail;
454 }
455 #endif