2 * ChaCha20-Poly1305 Implementation for SSH-2
5 * http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/ssh/PROTOCOL.chacha20poly1305?rev=1.2&content-type=text/x-cvsweb-markup
8 * http://cr.yp.to/chacha/chacha-20080128.pdf
11 * http://cr.yp.to/snuffle/spec.pdf
14 * http://cr.yp.to/mac/poly1305-20050329.pdf
16 * The nonce for the Poly1305 is the second part of the key output
17 * from the first round of ChaCha20. This removes the AES requirement.
18 * This is undocumented!
20 * This has an intricate link between the cipher and the MAC. The
21 * keying of both is done by the cipher and setting of the IV is
22 * done by the MAC. One cannot operate without the other. The
23 * configuration of the ssh2_cipher structure ensures that the MAC is
24 * set (and others ignored) if this cipher is chosen.
26 * This cipher also encrypts the length using a different
27 * instantiation of the cipher using a different key and IV made from
28 * the sequence number which is passed in addition when calling
29 * encrypt/decrypt on it.
38 /* ChaCha20 implementation, only supporting 256-bit keys */
40 /* State for each ChaCha20 instance */
42 /* Current context, usually with the count incremented
43 * 0-3 are the static constant
45 * 12-13 are the counter
48 /* The output of the state above ready to xor */
49 unsigned char current[64];
50 /* The index of the above currently used to allow a true streaming cipher */
54 static INLINE void chacha20_round(struct chacha20 *ctx)
60 memcpy(copy, ctx->state, sizeof(copy));
62 /* A circular rotation for a 32bit number */
63 #define rotl(x, shift) x = ((x << shift) | (x >> (32 - shift)))
65 /* What to do for each quarter round operation */
66 #define qrop(a, b, c, d) \
72 #define quarter(a, b, c, d) \
78 /* Do 20 rounds, in pairs because every other is different */
79 for (i = 0; i < 20; i += 2) {
83 quarter(2, 6, 10, 14);
84 quarter(3, 7, 11, 15);
85 /* Another slightly different round */
86 quarter(0, 5, 10, 15);
87 quarter(1, 6, 11, 12);
92 /* Dump the macros, don't need them littering */
97 /* Add the initial state */
98 for (i = 0; i < 16; ++i) {
99 copy[i] += ctx->state[i];
102 /* Update the content of the xor buffer */
103 for (i = 0; i < 16; ++i) {
104 ctx->current[i * 4 + 0] = copy[i] >> 0;
105 ctx->current[i * 4 + 1] = copy[i] >> 8;
106 ctx->current[i * 4 + 2] = copy[i] >> 16;
107 ctx->current[i * 4 + 3] = copy[i] >> 24;
109 /* State full, reset pointer to beginning */
110 ctx->currentIndex = 0;
111 smemclr(copy, sizeof(copy));
113 /* Increment round counter */
115 /* Check for overflow, not done in one line so the 32 bits are chopped by the type */
116 if (!(uint32)(ctx->state[12])) {
121 /* Initialise context with 256bit key */
122 static void chacha20_key(struct chacha20 *ctx, const unsigned char *key)
124 static const char constant[16] = "expand 32-byte k";
126 /* Add the fixed string to the start of the state */
127 ctx->state[0] = GET_32BIT_LSB_FIRST(constant + 0);
128 ctx->state[1] = GET_32BIT_LSB_FIRST(constant + 4);
129 ctx->state[2] = GET_32BIT_LSB_FIRST(constant + 8);
130 ctx->state[3] = GET_32BIT_LSB_FIRST(constant + 12);
133 ctx->state[4] = GET_32BIT_LSB_FIRST(key + 0);
134 ctx->state[5] = GET_32BIT_LSB_FIRST(key + 4);
135 ctx->state[6] = GET_32BIT_LSB_FIRST(key + 8);
136 ctx->state[7] = GET_32BIT_LSB_FIRST(key + 12);
137 ctx->state[8] = GET_32BIT_LSB_FIRST(key + 16);
138 ctx->state[9] = GET_32BIT_LSB_FIRST(key + 20);
139 ctx->state[10] = GET_32BIT_LSB_FIRST(key + 24);
140 ctx->state[11] = GET_32BIT_LSB_FIRST(key + 28);
142 /* New key, dump context */
143 ctx->currentIndex = 64;
146 static void chacha20_iv(struct chacha20 *ctx, const unsigned char *iv)
150 ctx->state[14] = GET_32BIT_MSB_FIRST(iv);
151 ctx->state[15] = GET_32BIT_MSB_FIRST(iv + 4);
153 /* New IV, dump context */
154 ctx->currentIndex = 64;
157 static void chacha20_encrypt(struct chacha20 *ctx, unsigned char *blk, int len)
160 /* If we don't have any state left, then cycle to the next */
161 if (ctx->currentIndex >= 64) {
165 /* Do the xor while there's some state left and some plaintext left */
166 while (ctx->currentIndex < 64 && len) {
167 *blk++ ^= ctx->current[ctx->currentIndex++];
173 /* Decrypt is encrypt... It's xor against a PRNG... */
174 static INLINE void chacha20_decrypt(struct chacha20 *ctx,
175 unsigned char *blk, int len)
177 chacha20_encrypt(ctx, blk, len);
180 /* Poly1305 implementation (no AES, nonce is not encrypted) */
183 unsigned char nonce[16];
188 /* Buffer in case we get less than a multiple of 16 bytes */
189 unsigned char buffer[16];
193 static void poly1305_make(struct poly1305 *ctx)
195 static const unsigned char p[] = {
197 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
198 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb
201 ctx->modulo = bignum_from_bytes(p, sizeof(p));
204 memset(ctx->nonce, 0, 16);
205 ctx->bufferIndex = 0;
208 static void poly1305_free(struct poly1305 *ctx)
219 smemclr(ctx, sizeof(struct poly1305));
222 /* Takes a 256 bit key */
223 static void poly1305_key(struct poly1305 *ctx, const unsigned char *key)
225 unsigned char key_copy[16];
226 memcpy(key_copy, key, 16);
228 /* Key the MAC itself
229 * bytes 4, 8, 12 and 16 are required to have their top four bits clear */
232 key_copy[11] &= 0x0f;
233 key_copy[15] &= 0x0f;
234 /* bytes 5, 9 and 13 are required to have their bottom two bits clear */
237 key_copy[12] &= 0xfc;
241 ctx->r = bignum_from_bytes_le(key_copy, 16);
242 smemclr(key_copy, sizeof(key_copy));
244 /* Use the second 128 bits as the nonce */
245 memcpy(ctx->nonce, key+16, 16);
248 /* Feed up to 16 bytes (should only be less for the last chunk) */
249 static void poly1305_feed_chunk(struct poly1305 *ctx,
250 const unsigned char *chunk, int len)
253 Bignum c = bignum_from_bytes_le(chunk, len);
254 tmp = bignum_lshift(One, 8 * len);
255 tmp2 = bigadd(c, tmp);
259 tmp = bigadd(ctx->h, tmp2);
265 ctx->h = modmul(tmp, ctx->r, ctx->modulo);
269 static void poly1305_feed(struct poly1305 *ctx,
270 const unsigned char *buf, int len)
272 /* Check for stuff left in the buffer from last time */
273 if (ctx->bufferIndex) {
274 /* Try to fill up to 16 */
275 while (ctx->bufferIndex < 16 && len) {
276 ctx->buffer[ctx->bufferIndex++] = *buf++;
279 if (ctx->bufferIndex == 16) {
280 poly1305_feed_chunk(ctx, ctx->buffer, 16);
281 ctx->bufferIndex = 0;
285 /* Process 16 byte whole chunks */
287 poly1305_feed_chunk(ctx, buf, 16);
292 /* Cache stuff that's left over */
294 memcpy(ctx->buffer, buf, len);
295 ctx->bufferIndex = len;
299 /* Finalise and populate the buffer with the 16-byte MAC */
300 static void poly1305_finalise(struct poly1305 *ctx, unsigned char *mac)
305 if (ctx->bufferIndex) {
306 poly1305_feed_chunk(ctx, ctx->buffer, ctx->bufferIndex);
309 tmp = bignum_from_bytes_le(ctx->nonce, 16);
311 tmp2 = bigadd(ctx->h, tmp);
313 for (i = 0; i < 16; ++i) {
314 mac[i] = bignum_byte(tmp2, i);
322 struct chacha20 a_cipher; /* Used for length */
323 struct chacha20 b_cipher; /* Used for content */
325 /* Cache of the first 4 bytes because they are the sequence number */
326 /* Kept in 8 bytes with the top as zero to allow easy passing to setiv */
327 int mac_initialised; /* Where we have got to in filling mac_iv */
328 unsigned char mac_iv[8];
333 static void *poly_make_context(void *ctx)
338 static void poly_free_context(void *ctx)
340 /* Not allocated, just forwarded, no need to free */
343 static void poly_setkey(void *ctx, unsigned char *key)
345 /* Uses the same context as ChaCha20, so ignore */
348 static void poly_start(void *handle)
350 struct ccp_context *ctx = (struct ccp_context *)handle;
352 ctx->mac_initialised = 0;
353 memset(ctx->mac_iv, 0, 8);
354 poly1305_free(&ctx->mac);
355 poly1305_make(&ctx->mac);
358 static void poly_bytes(void *handle, unsigned char const *blk, int len)
360 struct ccp_context *ctx = (struct ccp_context *)handle;
362 /* First 4 bytes are the IV */
363 while (ctx->mac_initialised < 4 && len) {
364 ctx->mac_iv[7 - ctx->mac_initialised] = *blk++;
365 ++ctx->mac_initialised;
369 /* Initialise the IV if needed */
370 if (ctx->mac_initialised == 4) {
371 chacha20_iv(&ctx->b_cipher, ctx->mac_iv);
372 ++ctx->mac_initialised; /* Don't do it again */
374 /* Do first rotation */
375 chacha20_round(&ctx->b_cipher);
377 /* Set the poly key */
378 poly1305_key(&ctx->mac, ctx->b_cipher.current);
380 /* Set the first round as used */
381 ctx->b_cipher.currentIndex = 64;
384 /* Update the MAC with anything left */
386 poly1305_feed(&ctx->mac, blk, len);
390 static void poly_genresult(void *handle, unsigned char *blk)
392 struct ccp_context *ctx = (struct ccp_context *)handle;
393 poly1305_finalise(&ctx->mac, blk);
396 static int poly_verresult(void *handle, unsigned char const *blk)
398 struct ccp_context *ctx = (struct ccp_context *)handle;
400 unsigned char mac[16];
401 poly1305_finalise(&ctx->mac, mac);
402 res = smemeq(blk, mac, 16);
406 /* The generic poly operation used before generate and verify */
407 static void poly_op(void *handle, unsigned char *blk, int len, unsigned long seq)
411 PUT_32BIT_MSB_FIRST(iv, seq);
412 /* poly_bytes expects the first 4 bytes to be the IV */
413 poly_bytes(handle, iv, 4);
414 smemclr(iv, sizeof(iv));
415 poly_bytes(handle, blk, len);
418 static void poly_generate(void *handle, unsigned char *blk, int len, unsigned long seq)
420 poly_op(handle, blk, len, seq);
421 poly_genresult(handle, blk+len);
424 static int poly_verify(void *handle, unsigned char *blk, int len, unsigned long seq)
426 poly_op(handle, blk, len, seq);
427 return poly_verresult(handle, blk+len);
430 static const struct ssh_mac ssh2_poly1305 = {
431 poly_make_context, poly_free_context,
434 /* whole-packet operations */
435 poly_generate, poly_verify,
437 /* partial-packet operations */
438 poly_start, poly_bytes, poly_genresult, poly_verresult,
440 "", "", /* Not selectable individually, just part of ChaCha20-Poly1305 */
444 static void *ccp_make_context(void)
446 struct ccp_context *ctx = snew(struct ccp_context);
448 poly1305_make(&ctx->mac);
453 static void ccp_free_context(void *vctx)
455 struct ccp_context *ctx = (struct ccp_context *)vctx;
456 smemclr(&ctx->a_cipher, sizeof(ctx->a_cipher));
457 smemclr(&ctx->b_cipher, sizeof(ctx->b_cipher));
458 poly1305_free(&ctx->mac);
462 static void ccp_iv(void *vctx, unsigned char *iv)
464 /* struct ccp_context *ctx = (struct ccp_context *)vctx; */
465 /* IV is set based on the sequence number */
468 static void ccp_key(void *vctx, unsigned char *key)
470 struct ccp_context *ctx = (struct ccp_context *)vctx;
471 /* Initialise the a_cipher (for encrypting/decrypting lengths) with the second 256 bits */
472 chacha20_key(&ctx->a_cipher, key + 32);
473 /* Initialise the b_cipher (for content and MAC) with the first 256 bits */
474 chacha20_key(&ctx->b_cipher, key);
477 static void ccp_encrypt(void *vctx, unsigned char *blk, int len)
479 struct ccp_context *ctx = (struct ccp_context *)vctx;
480 chacha20_encrypt(&ctx->b_cipher, blk, len);
483 static void ccp_decrypt(void *vctx, unsigned char *blk, int len)
485 struct ccp_context *ctx = (struct ccp_context *)vctx;
486 chacha20_decrypt(&ctx->b_cipher, blk, len);
489 static void ccp_length_op(struct ccp_context *ctx, unsigned char *blk, int len,
493 PUT_32BIT_LSB_FIRST(iv, seq >> 32);
494 PUT_32BIT_LSB_FIRST(iv + 4, seq);
495 chacha20_iv(&ctx->a_cipher, iv);
496 chacha20_iv(&ctx->b_cipher, iv);
497 /* Reset content block count to 1, as the first is the key for Poly1305 */
498 ++ctx->b_cipher.state[12];
499 smemclr(iv, sizeof(iv));
502 static void ccp_encrypt_length(void *vctx, unsigned char *blk, int len,
505 struct ccp_context *ctx = (struct ccp_context *)vctx;
506 ccp_length_op(ctx, blk, len, seq);
507 chacha20_encrypt(&ctx->a_cipher, blk, len);
510 static void ccp_decrypt_length(void *vctx, unsigned char *blk, int len,
513 struct ccp_context *ctx = (struct ccp_context *)vctx;
514 ccp_length_op(ctx, blk, len, seq);
515 chacha20_decrypt(&ctx->a_cipher, blk, len);
518 static const struct ssh2_cipher ssh2_chacha20_poly1305 = {
529 "chacha20-poly1305@openssh.com",
530 1, 512, SSH_CIPHER_SEPARATE_LENGTH, "ChaCha20",
535 static const struct ssh2_cipher *const ccp_list[] = {
536 &ssh2_chacha20_poly1305
539 const struct ssh2_ciphers ssh2_ccp = {
540 sizeof(ccp_list) / sizeof(*ccp_list),