2 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
4 * Copyright (C) 2015 Martin Willi
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
12 #include <crypto/algapi.h>
13 #include <crypto/chacha20.h>
14 #include <crypto/internal/skcipher.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <asm/fpu/api.h>
/*
 * Alignment, in bytes, that chacha20_simd() applies to its state pointer
 * (via PTR_ALIGN) before handing it to the SIMD routines.
 */
20 #define CHACHA20_STATE_ALIGN 16
/*
 * Block-XOR routines declared asmlinkage and defined outside this file.
 * Each takes the cipher state plus destination and source byte pointers.
 * NOTE(review): the end of this first prototype's parameter list is not
 * visible in this view; call sites in this file pass a byte count as a
 * fourth argument.
 */
22 asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
/* Callers in this file advance src/dst by four blocks after each call. */
24 asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
/* Callers in this file advance src/dst by eight blocks after each call. */
26 asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
/*
 * Set in chacha20_simd_mod_init() from the CPU feature checks; when true,
 * chacha20_dosimd() uses the 8-block AVX2 routine first.
 */
27 static bool chacha20_use_avx2;
/*
 * chacha20_dosimd - run the SIMD block-XOR routines over one buffer.
 *
 * Consumes the input widest-first: eight blocks per call through the AVX2
 * routine when chacha20_use_avx2 is set, then four blocks per call through
 * the SSSE3 routine, then one block per call. After every call the
 * remaining byte count is reduced and src/dst are advanced by the amount
 * just handled. A final single-block call is issued with whatever count is
 * left.
 *
 * NOTE(review): this view of the function is incomplete. The end of the
 * parameter list (presumably where `bytes` is declared), the braces, and
 * any statements that sit between the visible lines are not shown, so the
 * condition guarding the final call and any updates to `state` made here
 * cannot be confirmed from this excerpt.
 */
30 static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
/* Widest path: only taken when AVX2 support was detected at module init. */
34 if (chacha20_use_avx2) {
35 while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
36 chacha20_8block_xor_avx2(state, dst, src);
37 bytes -= CHACHA20_BLOCK_SIZE * 8;
38 src += CHACHA20_BLOCK_SIZE * 8;
39 dst += CHACHA20_BLOCK_SIZE * 8;
/* Four blocks per iteration through the SSSE3 routine. */
44 while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
45 chacha20_4block_xor_ssse3(state, dst, src);
46 bytes -= CHACHA20_BLOCK_SIZE * 4;
47 src += CHACHA20_BLOCK_SIZE * 4;
48 dst += CHACHA20_BLOCK_SIZE * 4;
/*
 * One block per iteration; the full remaining count is passed as the
 * length argument even though only one block's worth is accounted for
 * per pass.
 */
51 while (bytes >= CHACHA20_BLOCK_SIZE) {
52 chacha20_block_xor_ssse3(state, dst, src, bytes);
53 bytes -= CHACHA20_BLOCK_SIZE;
54 src += CHACHA20_BLOCK_SIZE;
55 dst += CHACHA20_BLOCK_SIZE;
/*
 * Tail: at this point fewer than CHACHA20_BLOCK_SIZE bytes remain.
 * NOTE(review): the guard around this call is not visible here -- confirm
 * it is skipped when bytes == 0.
 */
59 chacha20_block_xor_ssse3(state, dst, src, bytes);
/*
 * chacha20_simd - skcipher request handler used for both .encrypt and
 * .decrypt of the "chacha20-simd" algorithm.
 *
 * Requests of at most one block, or requests issued where may_use_simd()
 * is false, are delegated to crypto_chacha20_crypt(). Otherwise the
 * request is walked with skcipher_walk_virt(), the state is initialised
 * from the transform context and the walk IV, and each walk step is fed to
 * chacha20_dosimd().
 *
 * NOTE(review): this view of the function is incomplete. The declaration
 * of `err`, the braces, the last argument of the final chacha20_dosimd()
 * call, the return statement, and any statements at the gaps between the
 * visible lines (e.g. FPU context handling) are not shown.
 */
63 static int chacha20_simd(struct skcipher_request *req)
65 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
66 struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
/*
 * The buffer is declared 8-byte aligned with two spare u32 words, which
 * (assuming 4-byte u32) gives the 8 bytes of slack needed for PTR_ALIGN
 * below to round the pointer up to a 16-byte boundary without running
 * past the 16 state words.
 */
67 u32 *state, state_buf[16 + 2] __aligned(8);
68 struct skcipher_walk walk;
/* The slack in state_buf is sized for a 16-byte alignment only. */
71 BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
72 state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
/* Fall back to the non-SIMD implementation for short requests or when SIMD is unavailable. */
74 if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
75 return crypto_chacha20_crypt(req);
77 err = skcipher_walk_virt(&walk, req, true);
79 crypto_chacha20_init(state, ctx, walk.iv);
/*
 * Process only whole blocks from each walk step; the sub-block remainder
 * is handed to skcipher_walk_done() (presumably reported as not yet
 * processed -- confirm against the skcipher_walk API).
 */
83 while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
84 chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
85 rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
86 err = skcipher_walk_done(&walk,
87 walk.nbytes % CHACHA20_BLOCK_SIZE);
/*
 * Final step for what is left after the loop (less than one block).
 * NOTE(review): the guard and the length argument of this call are not
 * visible in this view.
 */
91 chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
93 err = skcipher_walk_done(&walk, 0);
/*
 * skcipher algorithm descriptor registered by chacha20_simd_mod_init() and
 * unregistered by chacha20_simd_mod_fini().
 */
101 static struct skcipher_alg alg = {
/* Algorithm name and the driver name of this particular implementation. */
102 .base.cra_name = "chacha20",
103 .base.cra_driver_name = "chacha20-simd",
/* NOTE(review): presumably chosen to rank above the generic implementation -- confirm. */
104 .base.cra_priority = 300,
105 .base.cra_blocksize = 1,
/* Per-transform context is a struct chacha20_ctx, read back in chacha20_simd(). */
106 .base.cra_ctxsize = sizeof(struct chacha20_ctx),
107 .base.cra_module = THIS_MODULE,
/* Only one key size is accepted: min and max are the same constant. */
109 .min_keysize = CHACHA20_KEY_SIZE,
110 .max_keysize = CHACHA20_KEY_SIZE,
111 .ivsize = CHACHA20_IV_SIZE,
112 .chunksize = CHACHA20_BLOCK_SIZE,
113 .setkey = crypto_chacha20_setkey,
/* Encryption and decryption share the same handler. */
114 .encrypt = chacha20_simd,
115 .decrypt = chacha20_simd,
/*
 * Module init: checks CPU features, decides whether the AVX2 path may be
 * used, and registers the skcipher algorithm. Returns the result of
 * crypto_register_skcipher().
 */
118 static int __init chacha20_simd_mod_init(void)
/*
 * SSSE3 is tested first.
 * NOTE(review): the statement executed when it is absent is not visible in
 * this view (presumably an early error return -- confirm).
 */
120 if (!boot_cpu_has(X86_FEATURE_SSSE3))
/*
 * Enable the 8-block path only when the CPU reports both AVX and AVX2 and
 * cpu_has_xfeatures() confirms the SSE and YMM xfeatures. This block is
 * compiled only when CONFIG_AS_AVX2 is defined.
 */
123 #ifdef CONFIG_AS_AVX2
124 chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
125 boot_cpu_has(X86_FEATURE_AVX2) &&
126 cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
128 return crypto_register_skcipher(&alg);
/* Module exit: unregisters the skcipher algorithm registered at init. */
131 static void __exit chacha20_simd_mod_fini(void)
133 crypto_unregister_skcipher(&alg);
/* Hook the init and exit functions into the module lifecycle. */
136 module_init(chacha20_simd_mod_init);
137 module_exit(chacha20_simd_mod_fini);
/* Module metadata. */
139 MODULE_LICENSE("GPL");
140 MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
141 MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
/* Crypto aliases matching the cra_name and cra_driver_name set in `alg`. */
142 MODULE_ALIAS_CRYPTO("chacha20");
143 MODULE_ALIAS_CRYPTO("chacha20-simd");