chacha.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. /* chacha.c
  2. *
  3. * Copyright (C) 2006-2023 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /*
  22. DESCRIPTION
  23. This library contains implementation for the ChaCha20 stream cipher.
Based on chacha-ref.c version 20080118
  25. D. J. Bernstein
  26. Public domain.
  27. */
  28. #ifdef WOLFSSL_ARMASM
  29. /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
  30. #else
  31. #ifdef HAVE_CONFIG_H
  32. #include <config.h>
  33. #endif
  34. #include <wolfssl/wolfcrypt/settings.h>
  35. #if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
  36. #include <wolfssl/wolfcrypt/chacha.h>
  37. #include <wolfssl/wolfcrypt/error-crypt.h>
  38. #include <wolfssl/wolfcrypt/logging.h>
  39. #include <wolfssl/wolfcrypt/cpuid.h>
  40. #ifdef NO_INLINE
  41. #include <wolfssl/wolfcrypt/misc.h>
  42. #else
  43. #define WOLFSSL_MISC_INCLUDED
  44. #include <wolfcrypt/src/misc.c>
  45. #endif
  46. #ifdef CHACHA_AEAD_TEST
  47. #include <stdio.h>
  48. #endif
  49. #ifdef USE_INTEL_CHACHA_SPEEDUP
  50. #include <emmintrin.h>
  51. #include <immintrin.h>
  52. #if defined(__GNUC__) && ((__GNUC__ < 4) || \
  53. (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
  54. #undef NO_AVX2_SUPPORT
  55. #define NO_AVX2_SUPPORT
  56. #endif
  57. #if defined(__clang__) && ((__clang_major__ < 3) || \
  58. (__clang_major__ == 3 && __clang_minor__ <= 5))
  59. #undef NO_AVX2_SUPPORT
  60. #define NO_AVX2_SUPPORT
  61. #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
  62. #undef NO_AVX2_SUPPORT
  63. #endif
  64. #ifndef NO_AVX2_SUPPORT
  65. #define HAVE_INTEL_AVX2
  66. #endif
/* Lazily initialised cache of the CPU feature flags: wc_Chacha_Process()
 * calls cpuid_get_flags() the first time it finds cpuidFlagsSet == 0, stores
 * the result in cpuidFlags and then sets cpuidFlagsSet to 1. */
static int cpuidFlagsSet = 0;
static word32 cpuidFlags = 0;
  69. #endif
/* LITTLE32(x): convert a 32-bit word between host order and little-endian
 * order. Big-endian builds byte-swap the word; all other builds leave it
 * unchanged. */
#ifdef BIG_ENDIAN_ORDER
#define LITTLE32(x) ByteReverseWord32(x)
#else
#define LITTLE32(x) (x)
#endif
/* Number of rounds run per key-stream block; the round loop in
 * wc_Chacha_wordtobyte() consumes them two at a time. */
#define ROUNDS 20
/* U32C(v): append the unsigned suffix to an integer literal. */
#define U32C(v) (v##U)
/* U32V(v): cast to word32 and mask to the low 32 bits. */
#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
/* U8TO32_LITTLE(p): read the word at p through a word32 pointer cast and
 * convert it from little-endian. NOTE(review): because of the cast, p has to
 * be suitably aligned on targets that fault on unaligned word access. */
#define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])
/* ROTATE(v,c): rotate left, implemented by rotlFixed(). */
#define ROTATE(v,c) rotlFixed(v, c)
/* XOR(v,w): bitwise exclusive or. */
#define XOR(v,w) ((v) ^ (w))
/* PLUS(v,w): addition reduced to 32 bits. */
#define PLUS(v,w) (U32V((v) + (w)))
/* PLUSONE(v): increment reduced to 32 bits. */
#define PLUSONE(v) (PLUS((v),1))
/* QUARTERROUND(a,b,c,d): one add/xor/rotate quarter round (rotations by 16,
 * 12, 8 and 7) on elements a, b, c and d of an array that must be named x in
 * the calling scope. The expansion is a run of complete statements that ends
 * with its own semicolon, so call sites do not add one and the macro must not
 * be used as the body of an unbraced if/for/while. */
#define QUARTERROUND(a,b,c,d) \
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
  89. /**
  90. * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
  91. * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
  92. */
  93. int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
  94. {
  95. word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
  96. if (ctx == NULL || inIv == NULL)
  97. return BAD_FUNC_ARG;
  98. XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
  99. ctx->left = 0; /* resets state */
  100. ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */
  101. ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
  102. ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
  103. ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
  104. return 0;
  105. }
  106. /* "expand 32-byte k" as unsigned 32 byte */
  107. static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
  108. /* "expand 16-byte k" as unsigned 16 byte */
  109. static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
  110. /**
  111. * Key setup. 8 word iv (nonce)
  112. */
  113. int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
  114. {
  115. const word32* constants;
  116. const byte* k;
  117. #ifdef XSTREAM_ALIGN
  118. word32 alignKey[8];
  119. #endif
  120. if (ctx == NULL || key == NULL)
  121. return BAD_FUNC_ARG;
  122. if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
  123. return BAD_FUNC_ARG;
  124. #ifdef XSTREAM_ALIGN
  125. if ((wc_ptr_t)key % 4) {
  126. WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
  127. XMEMCPY(alignKey, key, keySz);
  128. k = (byte*)alignKey;
  129. }
  130. else {
  131. k = key;
  132. }
  133. #else
  134. k = key;
  135. #endif /* XSTREAM_ALIGN */
  136. #ifdef CHACHA_AEAD_TEST
  137. word32 i;
  138. printf("ChaCha key used :\n");
  139. for (i = 0; i < keySz; i++) {
  140. printf("%02x", key[i]);
  141. if ((i + 1) % 8 == 0)
  142. printf("\n");
  143. }
  144. printf("\n\n");
  145. #endif
  146. ctx->X[4] = U8TO32_LITTLE(k + 0);
  147. ctx->X[5] = U8TO32_LITTLE(k + 4);
  148. ctx->X[6] = U8TO32_LITTLE(k + 8);
  149. ctx->X[7] = U8TO32_LITTLE(k + 12);
  150. if (keySz == CHACHA_MAX_KEY_SZ) {
  151. k += 16;
  152. constants = sigma;
  153. }
  154. else {
  155. constants = tau;
  156. }
  157. ctx->X[ 8] = U8TO32_LITTLE(k + 0);
  158. ctx->X[ 9] = U8TO32_LITTLE(k + 4);
  159. ctx->X[10] = U8TO32_LITTLE(k + 8);
  160. ctx->X[11] = U8TO32_LITTLE(k + 12);
  161. ctx->X[ 0] = constants[0];
  162. ctx->X[ 1] = constants[1];
  163. ctx->X[ 2] = constants[2];
  164. ctx->X[ 3] = constants[3];
  165. ctx->left = 0; /* resets state */
  166. return 0;
  167. }
  168. /**
  169. * Converts word into bytes with rotations having been done.
  170. */
  171. static WC_INLINE void wc_Chacha_wordtobyte(word32 x[CHACHA_CHUNK_WORDS],
  172. word32 state[CHACHA_CHUNK_WORDS])
  173. {
  174. word32 i;
  175. XMEMCPY(x, state, CHACHA_CHUNK_BYTES);
  176. for (i = (ROUNDS); i > 0; i -= 2) {
  177. QUARTERROUND(0, 4, 8, 12)
  178. QUARTERROUND(1, 5, 9, 13)
  179. QUARTERROUND(2, 6, 10, 14)
  180. QUARTERROUND(3, 7, 11, 15)
  181. QUARTERROUND(0, 5, 10, 15)
  182. QUARTERROUND(1, 6, 11, 12)
  183. QUARTERROUND(2, 7, 8, 13)
  184. QUARTERROUND(3, 4, 9, 14)
  185. }
  186. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  187. x[i] = PLUS(x[i], state[i]);
  188. #ifdef BIG_ENDIAN_ORDER
  189. x[i] = LITTLE32(x[i]);
  190. #endif
  191. }
  192. }
  193. #ifdef HAVE_XCHACHA
  194. /*
  195. * wc_HChacha_block - half a ChaCha block, for XChaCha
  196. *
  197. * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
  198. */
  199. static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], word32 nrounds)
  200. {
  201. word32 x[CHACHA_CHUNK_WORDS];
  202. word32 i;
  203. for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
  204. x[i] = ctx->X[i];
  205. }
  206. for (i = nrounds; i > 0; i -= 2) {
  207. QUARTERROUND(0, 4, 8, 12)
  208. QUARTERROUND(1, 5, 9, 13)
  209. QUARTERROUND(2, 6, 10, 14)
  210. QUARTERROUND(3, 7, 11, 15)
  211. QUARTERROUND(0, 5, 10, 15)
  212. QUARTERROUND(1, 6, 11, 12)
  213. QUARTERROUND(2, 7, 8, 13)
  214. QUARTERROUND(3, 4, 9, 14)
  215. }
  216. for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
  217. stream[i] = x[i];
  218. for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
  219. stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
  220. }
  221. /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
  222. int wc_XChacha_SetKey(ChaCha *ctx,
  223. const byte *key, word32 keySz,
  224. const byte *nonce, word32 nonceSz,
  225. word32 counter) {
  226. word32 k[CHACHA_MAX_KEY_SZ];
  227. byte iv[CHACHA_IV_BYTES];
  228. int ret;
  229. if (nonceSz != XCHACHA_NONCE_BYTES)
  230. return BAD_FUNC_ARG;
  231. if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
  232. return ret;
  233. /* form a first chacha IV from the first 16 bytes of the nonce.
  234. * the first word is supplied in the "counter" arg, and
  235. * the result is a full 128 bit nonceful IV for the one-time block
  236. * crypto op that follows.
  237. */
  238. if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
  239. return ret;
  240. wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
  241. /* the HChacha output is used as a 256 bit key for the main cipher. */
  242. XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
  243. /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
  244. * to form the IV for the main cipher.
  245. */
  246. XMEMSET(iv, 0, 4);
  247. XMEMCPY(iv + 4, nonce + 16, 8);
  248. if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
  249. return ret;
  250. ForceZero(k, sizeof k);
  251. ForceZero(iv, sizeof iv);
  252. return 0;
  253. }
  254. #endif /* HAVE_XCHACHA */
/* Prototypes of the accelerated bulk routines that wc_Chacha_Process() picks
 * between when USE_INTEL_CHACHA_SPEEDUP is defined. They are declared with C
 * linkage because their definitions live outside this file.
 * NOTE(review): presumably implemented in a separate assembly source -
 * confirm against the build files.
 *
 * ctx    cipher state
 * m      input bytes
 * c      output bytes
 * bytes  number of bytes to process
 */
#ifdef __cplusplus
extern "C" {
#endif
extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
word32 bytes);
extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
word32 bytes);
extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
word32 bytes);
#ifdef __cplusplus
} /* extern "C" */
#endif
  267. /**
  268. * Encrypt a stream of bytes
  269. */
  270. static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
  271. word32 bytes)
  272. {
  273. union {
  274. byte state[CHACHA_CHUNK_BYTES];
  275. word32 state32[CHACHA_CHUNK_WORDS];
  276. wolfssl_word align_word; /* align for xorbufout */
  277. } tmp;
  278. /* handle left overs */
  279. if (bytes > 0 && ctx->left > 0) {
  280. word32 processed = min(bytes, ctx->left);
  281. wc_Chacha_wordtobyte(tmp.state32, ctx->X); /* recreate the stream */
  282. xorbufout(c, m, tmp.state + CHACHA_CHUNK_BYTES - ctx->left, processed);
  283. ctx->left -= processed;
  284. /* Used up all of the stream that was left, increment the counter */
  285. if (ctx->left == 0) {
  286. ctx->X[CHACHA_MATRIX_CNT_IV] =
  287. PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  288. }
  289. bytes -= processed;
  290. c += processed;
  291. m += processed;
  292. }
  293. while (bytes >= CHACHA_CHUNK_BYTES) {
  294. wc_Chacha_wordtobyte(tmp.state32, ctx->X);
  295. ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
  296. xorbufout(c, m, tmp.state, CHACHA_CHUNK_BYTES);
  297. bytes -= CHACHA_CHUNK_BYTES;
  298. c += CHACHA_CHUNK_BYTES;
  299. m += CHACHA_CHUNK_BYTES;
  300. }
  301. if (bytes) {
  302. /* in this case there will always be some left over since bytes is less
  303. * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
  304. * stream in order for the stream to be recreated on next call */
  305. wc_Chacha_wordtobyte(tmp.state32, ctx->X);
  306. xorbufout(c, m, tmp.state, bytes);
  307. ctx->left = CHACHA_CHUNK_BYTES - bytes;
  308. }
  309. }
  310. /**
  311. * API to encrypt/decrypt a message of any size.
  312. */
  313. int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
  314. word32 msglen)
  315. {
  316. if (ctx == NULL || input == NULL || output == NULL)
  317. return BAD_FUNC_ARG;
  318. #ifdef USE_INTEL_CHACHA_SPEEDUP
  319. /* handle left overs */
  320. if (msglen > 0 && ctx->left > 0) {
  321. byte* out;
  322. word32 processed = min(msglen, ctx->left);
  323. out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
  324. xorbufout(output, input, out, processed);
  325. ctx->left -= processed;
  326. msglen -= processed;
  327. output += processed;
  328. input += processed;
  329. }
  330. if (msglen == 0) {
  331. return 0;
  332. }
  333. if (!cpuidFlagsSet) {
  334. cpuidFlags = cpuid_get_flags();
  335. cpuidFlagsSet = 1;
  336. }
  337. #ifdef HAVE_INTEL_AVX2
  338. if (IS_INTEL_AVX2(cpuidFlags)) {
  339. SAVE_VECTOR_REGISTERS(return _svr_ret;);
  340. chacha_encrypt_avx2(ctx, input, output, msglen);
  341. RESTORE_VECTOR_REGISTERS();
  342. return 0;
  343. }
  344. #endif
  345. if (IS_INTEL_AVX1(cpuidFlags)) {
  346. SAVE_VECTOR_REGISTERS(return _svr_ret;);
  347. chacha_encrypt_avx1(ctx, input, output, msglen);
  348. RESTORE_VECTOR_REGISTERS();
  349. return 0;
  350. }
  351. else {
  352. chacha_encrypt_x64(ctx, input, output, msglen);
  353. return 0;
  354. }
  355. #endif
  356. wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
  357. return 0;
  358. }
  359. void wc_Chacha_purge_current_block(ChaCha* ctx) {
  360. if (ctx->left > 0) {
  361. byte scratch[CHACHA_CHUNK_BYTES];
  362. XMEMSET(scratch, 0, sizeof(scratch));
  363. (void)wc_Chacha_Process(ctx, scratch, scratch, CHACHA_CHUNK_BYTES - ctx->left);
  364. }
  365. }
  366. #endif /* HAVE_CHACHA*/
  367. #endif /* WOLFSSL_ARMASM */