ecc.c

/*
 * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
 * Copyright (c) 2019 Vitaly Chikunov <[email protected]>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <crypto/ecc_curve.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/swab.h>
#include <linux/fips.h>
#include <crypto/ecdh.h>
#include <crypto/rng.h>
#include <crypto/internal/ecc.h>
#include <asm/unaligned.h>
#include <linux/ratelimit.h>

#include "ecc_curve_defs.h"
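
/*
 * A 128-bit value as two 64-bit halves, used to hold the 64 x 64 bit
 * products and running column sums in the multiplication routines.
 */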
typedef struct {
        u64 m_low;
        u64 m_high;
} uint128_t;

/* Returns curve25519 curve param */
const struct ecc_curve *ecc_get_curve25519(void)
{
        return &ecc_25519;
}
EXPORT_SYMBOL(ecc_get_curve25519);

const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
{
        switch (curve_id) {
        /* In FIPS mode only allow P256 and higher */
        case ECC_CURVE_NIST_P192:
                return fips_enabled ? NULL : &nist_p192;
        case ECC_CURVE_NIST_P256:
                return &nist_p256;
        case ECC_CURVE_NIST_P384:
                return &nist_p384;
        default:
                return NULL;
        }
}
EXPORT_SYMBOL(ecc_get_curve);

static u64 *ecc_alloc_digits_space(unsigned int ndigits)
{
        size_t len = ndigits * sizeof(u64);

        if (!len)
                return NULL;

        return kmalloc(len, GFP_KERNEL);
}

static void ecc_free_digits_space(u64 *space)
{
        kfree_sensitive(space);
}

struct ecc_point *ecc_alloc_point(unsigned int ndigits)
{
        struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);

        if (!p)
                return NULL;

        p->x = ecc_alloc_digits_space(ndigits);
        if (!p->x)
                goto err_alloc_x;

        p->y = ecc_alloc_digits_space(ndigits);
        if (!p->y)
                goto err_alloc_y;

        p->ndigits = ndigits;

        return p;

err_alloc_y:
        ecc_free_digits_space(p->x);
err_alloc_x:
        kfree(p);
        return NULL;
}
EXPORT_SYMBOL(ecc_alloc_point);

void ecc_free_point(struct ecc_point *p)
{
        if (!p)
                return;

        kfree_sensitive(p->x);
        kfree_sensitive(p->y);
        kfree_sensitive(p);
}
EXPORT_SYMBOL(ecc_free_point);

static void vli_clear(u64 *vli, unsigned int ndigits)
{
        int i;

        for (i = 0; i < ndigits; i++)
                vli[i] = 0;
}

/* Returns true if vli == 0, false otherwise. */
bool vli_is_zero(const u64 *vli, unsigned int ndigits)
{
        int i;

        for (i = 0; i < ndigits; i++) {
                if (vli[i])
                        return false;
        }

        return true;
}
EXPORT_SYMBOL(vli_is_zero);

/* Returns nonzero if bit 'bit' of vli is set. */
static u64 vli_test_bit(const u64 *vli, unsigned int bit)
{
        return (vli[bit / 64] & ((u64)1 << (bit % 64)));
}

static bool vli_is_negative(const u64 *vli, unsigned int ndigits)
{
        return vli_test_bit(vli, ndigits * 64 - 1);
}

/* Counts the number of 64-bit "digits" in vli. */
static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
{
        int i;

        /* Search from the end until we find a non-zero digit.
         * We do it in reverse because we expect that most digits will
         * be nonzero.
         */
        for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);

        return (i + 1);
}

/* Counts the number of bits required for vli. */
unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
{
        unsigned int i, num_digits;
        u64 digit;

        num_digits = vli_num_digits(vli, ndigits);
        if (num_digits == 0)
                return 0;

        digit = vli[num_digits - 1];
        for (i = 0; digit; i++)
                digit >>= 1;

        return ((num_digits - 1) * 64 + i);
}
EXPORT_SYMBOL(vli_num_bits);

/* Set dest from unaligned bit string src. */
void vli_from_be64(u64 *dest, const void *src, unsigned int ndigits)
{
        int i;
        const u64 *from = src;

        for (i = 0; i < ndigits; i++)
                dest[i] = get_unaligned_be64(&from[ndigits - 1 - i]);
}
EXPORT_SYMBOL(vli_from_be64);

void vli_from_le64(u64 *dest, const void *src, unsigned int ndigits)
{
        int i;
        const u64 *from = src;

        for (i = 0; i < ndigits; i++)
                dest[i] = get_unaligned_le64(&from[i]);
}
EXPORT_SYMBOL(vli_from_le64);

/* Sets dest = src. */
static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
{
        int i;

        for (i = 0; i < ndigits; i++)
                dest[i] = src[i];
}

/* Returns sign of left - right. */
int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
{
        int i;

        for (i = ndigits - 1; i >= 0; i--) {
                if (left[i] > right[i])
                        return 1;
                else if (left[i] < right[i])
                        return -1;
        }

        return 0;
}
EXPORT_SYMBOL(vli_cmp);

/* Computes result = in << shift, returning carry. Can modify in place
 * (if result == in). 0 < shift < 64.
 */
static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
                      unsigned int ndigits)
{
        u64 carry = 0;
        int i;

        for (i = 0; i < ndigits; i++) {
                u64 temp = in[i];

                result[i] = (temp << shift) | carry;
                carry = temp >> (64 - shift);
        }

        return carry;
}

/* Computes vli = vli >> 1. */
static void vli_rshift1(u64 *vli, unsigned int ndigits)
{
        u64 *end = vli;
        u64 carry = 0;

        vli += ndigits;
        while (vli-- > end) {
                u64 temp = *vli;
                *vli = (temp >> 1) | carry;
                carry = temp << 63;
        }
}
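
/*
 * Note on the carry/borrow idiom in the routines below: when the digit
 * did not change (sum == left[i]), the addend plus carry wrapped to
 * exactly 0 mod 2^64, so the incoming carry must be preserved as-is;
 * only when the digit changed is the carry recomputed by comparison.
 */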
/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
                   unsigned int ndigits)
{
        u64 carry = 0;
        int i;

        for (i = 0; i < ndigits; i++) {
                u64 sum;

                sum = left[i] + right[i] + carry;
                if (sum != left[i])
                        carry = (sum < left[i]);

                result[i] = sum;
        }

        return carry;
}

/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_uadd(u64 *result, const u64 *left, u64 right,
                    unsigned int ndigits)
{
        u64 carry = right;
        int i;

        for (i = 0; i < ndigits; i++) {
                u64 sum;

                sum = left[i] + carry;
                if (sum != left[i])
                        carry = (sum < left[i]);
                else
                        carry = !!carry;

                result[i] = sum;
        }

        return carry;
}

/* Computes result = left - right, returning borrow. Can modify in place. */
u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
            unsigned int ndigits)
{
        u64 borrow = 0;
        int i;

        for (i = 0; i < ndigits; i++) {
                u64 diff;

                diff = left[i] - right[i] - borrow;
                if (diff != left[i])
                        borrow = (diff > left[i]);

                result[i] = diff;
        }

        return borrow;
}
EXPORT_SYMBOL(vli_sub);

/* Computes result = left - right, returning borrow. Can modify in place. */
static u64 vli_usub(u64 *result, const u64 *left, u64 right,
                    unsigned int ndigits)
{
        u64 borrow = right;
        int i;

        for (i = 0; i < ndigits; i++) {
                u64 diff;

                diff = left[i] - borrow;
                if (diff != left[i])
                        borrow = (diff > left[i]);

                result[i] = diff;
        }

        return borrow;
}
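
/*
 * 64 x 64 -> 128 bit multiply. Uses the compiler's native 128-bit type
 * when the architecture supports it, otherwise builds the product from
 * four 32 x 32 bit partial products.
 */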
static uint128_t mul_64_64(u64 left, u64 right)
{
        uint128_t result;
#if defined(CONFIG_ARCH_SUPPORTS_INT128)
        unsigned __int128 m = (unsigned __int128)left * right;

        result.m_low  = m;
        result.m_high = m >> 64;
#else
        u64 a0 = left & 0xffffffffull;
        u64 a1 = left >> 32;
        u64 b0 = right & 0xffffffffull;
        u64 b1 = right >> 32;

        u64 m0 = a0 * b0;
        u64 m1 = a0 * b1;
        u64 m2 = a1 * b0;
        u64 m3 = a1 * b1;

        m2 += (m0 >> 32);
        m2 += m1;

        /* Overflow */
        if (m2 < m1)
                m3 += 0x100000000ull;

        result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
        result.m_high = m3 + (m2 >> 32);
#endif
        return result;
}

static uint128_t add_128_128(uint128_t a, uint128_t b)
{
        uint128_t result;

        result.m_low = a.m_low + b.m_low;
        result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);

        return result;
}
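
/*
 * Schoolbook multiplication, computed column by column: for result
 * digit k, all partial products left[i] * right[k - i] are accumulated
 * into a 128-bit accumulator (r01) plus an overflow digit (r2).
 * result must have room for ndigits * 2 digits.
 */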
static void vli_mult(u64 *result, const u64 *left, const u64 *right,
                     unsigned int ndigits)
{
        uint128_t r01 = { 0, 0 };
        u64 r2 = 0;
        unsigned int i, k;

        /* Compute each digit of result in sequence, maintaining the
         * carries.
         */
        for (k = 0; k < ndigits * 2 - 1; k++) {
                unsigned int min;

                if (k < ndigits)
                        min = 0;
                else
                        min = (k + 1) - ndigits;

                for (i = min; i <= k && i < ndigits; i++) {
                        uint128_t product;

                        product = mul_64_64(left[i], right[k - i]);

                        r01 = add_128_128(r01, product);
                        r2 += (r01.m_high < product.m_high);
                }

                result[k] = r01.m_low;
                r01.m_low = r01.m_high;
                r01.m_high = r2;
                r2 = 0;
        }

        result[ndigits * 2 - 1] = r01.m_low;
}

/* Compute product = left * right, for a small right value. */
static void vli_umult(u64 *result, const u64 *left, u32 right,
                      unsigned int ndigits)
{
        uint128_t r01 = { 0 };
        unsigned int k;

        for (k = 0; k < ndigits; k++) {
                uint128_t product;

                product = mul_64_64(left[k], right);
                r01 = add_128_128(r01, product);
                /* no carry */
                result[k] = r01.m_low;
                r01.m_low = r01.m_high;
                r01.m_high = 0;
        }
        result[k] = r01.m_low;
        for (++k; k < ndigits * 2; k++)
                result[k] = 0;
}
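
/*
 * Squaring variant of vli_mult: each cross product left[i] * left[k - i]
 * with i < k - i occurs twice in the square, so it is computed once and
 * doubled by a one-bit left shift instead.
 */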
static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
{
        uint128_t r01 = { 0, 0 };
        u64 r2 = 0;
        int i, k;

        for (k = 0; k < ndigits * 2 - 1; k++) {
                unsigned int min;

                if (k < ndigits)
                        min = 0;
                else
                        min = (k + 1) - ndigits;

                for (i = min; i <= k && i <= k - i; i++) {
                        uint128_t product;

                        product = mul_64_64(left[i], left[k - i]);

                        if (i < k - i) {
                                r2 += product.m_high >> 63;
                                product.m_high = (product.m_high << 1) |
                                                 (product.m_low >> 63);
                                product.m_low <<= 1;
                        }

                        r01 = add_128_128(r01, product);
                        r2 += (r01.m_high < product.m_high);
                }

                result[k] = r01.m_low;
                r01.m_low = r01.m_high;
                r01.m_high = r2;
                r2 = 0;
        }

        result[ndigits * 2 - 1] = r01.m_low;
}

/* Computes result = (left + right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
                        const u64 *mod, unsigned int ndigits)
{
        u64 carry;

        carry = vli_add(result, left, right, ndigits);

        /* result > mod (result = mod + remainder), so subtract mod to
         * get remainder.
         */
        if (carry || vli_cmp(result, mod, ndigits) >= 0)
                vli_sub(result, result, mod, ndigits);
}

/* Computes result = (left - right) % mod.
 * Assumes that left < mod and right < mod, result != mod.
 */
static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
                        const u64 *mod, unsigned int ndigits)
{
        u64 borrow = vli_sub(result, left, right, ndigits);

        /* In this case, result == -diff == (max int) - diff.
         * Since -x % d == d - x, we can get the correct result from
         * result + mod (with overflow).
         */
        if (borrow)
                vli_add(result, result, mod, ndigits);
}
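
/*
 * For a special-form modulus p = 2^k - c (with k a multiple of 64
 * here), 2^k == c (mod p), so the top half of the product can be
 * folded into the bottom half by multiplying it by c and adding,
 * repeated until the top half is zero.
 */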
/*
 * Computes result = product % mod
 * for special form moduli: p = 2^k-c, for small c (note the minus sign)
 *
 * References:
 * R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
 * 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
 * Algorithm 9.2.13 (Fast mod operation for special-form moduli).
 */
static void vli_mmod_special(u64 *result, const u64 *product,
                             const u64 *mod, unsigned int ndigits)
{
        u64 c = -mod[0];
        u64 t[ECC_MAX_DIGITS * 2];
        u64 r[ECC_MAX_DIGITS * 2];

        vli_set(r, product, ndigits * 2);
        while (!vli_is_zero(r + ndigits, ndigits)) {
                vli_umult(t, r + ndigits, c, ndigits);
                vli_clear(r + ndigits, ndigits);
                vli_add(r, r, t, ndigits * 2);
        }
        vli_set(t, mod, ndigits);
        vli_clear(t + ndigits, ndigits);
        while (vli_cmp(r, t, ndigits * 2) >= 0)
                vli_sub(r, r, t, ndigits * 2);
        vli_set(result, r, ndigits);
}

/*
 * Computes result = product % mod
 * for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
 * where k-1 does not fit into qword boundary by -1 bit (such as 255).
 *
 * References (loosely based on):
 * A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
 * 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
 * URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
 *
 * H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
 * Handbook of Elliptic and Hyperelliptic Curve Cryptography.
 * Algorithm 10.25 Fast reduction for special form moduli
 */
static void vli_mmod_special2(u64 *result, const u64 *product,
                              const u64 *mod, unsigned int ndigits)
{
        u64 c2 = mod[0] * 2;
        u64 q[ECC_MAX_DIGITS];
        u64 r[ECC_MAX_DIGITS * 2];
        u64 m[ECC_MAX_DIGITS * 2]; /* expanded mod */
        int carry; /* last bit that doesn't fit into q */
        int i;

        vli_set(m, mod, ndigits);
        vli_clear(m + ndigits, ndigits);

        vli_set(r, product, ndigits);
        /* q and carry are top bits */
        vli_set(q, product + ndigits, ndigits);
        vli_clear(r + ndigits, ndigits);
        carry = vli_is_negative(r, ndigits);
        if (carry)
                r[ndigits - 1] &= (1ull << 63) - 1;

        for (i = 1; carry || !vli_is_zero(q, ndigits); i++) {
                u64 qc[ECC_MAX_DIGITS * 2];

                vli_umult(qc, q, c2, ndigits);
                if (carry)
                        vli_uadd(qc, qc, mod[0], ndigits * 2);
                vli_set(q, qc + ndigits, ndigits);
                vli_clear(qc + ndigits, ndigits);
                carry = vli_is_negative(qc, ndigits);
                if (carry)
                        qc[ndigits - 1] &= (1ull << 63) - 1;
                if (i & 1)
                        vli_sub(r, r, qc, ndigits * 2);
                else
                        vli_add(r, r, qc, ndigits * 2);
        }
        while (vli_is_negative(r, ndigits * 2))
                vli_add(r, r, m, ndigits * 2);
        while (vli_cmp(r, m, ndigits * 2) >= 0)
                vli_sub(r, r, m, ndigits * 2);

        vli_set(result, r, ndigits);
}
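
/*
 * Plain shift-and-subtract reduction: mod is first left-aligned with
 * the 2N-word product, then conditionally subtracted while shifting
 * right one bit per iteration. v[] double-buffers the remainder so a
 * subtraction result is kept only when it did not borrow.
 */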
/*
 * Computes result = product % mod, where product is 2N words long.
 * Reference: Ken MacKay's micro-ecc.
 * Currently only designed to work for curve_p or curve_n.
 */
static void vli_mmod_slow(u64 *result, u64 *product, const u64 *mod,
                          unsigned int ndigits)
{
        u64 mod_m[2 * ECC_MAX_DIGITS];
        u64 tmp[2 * ECC_MAX_DIGITS];
        u64 *v[2] = { tmp, product };
        u64 carry = 0;
        unsigned int i;
        /* Shift mod so its highest set bit is at the maximum position. */
        int shift = (ndigits * 2 * 64) - vli_num_bits(mod, ndigits);
        int word_shift = shift / 64;
        int bit_shift = shift % 64;

        vli_clear(mod_m, word_shift);
        if (bit_shift > 0) {
                for (i = 0; i < ndigits; ++i) {
                        mod_m[word_shift + i] = (mod[i] << bit_shift) | carry;
                        carry = mod[i] >> (64 - bit_shift);
                }
        } else
                vli_set(mod_m + word_shift, mod, ndigits);

        for (i = 1; shift >= 0; --shift) {
                u64 borrow = 0;
                unsigned int j;

                for (j = 0; j < ndigits * 2; ++j) {
                        u64 diff = v[i][j] - mod_m[j] - borrow;

                        if (diff != v[i][j])
                                borrow = (diff > v[i][j]);
                        v[1 - i][j] = diff;
                }
                i = !(i ^ borrow); /* Swap the index if there was no borrow */
                vli_rshift1(mod_m, ndigits);
                mod_m[ndigits - 1] |= mod_m[ndigits] << (64 - 1);
                vli_rshift1(mod_m + ndigits, ndigits);
        }
        vli_set(result, v[i], ndigits);
}
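
/*
 * Barrett reduction approximates the quotient floor(product / mod)
 * using the precomputed reciprocal mu instead of dividing, then fixes
 * up the remainder with at most a few trailing subtractions of mod.
 */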
/* Computes result = product % mod using Barrett's reduction with precomputed
 * value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and has
 * length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
 * boundary.
 *
 * Reference:
 * R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
 * 2.4.1 Barrett's algorithm. Algorithm 2.5.
 */
static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod,
                             unsigned int ndigits)
{
        u64 q[ECC_MAX_DIGITS * 2];
        u64 r[ECC_MAX_DIGITS * 2];
        const u64 *mu = mod + ndigits;

        vli_mult(q, product + ndigits, mu, ndigits);
        if (mu[ndigits])
                vli_add(q + ndigits, q + ndigits, product + ndigits, ndigits);
        vli_mult(r, mod, q + ndigits, ndigits);
        vli_sub(r, product, r, ndigits * 2);
        while (!vli_is_zero(r + ndigits, ndigits) ||
               vli_cmp(r, mod, ndigits) != -1) {
                u64 carry;

                carry = vli_sub(r, r, mod, ndigits);
                vli_usub(r + ndigits, r + ndigits, carry, ndigits);
        }
        vli_set(result, r, ndigits);
}

/* Computes result = product % curve_prime.
 * See algorithm 5 and 6 from
 * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
 */
static void vli_mmod_fast_192(u64 *result, const u64 *product,
                              const u64 *curve_prime, u64 *tmp)
{
        const unsigned int ndigits = 3;
        int carry;

        vli_set(result, product, ndigits);

        vli_set(tmp, &product[3], ndigits);
        carry = vli_add(result, result, tmp, ndigits);

        tmp[0] = 0;
        tmp[1] = product[3];
        tmp[2] = product[4];
        carry += vli_add(result, result, tmp, ndigits);

        tmp[0] = tmp[1] = product[5];
        tmp[2] = 0;
        carry += vli_add(result, result, tmp, ndigits);

        while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
                carry -= vli_sub(result, result, curve_prime, ndigits);
}

/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
static void vli_mmod_fast_256(u64 *result, const u64 *product,
                              const u64 *curve_prime, u64 *tmp)
{
        int carry;
        const unsigned int ndigits = 4;

        /* t */
        vli_set(result, product, ndigits);

        /* s1 */
        tmp[0] = 0;
        tmp[1] = product[5] & 0xffffffff00000000ull;
        tmp[2] = product[6];
        tmp[3] = product[7];
        carry = vli_lshift(tmp, tmp, 1, ndigits);
        carry += vli_add(result, result, tmp, ndigits);

        /* s2 */
        tmp[1] = product[6] << 32;
        tmp[2] = (product[6] >> 32) | (product[7] << 32);
        tmp[3] = product[7] >> 32;
        carry += vli_lshift(tmp, tmp, 1, ndigits);
        carry += vli_add(result, result, tmp, ndigits);

        /* s3 */
        tmp[0] = product[4];
        tmp[1] = product[5] & 0xffffffff;
        tmp[2] = 0;
        tmp[3] = product[7];
        carry += vli_add(result, result, tmp, ndigits);

        /* s4 */
        tmp[0] = (product[4] >> 32) | (product[5] << 32);
        tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
        tmp[2] = product[7];
        tmp[3] = (product[6] >> 32) | (product[4] << 32);
        carry += vli_add(result, result, tmp, ndigits);

        /* d1 */
        tmp[0] = (product[5] >> 32) | (product[6] << 32);
        tmp[1] = (product[6] >> 32);
        tmp[2] = 0;
        tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
        carry -= vli_sub(result, result, tmp, ndigits);

        /* d2 */
        tmp[0] = product[6];
        tmp[1] = product[7];
        tmp[2] = 0;
        tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
        carry -= vli_sub(result, result, tmp, ndigits);

        /* d3 */
        tmp[0] = (product[6] >> 32) | (product[7] << 32);
        tmp[1] = (product[7] >> 32) | (product[4] << 32);
        tmp[2] = (product[4] >> 32) | (product[5] << 32);
        tmp[3] = (product[6] << 32);
        carry -= vli_sub(result, result, tmp, ndigits);

        /* d4 */
        tmp[0] = product[7];
        tmp[1] = product[4] & 0xffffffff00000000ull;
        tmp[2] = product[5];
        tmp[3] = product[6] & 0xffffffff00000000ull;
        carry -= vli_sub(result, result, tmp, ndigits);

        if (carry < 0) {
                do {
                        carry += vli_add(result, result, curve_prime, ndigits);
                } while (carry < 0);
        } else {
                while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
                        carry -= vli_sub(result, result, curve_prime, ndigits);
        }
}

#define SL32OR32(x32, y32) (((u64)x32 << 32) | y32)
#define AND64H(x64)  (x64 & 0xffFFffFF00000000ull)
#define AND64L(x64)  (x64 & 0x00000000ffFFffFFull)

/* Computes result = product % curve_prime
 * from "Mathematical routines for the NIST prime elliptic curves"
 */
static void vli_mmod_fast_384(u64 *result, const u64 *product,
                              const u64 *curve_prime, u64 *tmp)
{
        int carry;
        const unsigned int ndigits = 6;

        /* t */
        vli_set(result, product, ndigits);

        /* s1 */
        tmp[0] = 0;                                             // 0 || 0
        tmp[1] = 0;                                             // 0 || 0
        tmp[2] = SL32OR32(product[11], (product[10]>>32));      //a22||a21
        tmp[3] = product[11]>>32;                               // 0 ||a23
        tmp[4] = 0;                                             // 0 || 0
        tmp[5] = 0;                                             // 0 || 0
        carry = vli_lshift(tmp, tmp, 1, ndigits);
        carry += vli_add(result, result, tmp, ndigits);

        /* s2 */
        tmp[0] = product[6];                                    //a13||a12
        tmp[1] = product[7];                                    //a15||a14
        tmp[2] = product[8];                                    //a17||a16
        tmp[3] = product[9];                                    //a19||a18
        tmp[4] = product[10];                                   //a21||a20
        tmp[5] = product[11];                                   //a23||a22
        carry += vli_add(result, result, tmp, ndigits);

        /* s3 */
        tmp[0] = SL32OR32(product[11], (product[10]>>32));      //a22||a21
        tmp[1] = SL32OR32(product[6], (product[11]>>32));       //a12||a23
        tmp[2] = SL32OR32(product[7], (product[6])>>32);        //a14||a13
        tmp[3] = SL32OR32(product[8], (product[7]>>32));        //a16||a15
        tmp[4] = SL32OR32(product[9], (product[8]>>32));        //a18||a17
        tmp[5] = SL32OR32(product[10], (product[9]>>32));       //a20||a19
        carry += vli_add(result, result, tmp, ndigits);

        /* s4 */
        tmp[0] = AND64H(product[11]);                           //a23|| 0
        tmp[1] = (product[10]<<32);                             //a20|| 0
        tmp[2] = product[6];                                    //a13||a12
        tmp[3] = product[7];                                    //a15||a14
        tmp[4] = product[8];                                    //a17||a16
        tmp[5] = product[9];                                    //a19||a18
        carry += vli_add(result, result, tmp, ndigits);

        /* s5 */
        tmp[0] = 0;                                             // 0|| 0
        tmp[1] = 0;                                             // 0|| 0
        tmp[2] = product[10];                                   //a21||a20
        tmp[3] = product[11];                                   //a23||a22
        tmp[4] = 0;                                             // 0|| 0
        tmp[5] = 0;                                             // 0|| 0
        carry += vli_add(result, result, tmp, ndigits);

        /* s6 */
        tmp[0] = AND64L(product[10]);                           // 0 ||a20
        tmp[1] = AND64H(product[10]);                           //a21|| 0
        tmp[2] = product[11];                                   //a23||a22
        tmp[3] = 0;                                             // 0 || 0
        tmp[4] = 0;                                             // 0 || 0
        tmp[5] = 0;                                             // 0 || 0
        carry += vli_add(result, result, tmp, ndigits);

        /* d1 */
        tmp[0] = SL32OR32(product[6], (product[11]>>32));       //a12||a23
        tmp[1] = SL32OR32(product[7], (product[6]>>32));        //a14||a13
        tmp[2] = SL32OR32(product[8], (product[7]>>32));        //a16||a15
        tmp[3] = SL32OR32(product[9], (product[8]>>32));        //a18||a17
        tmp[4] = SL32OR32(product[10], (product[9]>>32));       //a20||a19
        tmp[5] = SL32OR32(product[11], (product[10]>>32));      //a22||a21
        carry -= vli_sub(result, result, tmp, ndigits);

        /* d2 */
        tmp[0] = (product[10]<<32);                             //a20|| 0
        tmp[1] = SL32OR32(product[11], (product[10]>>32));      //a22||a21
        tmp[2] = (product[11]>>32);                             // 0 ||a23
        tmp[3] = 0;                                             // 0 || 0
        tmp[4] = 0;                                             // 0 || 0
        tmp[5] = 0;                                             // 0 || 0
        carry -= vli_sub(result, result, tmp, ndigits);

        /* d3 */
        tmp[0] = 0;                                             // 0 || 0
        tmp[1] = AND64H(product[11]);                           //a23|| 0
        tmp[2] = product[11]>>32;                               // 0 ||a23
        tmp[3] = 0;                                             // 0 || 0
        tmp[4] = 0;                                             // 0 || 0
        tmp[5] = 0;                                             // 0 || 0
        carry -= vli_sub(result, result, tmp, ndigits);

        if (carry < 0) {
                do {
                        carry += vli_add(result, result, curve_prime, ndigits);
                } while (carry < 0);
        } else {
                while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
                        carry -= vli_sub(result, result, curve_prime, ndigits);
        }
}

#undef SL32OR32
#undef AND64H
#undef AND64L

/* Computes result = product % curve_prime for different curve_primes.
 *
 * Note that curve_primes are distinguished just by heuristic check and
 * not by complete conformance check.
 */
static bool vli_mmod_fast(u64 *result, u64 *product,
                          const struct ecc_curve *curve)
{
        u64 tmp[2 * ECC_MAX_DIGITS];
        const u64 *curve_prime = curve->p;
        const unsigned int ndigits = curve->g.ndigits;

        /* All NIST curves have name prefix 'nist_' */
        if (strncmp(curve->name, "nist_", 5) != 0) {
                /* Try to handle Pseudo-Mersenne primes. */
                if (curve_prime[ndigits - 1] == -1ull) {
                        vli_mmod_special(result, product, curve_prime,
                                         ndigits);
                        return true;
                } else if (curve_prime[ndigits - 1] == 1ull << 63 &&
                           curve_prime[ndigits - 2] == 0) {
                        vli_mmod_special2(result, product, curve_prime,
                                          ndigits);
                        return true;
                }
                vli_mmod_barrett(result, product, curve_prime, ndigits);
                return true;
        }

        switch (ndigits) {
        case 3:
                vli_mmod_fast_192(result, product, curve_prime, tmp);
                break;
        case 4:
                vli_mmod_fast_256(result, product, curve_prime, tmp);
                break;
        case 6:
                vli_mmod_fast_384(result, product, curve_prime, tmp);
                break;
        default:
                pr_err_ratelimited("ecc: unsupported digits size!\n");
                return false;
        }

        return true;
}

/* Computes result = (left * right) % mod.
 * Assumes that mod is a big enough curve order.
 */
void vli_mod_mult_slow(u64 *result, const u64 *left, const u64 *right,
                       const u64 *mod, unsigned int ndigits)
{
        u64 product[ECC_MAX_DIGITS * 2];

        vli_mult(product, left, right, ndigits);
        vli_mmod_slow(result, product, mod, ndigits);
}
EXPORT_SYMBOL(vli_mod_mult_slow);

/* Computes result = (left * right) % curve_prime. */
static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
                              const struct ecc_curve *curve)
{
        u64 product[2 * ECC_MAX_DIGITS];

        vli_mult(product, left, right, curve->g.ndigits);
        vli_mmod_fast(result, product, curve);
}

/* Computes result = left^2 % curve_prime. */
static void vli_mod_square_fast(u64 *result, const u64 *left,
                                const struct ecc_curve *curve)
{
        u64 product[2 * ECC_MAX_DIGITS];

        vli_square(product, left, curve->g.ndigits);
        vli_mmod_fast(result, product, curve);
}

#define EVEN(vli) (!(vli[0] & 1))

/* Computes result = (1 / input) % mod. All VLIs are the same size.
 * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
 * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
 */
void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
                 unsigned int ndigits)
{
        u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
        u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
        u64 carry;
        int cmp_result;

        if (vli_is_zero(input, ndigits)) {
                vli_clear(result, ndigits);
                return;
        }

        vli_set(a, input, ndigits);
        vli_set(b, mod, ndigits);
        vli_clear(u, ndigits);
        u[0] = 1;
        vli_clear(v, ndigits);

        while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
                carry = 0;

                if (EVEN(a)) {
                        vli_rshift1(a, ndigits);

                        if (!EVEN(u))
                                carry = vli_add(u, u, mod, ndigits);

                        vli_rshift1(u, ndigits);
                        if (carry)
                                u[ndigits - 1] |= 0x8000000000000000ull;
                } else if (EVEN(b)) {
                        vli_rshift1(b, ndigits);

                        if (!EVEN(v))
                                carry = vli_add(v, v, mod, ndigits);

                        vli_rshift1(v, ndigits);
                        if (carry)
                                v[ndigits - 1] |= 0x8000000000000000ull;
                } else if (cmp_result > 0) {
                        vli_sub(a, a, b, ndigits);
                        vli_rshift1(a, ndigits);

                        if (vli_cmp(u, v, ndigits) < 0)
                                vli_add(u, u, mod, ndigits);

                        vli_sub(u, u, v, ndigits);
                        if (!EVEN(u))
                                carry = vli_add(u, u, mod, ndigits);

                        vli_rshift1(u, ndigits);
                        if (carry)
                                u[ndigits - 1] |= 0x8000000000000000ull;
                } else {
                        vli_sub(b, b, a, ndigits);
                        vli_rshift1(b, ndigits);

                        if (vli_cmp(v, u, ndigits) < 0)
                                vli_add(v, v, mod, ndigits);

                        vli_sub(v, v, u, ndigits);
                        if (!EVEN(v))
                                carry = vli_add(v, v, mod, ndigits);

                        vli_rshift1(v, ndigits);
                        if (carry)
                                v[ndigits - 1] |= 0x8000000000000000ull;
                }
        }

        vli_set(result, u, ndigits);
}
EXPORT_SYMBOL(vli_mod_inv);

/* ------ Point operations ------ */

/* Returns true if point is the point at infinity, false otherwise. */
bool ecc_point_is_zero(const struct ecc_point *point)
{
        return (vli_is_zero(point->x, point->ndigits) &&
                vli_is_zero(point->y, point->ndigits));
}
EXPORT_SYMBOL(ecc_point_is_zero);

/* Point multiplication algorithm using Montgomery's ladder with co-Z
 * coordinates. From https://eprint.iacr.org/2011/338.pdf
 */
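
/*
 * Jacobian doubling from the co-Z paper above. Note how 3*(x1^2 - z1^4)
 * is halved modulo p: since p is odd, an odd value is first made even
 * by adding p, so the right shift implements an exact division by 2.
 */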
/* Double in place */
static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
                                      const struct ecc_curve *curve)
{
        /* t1 = x, t2 = y, t3 = z */
        u64 t4[ECC_MAX_DIGITS];
        u64 t5[ECC_MAX_DIGITS];
        const u64 *curve_prime = curve->p;
        const unsigned int ndigits = curve->g.ndigits;

        if (vli_is_zero(z1, ndigits))
                return;

        /* t4 = y1^2 */
        vli_mod_square_fast(t4, y1, curve);
        /* t5 = x1*y1^2 = A */
        vli_mod_mult_fast(t5, x1, t4, curve);
        /* t4 = y1^4 */
        vli_mod_square_fast(t4, t4, curve);
        /* t2 = y1*z1 = z3 */
        vli_mod_mult_fast(y1, y1, z1, curve);
        /* t3 = z1^2 */
        vli_mod_square_fast(z1, z1, curve);

        /* t1 = x1 + z1^2 */
        vli_mod_add(x1, x1, z1, curve_prime, ndigits);
        /* t3 = 2*z1^2 */
        vli_mod_add(z1, z1, z1, curve_prime, ndigits);
        /* t3 = x1 - z1^2 */
        vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
        /* t1 = x1^2 - z1^4 */
        vli_mod_mult_fast(x1, x1, z1, curve);

        /* t3 = 2*(x1^2 - z1^4) */
        vli_mod_add(z1, x1, x1, curve_prime, ndigits);
        /* t1 = 3*(x1^2 - z1^4) */
        vli_mod_add(x1, x1, z1, curve_prime, ndigits);
        if (vli_test_bit(x1, 0)) {
                u64 carry = vli_add(x1, x1, curve_prime, ndigits);

                vli_rshift1(x1, ndigits);
                x1[ndigits - 1] |= carry << 63;
        } else {
                vli_rshift1(x1, ndigits);
        }
        /* t1 = 3/2*(x1^2 - z1^4) = B */

        /* t3 = B^2 */
        vli_mod_square_fast(z1, x1, curve);
        /* t3 = B^2 - A */
        vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
        /* t3 = B^2 - 2A = x3 */
        vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
        /* t5 = A - x3 */
        vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
        /* t1 = B * (A - x3) */
        vli_mod_mult_fast(x1, x1, t5, curve);
        /* t4 = B * (A - x3) - y1^4 = y3 */
        vli_mod_sub(t4, x1, t4, curve_prime, ndigits);

        vli_set(x1, z1, ndigits);
        vli_set(z1, y1, ndigits);
        vli_set(y1, t4, ndigits);
}

/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
static void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve)
{
        u64 t1[ECC_MAX_DIGITS];

        vli_mod_square_fast(t1, z, curve);      /* z^2 */
        vli_mod_mult_fast(x1, x1, t1, curve);   /* x1 * z^2 */
        vli_mod_mult_fast(t1, t1, z, curve);    /* z^3 */
        vli_mod_mult_fast(y1, y1, t1, curve);   /* y1 * z^3 */
}

/* P = (x1, y1) => 2P, (x2, y2) => P' */
static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
                                u64 *p_initial_z, const struct ecc_curve *curve)
{
        u64 z[ECC_MAX_DIGITS];
        const unsigned int ndigits = curve->g.ndigits;

        vli_set(x2, x1, ndigits);
        vli_set(y2, y1, ndigits);

        vli_clear(z, ndigits);
        z[0] = 1;

        if (p_initial_z)
                vli_set(z, p_initial_z, ndigits);

        apply_z(x1, y1, z, curve);
        ecc_point_double_jacobian(x1, y1, z, curve);
        apply_z(x2, y2, z, curve);
}

/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
 * or P => P', Q => P + Q
 */
static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
                     const struct ecc_curve *curve)
{
        /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
        u64 t5[ECC_MAX_DIGITS];
        const u64 *curve_prime = curve->p;
        const unsigned int ndigits = curve->g.ndigits;

        /* t5 = x2 - x1 */
        vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
        /* t5 = (x2 - x1)^2 = A */
        vli_mod_square_fast(t5, t5, curve);
        /* t1 = x1*A = B */
        vli_mod_mult_fast(x1, x1, t5, curve);
        /* t3 = x2*A = C */
        vli_mod_mult_fast(x2, x2, t5, curve);
        /* t4 = y2 - y1 */
        vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
        /* t5 = (y2 - y1)^2 = D */
        vli_mod_square_fast(t5, y2, curve);

        /* t5 = D - B */
        vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
        /* t5 = D - B - C = x3 */
        vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
        /* t3 = C - B */
        vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
        /* t2 = y1*(C - B) */
        vli_mod_mult_fast(y1, y1, x2, curve);
        /* t3 = B - x3 */
        vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
        /* t4 = (y2 - y1)*(B - x3) */
        vli_mod_mult_fast(y2, y2, x2, curve);
        /* t4 = y3 */
        vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

        vli_set(x2, t5, ndigits);
}

/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
 * or P => P - Q, Q => P + Q
 */
static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
                       const struct ecc_curve *curve)
{
        /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
        u64 t5[ECC_MAX_DIGITS];
        u64 t6[ECC_MAX_DIGITS];
        u64 t7[ECC_MAX_DIGITS];
        const u64 *curve_prime = curve->p;
        const unsigned int ndigits = curve->g.ndigits;

        /* t5 = x2 - x1 */
        vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
        /* t5 = (x2 - x1)^2 = A */
        vli_mod_square_fast(t5, t5, curve);
        /* t1 = x1*A = B */
        vli_mod_mult_fast(x1, x1, t5, curve);
        /* t3 = x2*A = C */
        vli_mod_mult_fast(x2, x2, t5, curve);
        /* t4 = y2 + y1 */
        vli_mod_add(t5, y2, y1, curve_prime, ndigits);
        /* t4 = y2 - y1 */
        vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

        /* t6 = C - B */
        vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
        /* t2 = y1 * (C - B) */
        vli_mod_mult_fast(y1, y1, t6, curve);
        /* t6 = B + C */
        vli_mod_add(t6, x1, x2, curve_prime, ndigits);
        /* t3 = (y2 - y1)^2 */
        vli_mod_square_fast(x2, y2, curve);
        /* t3 = x3 */
        vli_mod_sub(x2, x2, t6, curve_prime, ndigits);

        /* t7 = B - x3 */
        vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
        /* t4 = (y2 - y1)*(B - x3) */
        vli_mod_mult_fast(y2, y2, t7, curve);
        /* t4 = y3 */
        vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

        /* t7 = (y2 + y1)^2 = F */
        vli_mod_square_fast(t7, t5, curve);
        /* t7 = x3' */
        vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
        /* t6 = x3' - B */
        vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
        /* t6 = (y2 + y1)*(x3' - B) */
        vli_mod_mult_fast(t6, t6, t5, curve);
        /* t2 = y3' */
        vli_mod_sub(y1, t6, y1, curve_prime, ndigits);

        vli_set(x1, t7, ndigits);
}
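
/*
 * Montgomery ladder over co-Z (X, Y only) additions. The scalar is
 * regularized to a fixed bit length by adding the curve order n (or
 * 2n), so the ladder always executes the same number of steps
 * independent of the scalar value; the optional random initial Z
 * further blinds the projective representation.
 */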
static void ecc_point_mult(struct ecc_point *result,
                           const struct ecc_point *point, const u64 *scalar,
                           u64 *initial_z, const struct ecc_curve *curve,
                           unsigned int ndigits)
{
        /* R0 and R1 */
        u64 rx[2][ECC_MAX_DIGITS];
        u64 ry[2][ECC_MAX_DIGITS];
        u64 z[ECC_MAX_DIGITS];
        u64 sk[2][ECC_MAX_DIGITS];
        u64 *curve_prime = curve->p;
        int i, nb;
        int num_bits;
        int carry;

        carry = vli_add(sk[0], scalar, curve->n, ndigits);
        vli_add(sk[1], sk[0], curve->n, ndigits);
        scalar = sk[!carry];
        num_bits = sizeof(u64) * ndigits * 8 + 1;

        vli_set(rx[1], point->x, ndigits);
        vli_set(ry[1], point->y, ndigits);

        xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve);

        for (i = num_bits - 2; i > 0; i--) {
                nb = !vli_test_bit(scalar, i);
                xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
                xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
        }

        nb = !vli_test_bit(scalar, 0);
        xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);

        /* Find final 1/Z value. */
        /* X1 - X0 */
        vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
        /* Yb * (X1 - X0) */
        vli_mod_mult_fast(z, z, ry[1 - nb], curve);
        /* xP * Yb * (X1 - X0) */
        vli_mod_mult_fast(z, z, point->x, curve);
        /* 1 / (xP * Yb * (X1 - X0)) */
        vli_mod_inv(z, z, curve_prime, point->ndigits);
        /* yP / (xP * Yb * (X1 - X0)) */
        vli_mod_mult_fast(z, z, point->y, curve);
        /* Xb * yP / (xP * Yb * (X1 - X0)) */
        vli_mod_mult_fast(z, z, rx[1 - nb], curve);
        /* End 1/Z calculation */

        xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);

        apply_z(rx[0], ry[0], z, curve);

        vli_set(result->x, rx[0], ndigits);
        vli_set(result->y, ry[0], ndigits);
}

/* Computes R = P + Q mod p */
static void ecc_point_add(const struct ecc_point *result,
                          const struct ecc_point *p, const struct ecc_point *q,
                          const struct ecc_curve *curve)
{
        u64 z[ECC_MAX_DIGITS];
        u64 px[ECC_MAX_DIGITS];
        u64 py[ECC_MAX_DIGITS];
        unsigned int ndigits = curve->g.ndigits;

        vli_set(result->x, q->x, ndigits);
        vli_set(result->y, q->y, ndigits);
        vli_mod_sub(z, result->x, p->x, curve->p, ndigits);
        vli_set(px, p->x, ndigits);
        vli_set(py, p->y, ndigits);
        xycz_add(px, py, result->x, result->y, curve);
        vli_mod_inv(z, z, curve->p, ndigits);
        apply_z(result->x, result->y, z, curve);
}
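
/*
 * Shamir's trick evaluates u1*P + u2*Q in one double-and-add pass: at
 * each bit position the pair of bits from u1 and u2 selects one of
 * {0, P, Q, P + Q} to add after the doubling, roughly halving the cost
 * of two separate scalar multiplications.
 */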
/* Computes R = u1P + u2Q mod p using Shamir's trick.
 * Based on: Kenneth MacKay's micro-ecc (2014).
 */
void ecc_point_mult_shamir(const struct ecc_point *result,
                           const u64 *u1, const struct ecc_point *p,
                           const u64 *u2, const struct ecc_point *q,
                           const struct ecc_curve *curve)
{
        u64 z[ECC_MAX_DIGITS];
        u64 sump[2][ECC_MAX_DIGITS];
        u64 *rx = result->x;
        u64 *ry = result->y;
        unsigned int ndigits = curve->g.ndigits;
        unsigned int num_bits;
        struct ecc_point sum = ECC_POINT_INIT(sump[0], sump[1], ndigits);
        const struct ecc_point *points[4];
        const struct ecc_point *point;
        unsigned int idx;
        int i;

        ecc_point_add(&sum, p, q, curve);
        points[0] = NULL;
        points[1] = p;
        points[2] = q;
        points[3] = &sum;

        num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
        i = num_bits - 1;
        idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
        point = points[idx];

        vli_set(rx, point->x, ndigits);
        vli_set(ry, point->y, ndigits);
        vli_clear(z + 1, ndigits - 1);
        z[0] = 1;

        for (--i; i >= 0; i--) {
                ecc_point_double_jacobian(rx, ry, z, curve);
                idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
                point = points[idx];
                if (point) {
                        u64 tx[ECC_MAX_DIGITS];
                        u64 ty[ECC_MAX_DIGITS];
                        u64 tz[ECC_MAX_DIGITS];

                        vli_set(tx, point->x, ndigits);
                        vli_set(ty, point->y, ndigits);
                        apply_z(tx, ty, z, curve);
                        vli_mod_sub(tz, rx, tx, curve->p, ndigits);
                        xycz_add(tx, ty, rx, ry, curve);
                        vli_mod_mult_fast(z, z, tz, curve);
                }
        }
        vli_mod_inv(z, z, curve->p, ndigits);
        apply_z(rx, ry, z, curve);
}
EXPORT_SYMBOL(ecc_point_mult_shamir);

static int __ecc_is_key_valid(const struct ecc_curve *curve,
                              const u64 *private_key, unsigned int ndigits)
{
        u64 one[ECC_MAX_DIGITS] = { 1, };
        u64 res[ECC_MAX_DIGITS];

        if (!private_key)
                return -EINVAL;

        if (curve->g.ndigits != ndigits)
                return -EINVAL;

        /* Make sure the private key is in the range [2, n-3]. */
        if (vli_cmp(one, private_key, ndigits) != -1)
                return -EINVAL;
        vli_sub(res, curve->n, one, ndigits);
        vli_sub(res, res, one, ndigits);
        if (vli_cmp(res, private_key, ndigits) != 1)
                return -EINVAL;

        return 0;
}

int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
                     const u64 *private_key, unsigned int private_key_len)
{
        int nbytes;
        const struct ecc_curve *curve = ecc_get_curve(curve_id);

        nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

        if (private_key_len != nbytes)
                return -EINVAL;

        return __ecc_is_key_valid(curve, private_key, ndigits);
}
EXPORT_SYMBOL(ecc_is_key_valid);

/*
 * ECC private keys are generated using the method of extra random bits,
 * equivalent to that described in FIPS 186-4, Appendix B.4.1.
 *
 * d = (c mod (n-1)) + 1    where c is a string of random bits, 64 bits longer
 *                          than requested
 * 0 <= c mod (n-1) <= n-2  and implies that
 * 1 <= d <= n-1
 *
 * This method generates a private key uniformly distributed in the range
 * [1, n-1].
 */
int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
{
        const struct ecc_curve *curve = ecc_get_curve(curve_id);
        u64 priv[ECC_MAX_DIGITS];
        unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
        unsigned int nbits = vli_num_bits(curve->n, ndigits);
        int err;

        /* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
        if (nbits < 160 || ndigits > ARRAY_SIZE(priv))
                return -EINVAL;

        /*
         * FIPS 186-4 recommends that the private key should be obtained from a
         * RBG with a security strength equal to or greater than the security
         * strength associated with N.
         *
         * The maximum security strength identified by NIST SP800-57pt1r4 for
         * ECC is 256 (N >= 512).
         *
         * This condition is met by the default RNG because it selects a favored
         * DRBG with a security strength of 256.
         */
        if (crypto_get_default_rng())
                return -EFAULT;

        err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
        crypto_put_default_rng();
        if (err)
                return err;

        /* Make sure the private key is in the valid range. */
        if (__ecc_is_key_valid(curve, priv, ndigits))
                return -EINVAL;

        ecc_swap_digits(priv, privkey, ndigits);

        return 0;
}
EXPORT_SYMBOL(ecc_gen_privkey);

int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
                     const u64 *private_key, u64 *public_key)
{
        int ret = 0;
        struct ecc_point *pk;
        u64 priv[ECC_MAX_DIGITS];
        const struct ecc_curve *curve = ecc_get_curve(curve_id);

        if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) {
                ret = -EINVAL;
                goto out;
        }

        ecc_swap_digits(private_key, priv, ndigits);

        pk = ecc_alloc_point(ndigits);
        if (!pk) {
                ret = -ENOMEM;
                goto out;
        }

        ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits);

        /* SP800-56A rev 3 5.6.2.1.3 key check */
        if (ecc_is_pubkey_valid_full(curve, pk)) {
                ret = -EAGAIN;
                goto err_free_point;
        }

        ecc_swap_digits(pk->x, public_key, ndigits);
        ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);

err_free_point:
        ecc_free_point(pk);
out:
        return ret;
}
EXPORT_SYMBOL(ecc_make_pub_key);

/* SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only */
int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
                                struct ecc_point *pk)
{
        u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];

        if (WARN_ON(pk->ndigits != curve->g.ndigits))
                return -EINVAL;

        /* Check 1: Verify key is not the zero point. */
        if (ecc_point_is_zero(pk))
                return -EINVAL;

        /* Check 2: Verify key is in the range [1, p-1]. */
        if (vli_cmp(curve->p, pk->x, pk->ndigits) != 1)
                return -EINVAL;
        if (vli_cmp(curve->p, pk->y, pk->ndigits) != 1)
                return -EINVAL;

        /* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */
        vli_mod_square_fast(yy, pk->y, curve);          /* y^2 */
        vli_mod_square_fast(xxx, pk->x, curve);         /* x^2 */
        vli_mod_mult_fast(xxx, xxx, pk->x, curve);      /* x^3 */
        vli_mod_mult_fast(w, curve->a, pk->x, curve);   /* a·x */
        vli_mod_add(w, w, curve->b, curve->p, pk->ndigits);     /* a·x + b */
        vli_mod_add(w, w, xxx, curve->p, pk->ndigits);  /* x^3 + a·x + b */
        if (vli_cmp(yy, w, pk->ndigits) != 0)           /* Equation */
                return -EINVAL;

        return 0;
}
EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);

/* SP800-56A section 5.6.2.3.3 full verification */
int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
                             struct ecc_point *pk)
{
        struct ecc_point *nQ;

        /* Checks 1 through 3 */
        int ret = ecc_is_pubkey_valid_partial(curve, pk);

        if (ret)
                return ret;

        /* Check 4: Verify that nQ is the zero point. */
        nQ = ecc_alloc_point(pk->ndigits);
        if (!nQ)
                return -ENOMEM;

        ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
        if (!ecc_point_is_zero(nQ))
                ret = -EINVAL;

        ecc_free_point(nQ);

        return ret;
}
EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
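
/*
 * ECDH shared secret: the x coordinate of private_key * public_key.
 * The peer's point is validated first, a random initial Z randomizes
 * the ladder's projective representation, and the private scalar and
 * random Z are wiped before returning.
 */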
int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
                              const u64 *private_key, const u64 *public_key,
                              u64 *secret)
{
        int ret = 0;
        struct ecc_point *product, *pk;
        u64 priv[ECC_MAX_DIGITS];
        u64 rand_z[ECC_MAX_DIGITS];
        unsigned int nbytes;
        const struct ecc_curve *curve = ecc_get_curve(curve_id);

        if (!private_key || !public_key || !curve ||
            ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) {
                ret = -EINVAL;
                goto out;
        }

        nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;

        get_random_bytes(rand_z, nbytes);

        pk = ecc_alloc_point(ndigits);
        if (!pk) {
                ret = -ENOMEM;
                goto out;
        }

        ecc_swap_digits(public_key, pk->x, ndigits);
        ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
        ret = ecc_is_pubkey_valid_partial(curve, pk);
        if (ret)
                goto err_alloc_product;

        ecc_swap_digits(private_key, priv, ndigits);

        product = ecc_alloc_point(ndigits);
        if (!product) {
                ret = -ENOMEM;
                goto err_alloc_product;
        }

        ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);

        if (ecc_point_is_zero(product)) {
                ret = -EFAULT;
                goto err_validity;
        }

        ecc_swap_digits(product->x, secret, ndigits);

err_validity:
        memzero_explicit(priv, sizeof(priv));
        memzero_explicit(rand_z, sizeof(rand_z));
        ecc_free_point(product);
err_alloc_product:
        ecc_free_point(pk);
out:
        return ret;
}
EXPORT_SYMBOL(crypto_ecdh_shared_secret);

MODULE_LICENSE("Dual BSD/GPL");