LCOV - code coverage report
Current view: top level - os_stub/mbedtlslib/mbedtls/library - bignum_core.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 88.2 % 389 343
Test Date: 2025-11-23 08:10:21 Functions: 92.5 % 40 37

            Line data    Source code
       1              : /*
       2              :  *  Core bignum functions
       3              :  *
       4              :  *  Copyright The Mbed TLS Contributors
       5              :  *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
       6              :  */
       7              : 
       8              : #include "common.h"
       9              : 
      10              : #if defined(MBEDTLS_BIGNUM_C)
      11              : 
      12              : #include <string.h>
      13              : 
      14              : #include "mbedtls/error.h"
      15              : #include "mbedtls/platform_util.h"
      16              : #include "constant_time_internal.h"
      17              : 
      18              : #include "mbedtls/platform.h"
      19              : 
      20              : #include "bignum_core.h"
      21              : #include "bignum_core_invasive.h"
      22              : #include "bn_mul.h"
      23              : #include "constant_time_internal.h"
      24              : 
      25      8741448 : size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a)
      26              : {
      27              : #if defined(__has_builtin)
      28              : #if (MBEDTLS_MPI_UINT_MAX == UINT_MAX) && __has_builtin(__builtin_clz)
      29              :     #define core_clz __builtin_clz
      30              : #elif (MBEDTLS_MPI_UINT_MAX == ULONG_MAX) && __has_builtin(__builtin_clzl)
      31              :     #define core_clz __builtin_clzl
      32              : #elif (MBEDTLS_MPI_UINT_MAX == ULLONG_MAX) && __has_builtin(__builtin_clzll)
      33              :     #define core_clz __builtin_clzll
      34              : #endif
      35              : #endif
      36              : #if defined(core_clz)
      37      8741448 :     return (size_t) core_clz(a);
      38              : #else
      39              :     size_t j;
      40              :     mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
      41              : 
      42              :     for (j = 0; j < biL; j++) {
      43              :         if (a & mask) {
      44              :             break;
      45              :         }
      46              : 
      47              :         mask >>= 1;
      48              :     }
      49              : 
      50              :     return j;
      51              : #endif
      52              : }
      53              : 
      54      8741450 : size_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs)
      55              : {
      56              :     int i;
      57              :     size_t j;
      58              : 
      59     27612380 :     for (i = ((int) A_limbs) - 1; i >= 0; i--) {
      60     27612378 :         if (A[i] != 0) {
      61      8741448 :             j = biL - mbedtls_mpi_core_clz(A[i]);
      62      8741448 :             return (i * biL) + j;
      63              :         }
      64              :     }
      65              : 
      66            2 :     return 0;
      67              : }
      68              : 
      69       225510 : static mbedtls_mpi_uint mpi_bigendian_to_host(mbedtls_mpi_uint a)
      70              : {
      71              :     if (MBEDTLS_IS_BIG_ENDIAN) {
      72              :         /* Nothing to do on bigendian systems. */
      73              :         return a;
      74              :     } else {
      75              : #if defined(MBEDTLS_HAVE_INT32)
      76              :         return (mbedtls_mpi_uint) MBEDTLS_BSWAP32(a);
      77              : #elif defined(MBEDTLS_HAVE_INT64)
      78       225510 :         return (mbedtls_mpi_uint) MBEDTLS_BSWAP64(a);
      79              : #endif
      80              :     }
      81              : }
      82              : 
      83        34770 : void mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint *A,
      84              :                                         size_t A_limbs)
      85              : {
      86              :     mbedtls_mpi_uint *cur_limb_left;
      87              :     mbedtls_mpi_uint *cur_limb_right;
      88        34770 :     if (A_limbs == 0) {
      89            0 :         return;
      90              :     }
      91              : 
      92              :     /*
      93              :      * Traverse limbs and
      94              :      * - adapt byte-order in each limb
      95              :      * - swap the limbs themselves.
      96              :      * For that, simultaneously traverse the limbs from left to right
      97              :      * and from right to left, as long as the left index is not bigger
      98              :      * than the right index (it's not a problem if limbs is odd and the
      99              :      * indices coincide in the last iteration).
     100              :      */
     101        34770 :     for (cur_limb_left = A, cur_limb_right = A + (A_limbs - 1);
     102       147525 :          cur_limb_left <= cur_limb_right;
     103       112755 :          cur_limb_left++, cur_limb_right--) {
     104              :         mbedtls_mpi_uint tmp;
     105              :         /* Note that if cur_limb_left == cur_limb_right,
     106              :          * this code effectively swaps the bytes only once. */
     107       112755 :         tmp             = mpi_bigendian_to_host(*cur_limb_left);
     108       112755 :         *cur_limb_left  = mpi_bigendian_to_host(*cur_limb_right);
     109       112755 :         *cur_limb_right = tmp;
     110              :     }
     111              : }
     112              : 
     113              : /* Whether min <= A, in constant time.
     114              :  * A_limbs must be at least 1. */
     115          768 : mbedtls_ct_condition_t mbedtls_mpi_core_uint_le_mpi(mbedtls_mpi_uint min,
     116              :                                                     const mbedtls_mpi_uint *A,
     117              :                                                     size_t A_limbs)
     118              : {
     119              :     /* min <= least significant limb? */
     120          768 :     mbedtls_ct_condition_t min_le_lsl = mbedtls_ct_uint_ge(A[0], min);
     121              : 
     122              :     /* limbs other than the least significant one are all zero? */
     123          768 :     mbedtls_ct_condition_t msll_mask = MBEDTLS_CT_FALSE;
     124         4780 :     for (size_t i = 1; i < A_limbs; i++) {
     125         4012 :         msll_mask = mbedtls_ct_bool_or(msll_mask, mbedtls_ct_bool(A[i]));
     126              :     }
     127              : 
     128              :     /* min <= A iff the lowest limb of A is >= min or the other limbs
     129              :      * are not all zero. */
     130          768 :     return mbedtls_ct_bool_or(msll_mask, min_le_lsl);
     131              : }
     132              : 
     133      5137792 : mbedtls_ct_condition_t mbedtls_mpi_core_lt_ct(const mbedtls_mpi_uint *A,
     134              :                                               const mbedtls_mpi_uint *B,
     135              :                                               size_t limbs)
     136              : {
     137      5137792 :     mbedtls_ct_condition_t ret = MBEDTLS_CT_FALSE, cond = MBEDTLS_CT_FALSE, done = MBEDTLS_CT_FALSE;
     138              : 
     139     30625836 :     for (size_t i = limbs; i > 0; i--) {
     140              :         /*
     141              :          * If B[i - 1] < A[i - 1] then A < B is false and the result must
     142              :          * remain 0.
     143              :          *
     144              :          * Again even if we can make a decision, we just mark the result and
     145              :          * the fact that we are done and continue looping.
     146              :          */
     147     25488044 :         cond = mbedtls_ct_uint_lt(B[i - 1], A[i - 1]);
     148     25488044 :         done = mbedtls_ct_bool_or(done, cond);
     149              : 
     150              :         /*
     151              :          * If A[i - 1] < B[i - 1] then A < B is true.
     152              :          *
     153              :          * Again even if we can make a decision, we just mark the result and
     154              :          * the fact that we are done and continue looping.
     155              :          */
     156     25488044 :         cond = mbedtls_ct_uint_lt(A[i - 1], B[i - 1]);
     157     25488044 :         ret  = mbedtls_ct_bool_or(ret, mbedtls_ct_bool_and(cond, mbedtls_ct_bool_not(done)));
     158     25488044 :         done = mbedtls_ct_bool_or(done, cond);
     159              :     }
     160              : 
     161              :     /*
     162              :      * If all the limbs were equal, then the numbers are equal, A < B is false
     163              :      * and leaving the result 0 is correct.
     164              :      */
     165              : 
     166      5137792 :     return ret;
     167              : }
     168              : 
     169     36010701 : void mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint *X,
     170              :                                   const mbedtls_mpi_uint *A,
     171              :                                   size_t limbs,
     172              :                                   mbedtls_ct_condition_t assign)
     173              : {
     174     36010701 :     if (X == A) {
     175            0 :         return;
     176              :     }
     177              : 
     178              :     /* This function is very performance-sensitive for RSA. For this reason
     179              :      * we have the loop below, instead of calling mbedtls_ct_memcpy_if
     180              :      * (this is more optimal since here we don't have to handle the case where
     181              :      * we copy awkwardly sized data).
     182              :      */
     183    213155329 :     for (size_t i = 0; i < limbs; i++) {
     184    177144628 :         X[i] = mbedtls_ct_mpi_uint_if(assign, A[i], X[i]);
     185              :     }
     186              : }
     187              : 
     188     10261632 : void mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint *X,
     189              :                                 mbedtls_mpi_uint *Y,
     190              :                                 size_t limbs,
     191              :                                 mbedtls_ct_condition_t swap)
     192              : {
     193     10261632 :     if (X == Y) {
     194            0 :         return;
     195              :     }
     196              : 
     197     60830848 :     for (size_t i = 0; i < limbs; i++) {
     198     50569216 :         mbedtls_mpi_uint tmp = X[i];
     199     50569216 :         X[i] = mbedtls_ct_mpi_uint_if(swap, Y[i], X[i]);
     200     50569216 :         Y[i] = mbedtls_ct_mpi_uint_if(swap, tmp, Y[i]);
     201              :     }
     202              : }
     203              : 
     204            0 : int mbedtls_mpi_core_read_le(mbedtls_mpi_uint *X,
     205              :                              size_t X_limbs,
     206              :                              const unsigned char *input,
     207              :                              size_t input_length)
     208              : {
     209            0 :     const size_t limbs = CHARS_TO_LIMBS(input_length);
     210              : 
     211            0 :     if (X_limbs < limbs) {
     212            0 :         return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
     213              :     }
     214              : 
     215            0 :     if (X != NULL) {
     216            0 :         memset(X, 0, X_limbs * ciL);
     217              : 
     218            0 :         for (size_t i = 0; i < input_length; i++) {
     219            0 :             size_t offset = ((i % ciL) << 3);
     220            0 :             X[i / ciL] |= ((mbedtls_mpi_uint) input[i]) << offset;
     221              :         }
     222              :     }
     223              : 
     224            0 :     return 0;
     225              : }
     226              : 
     227        33926 : int mbedtls_mpi_core_read_be(mbedtls_mpi_uint *X,
     228              :                              size_t X_limbs,
     229              :                              const unsigned char *input,
     230              :                              size_t input_length)
     231              : {
     232        33926 :     const size_t limbs = CHARS_TO_LIMBS(input_length);
     233              : 
     234        33926 :     if (X_limbs < limbs) {
     235            0 :         return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
     236              :     }
     237              : 
     238              :     /* If X_limbs is 0, input_length must also be 0 (from previous test).
     239              :      * Nothing to do. */
     240        33926 :     if (X_limbs == 0) {
     241            0 :         return 0;
     242              :     }
     243              : 
     244        33926 :     memset(X, 0, X_limbs * ciL);
     245              : 
     246              :     /* memcpy() with (NULL, 0) is undefined behaviour */
     247        33926 :     if (input_length != 0) {
     248        33926 :         size_t overhead = (X_limbs * ciL) - input_length;
     249        33926 :         unsigned char *Xp = (unsigned char *) X;
     250        33926 :         memcpy(Xp + overhead, input, input_length);
     251              :     }
     252              : 
     253        33926 :     mbedtls_mpi_core_bigendian_to_host(X, X_limbs);
     254              : 
     255        33926 :     return 0;
     256              : }
     257              : 
     258            0 : int mbedtls_mpi_core_write_le(const mbedtls_mpi_uint *A,
     259              :                               size_t A_limbs,
     260              :                               unsigned char *output,
     261              :                               size_t output_length)
     262              : {
     263            0 :     size_t stored_bytes = A_limbs * ciL;
     264              :     size_t bytes_to_copy;
     265              : 
     266            0 :     if (stored_bytes < output_length) {
     267            0 :         bytes_to_copy = stored_bytes;
     268              :     } else {
     269            0 :         bytes_to_copy = output_length;
     270              : 
     271              :         /* The output buffer is smaller than the allocated size of A.
     272              :          * However A may fit if its leading bytes are zero. */
     273            0 :         for (size_t i = bytes_to_copy; i < stored_bytes; i++) {
     274            0 :             if (GET_BYTE(A, i) != 0) {
     275            0 :                 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
     276              :             }
     277              :         }
     278              :     }
     279              : 
     280            0 :     for (size_t i = 0; i < bytes_to_copy; i++) {
     281            0 :         output[i] = GET_BYTE(A, i);
     282              :     }
     283              : 
     284            0 :     if (stored_bytes < output_length) {
     285              :         /* Write trailing 0 bytes */
     286            0 :         memset(output + stored_bytes, 0, output_length - stored_bytes);
     287              :     }
     288              : 
     289            0 :     return 0;
     290              : }
     291              : 
     292         1118 : int mbedtls_mpi_core_write_be(const mbedtls_mpi_uint *X,
     293              :                               size_t X_limbs,
     294              :                               unsigned char *output,
     295              :                               size_t output_length)
     296              : {
     297              :     size_t stored_bytes;
     298              :     size_t bytes_to_copy;
     299              :     unsigned char *p;
     300              : 
     301         1118 :     stored_bytes = X_limbs * ciL;
     302              : 
     303         1118 :     if (stored_bytes < output_length) {
     304              :         /* There is enough space in the output buffer. Write initial
     305              :          * null bytes and record the position at which to start
     306              :          * writing the significant bytes. In this case, the execution
     307              :          * trace of this function does not depend on the value of the
     308              :          * number. */
     309            0 :         bytes_to_copy = stored_bytes;
     310            0 :         p = output + output_length - stored_bytes;
     311            0 :         memset(output, 0, output_length - stored_bytes);
     312              :     } else {
     313              :         /* The output buffer is smaller than the allocated size of X.
     314              :          * However X may fit if its leading bytes are zero. */
     315         1118 :         bytes_to_copy = output_length;
     316         1118 :         p = output;
     317        35993 :         for (size_t i = bytes_to_copy; i < stored_bytes; i++) {
     318        34875 :             if (GET_BYTE(X, i) != 0) {
     319            0 :                 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
     320              :             }
     321              :         }
     322              :     }
     323              : 
     324       188987 :     for (size_t i = 0; i < bytes_to_copy; i++) {
     325       187869 :         p[bytes_to_copy - i - 1] = GET_BYTE(X, i);
     326              :     }
     327              : 
     328         1118 :     return 0;
     329              : }
     330              : 
     331     10269957 : void mbedtls_mpi_core_shift_r(mbedtls_mpi_uint *X, size_t limbs,
     332              :                               size_t count)
     333              : {
     334              :     size_t i, v0, v1;
     335     10269957 :     mbedtls_mpi_uint r0 = 0, r1;
     336              : 
     337     10269957 :     v0 = count /  biL;
     338     10269957 :     v1 = count & (biL - 1);
     339              : 
     340     10269957 :     if (v0 > limbs || (v0 == limbs && v1 > 0)) {
     341            0 :         memset(X, 0, limbs * ciL);
     342            0 :         return;
     343              :     }
     344              : 
     345              :     /*
     346              :      * shift by count / limb_size
     347              :      */
     348     10269957 :     if (v0 > 0) {
     349        44668 :         for (i = 0; i < limbs - v0; i++) {
     350        40910 :             X[i] = X[i + v0];
     351              :         }
     352              : 
     353        42184 :         for (; i < limbs; i++) {
     354        38426 :             X[i] = 0;
     355              :         }
     356              :     }
     357              : 
     358              :     /*
     359              :      * shift by count % limb_size
     360              :      */
     361     10269957 :     if (v1 > 0) {
     362     60914389 :         for (i = limbs; i > 0; i--) {
     363     50648959 :             r1 = X[i - 1] << (biL - v1);
     364     50648959 :             X[i - 1] >>= v1;
     365     50648959 :             X[i - 1] |= r0;
     366     50648959 :             r0 = r1;
     367              :         }
     368              :     }
     369              : }
     370              : 
     371      2248440 : void mbedtls_mpi_core_shift_l(mbedtls_mpi_uint *X, size_t limbs,
     372              :                               size_t count)
     373              : {
     374              :     size_t i, v0, v1;
     375      2248440 :     mbedtls_mpi_uint r0 = 0, r1;
     376              : 
     377      2248440 :     v0 = count / (biL);
     378      2248440 :     v1 = count & (biL - 1);
     379              : 
     380              :     /*
     381              :      * shift by count / limb_size
     382              :      */
     383      2248440 :     if (v0 > 0) {
     384      2143195 :         for (i = limbs; i > v0; i--) {
     385      2104317 :             X[i - 1] = X[i - v0 - 1];
     386              :         }
     387              : 
     388       793087 :         for (; i > 0; i--) {
     389       754209 :             X[i - 1] = 0;
     390              :         }
     391              :     }
     392              : 
     393              :     /*
     394              :      * shift by count % limb_size
     395              :      */
     396      2248440 :     if (v1 > 0) {
     397     22110648 :         for (i = v0; i < limbs; i++) {
     398     19904845 :             r1 = X[i] >> (biL - v1);
     399     19904845 :             X[i] <<= v1;
     400     19904845 :             X[i] |= r0;
     401     19904845 :             r0 = r1;
     402              :         }
     403              :     }
     404      2248440 : }
     405              : 
     406       514083 : mbedtls_mpi_uint mbedtls_mpi_core_add(mbedtls_mpi_uint *X,
     407              :                                       const mbedtls_mpi_uint *A,
     408              :                                       const mbedtls_mpi_uint *B,
     409              :                                       size_t limbs)
     410              : {
     411       514083 :     mbedtls_mpi_uint c = 0;
     412              : 
     413      2559538 :     for (size_t i = 0; i < limbs; i++) {
     414      2045455 :         mbedtls_mpi_uint t = c + A[i];
     415      2045455 :         c = (t < A[i]);
     416      2045455 :         t += B[i];
     417      2045455 :         c += (t < B[i]);
     418      2045455 :         X[i] = t;
     419              :     }
     420              : 
     421       514083 :     return c;
     422              : }
     423              : 
     424     10249216 : mbedtls_mpi_uint mbedtls_mpi_core_add_if(mbedtls_mpi_uint *X,
     425              :                                          const mbedtls_mpi_uint *A,
     426              :                                          size_t limbs,
     427              :                                          unsigned cond)
     428              : {
     429     10249216 :     mbedtls_mpi_uint c = 0;
     430              : 
     431     10249216 :     mbedtls_ct_condition_t do_add = mbedtls_ct_bool(cond);
     432              : 
     433     60421120 :     for (size_t i = 0; i < limbs; i++) {
     434     50171904 :         mbedtls_mpi_uint add = mbedtls_ct_mpi_uint_if_else_0(do_add, A[i]);
     435     50171904 :         mbedtls_mpi_uint t = c + X[i];
     436     50171904 :         c = (t < X[i]);
     437     50171904 :         t += add;
     438     50171904 :         c += (t < add);
     439     50171904 :         X[i] = t;
     440              :     }
     441              : 
     442     10249216 :     return c;
     443              : }
     444              : 
     445     24493161 : mbedtls_mpi_uint mbedtls_mpi_core_sub(mbedtls_mpi_uint *X,
     446              :                                       const mbedtls_mpi_uint *A,
     447              :                                       const mbedtls_mpi_uint *B,
     448              :                                       size_t limbs)
     449              : {
     450     24493161 :     mbedtls_mpi_uint c = 0;
     451              : 
     452    136992118 :     for (size_t i = 0; i < limbs; i++) {
     453    112498957 :         mbedtls_mpi_uint z = (A[i] < c);
     454    112498957 :         mbedtls_mpi_uint t = A[i] - c;
     455    112498957 :         c = (t < B[i]) + z;
     456    112498957 :         X[i] = t - B[i];
     457              :     }
     458              : 
     459     24493161 :     return c;
     460              : }
     461              : 
     462     33842660 : mbedtls_mpi_uint mbedtls_mpi_core_mla(mbedtls_mpi_uint *d, size_t d_len,
     463              :                                       const mbedtls_mpi_uint *s, size_t s_len,
     464              :                                       mbedtls_mpi_uint b)
     465              : {
     466     33842660 :     mbedtls_mpi_uint c = 0; /* carry */
     467              :     /*
     468              :      * It is a documented precondition of this function that d_len >= s_len.
     469              :      * If that's not the case, we swap these round: this turns what would be
     470              :      * a buffer overflow into an incorrect result.
     471              :      */
     472     33842660 :     if (d_len < s_len) {
     473            0 :         s_len = d_len;
     474              :     }
     475     33842660 :     size_t excess_len = d_len - s_len;
     476     33842660 :     size_t steps_x8 = s_len / 8;
     477     33842660 :     size_t steps_x1 = s_len & 7;
     478              : 
     479     60923897 :     while (steps_x8--) {
     480     27081237 :         MULADDC_X8_INIT
     481              :         MULADDC_X8_CORE
     482              :             MULADDC_X8_STOP
     483              :     }
     484              : 
     485    139853764 :     while (steps_x1--) {
     486    106011104 :         MULADDC_X1_INIT
     487              :         MULADDC_X1_CORE
     488              :             MULADDC_X1_STOP
     489              :     }
     490              : 
     491     83320082 :     while (excess_len--) {
     492     49477422 :         *d += c;
     493     49477422 :         c = (*d < c);
     494     49477422 :         d++;
     495              :     }
     496              : 
     497     33842660 :     return c;
     498              : }
     499              : 
     500      6440638 : void mbedtls_mpi_core_mul(mbedtls_mpi_uint *X,
     501              :                           const mbedtls_mpi_uint *A, size_t A_limbs,
     502              :                           const mbedtls_mpi_uint *B, size_t B_limbs)
     503              : {
     504      6440638 :     memset(X, 0, (A_limbs + B_limbs) * ciL);
     505              : 
     506     32141213 :     for (size_t i = 0; i < B_limbs; i++) {
     507     25700575 :         (void) mbedtls_mpi_core_mla(X + i, A_limbs + 1, A, A_limbs, B[i]);
     508              :     }
     509      6440638 : }
     510              : 
     511              : /*
     512              :  * Fast Montgomery initialization (thanks to Tom St Denis).
     513              :  */
     514         1044 : mbedtls_mpi_uint mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint *N)
     515              : {
     516         1044 :     mbedtls_mpi_uint x = N[0];
     517              : 
     518         1044 :     x += ((N[0] + 2) & 4) << 1;
     519              : 
     520         5220 :     for (unsigned int i = biL; i >= 8; i /= 2) {
     521         4176 :         x *= (2 - (N[0] * x));
     522              :     }
     523              : 
     524         1044 :     return ~x + 1;
     525              : }
     526              : 
     527       169823 : void mbedtls_mpi_core_montmul(mbedtls_mpi_uint *X,
     528              :                               const mbedtls_mpi_uint *A,
     529              :                               const mbedtls_mpi_uint *B,
     530              :                               size_t B_limbs,
     531              :                               const mbedtls_mpi_uint *N,
     532              :                               size_t AN_limbs,
     533              :                               mbedtls_mpi_uint mm,
     534              :                               mbedtls_mpi_uint *T)
     535              : {
     536       169823 :     memset(T, 0, (2 * AN_limbs + 1) * ciL);
     537              : 
     538      3940088 :     for (size_t i = 0; i < AN_limbs; i++) {
     539              :         /* T = (T + u0*B + u1*N) / 2^biL */
     540      3770265 :         mbedtls_mpi_uint u0 = A[i];
     541      3770265 :         mbedtls_mpi_uint u1 = (T[0] + u0 * B[0]) * mm;
     542              : 
     543      3770265 :         (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, B, B_limbs, u0);
     544      3770265 :         (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, N, AN_limbs, u1);
     545              : 
     546      3770265 :         T++;
     547              :     }
     548              : 
     549              :     /*
     550              :      * The result we want is (T >= N) ? T - N : T.
     551              :      *
     552              :      * For better constant-time properties in this function, we always do the
     553              :      * subtraction, with the result in X.
     554              :      *
     555              :      * We also look to see if there was any carry in the final additions in the
     556              :      * loop above.
     557              :      */
     558              : 
     559       169823 :     mbedtls_mpi_uint carry  = T[AN_limbs];
     560       169823 :     mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub(X, T, N, AN_limbs);
     561              : 
     562              :     /*
     563              :      * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs):
     564              :      *
     565              :      * T can be in one of 3 ranges:
     566              :      *
     567              :      * 1) T < N      : (carry, borrow) = (0, 1): we want T
     568              :      * 2) N <= T < R : (carry, borrow) = (0, 0): we want X
     569              :      * 3) T >= R     : (carry, borrow) = (1, 1): we want X
     570              :      *
     571              :      * and (carry, borrow) = (1, 0) can't happen.
     572              :      *
     573              :      * So the correct return value is already in X if (carry ^ borrow) = 0,
     574              :      * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1.
     575              :      */
     576       169823 :     mbedtls_ct_memcpy_if(mbedtls_ct_bool(carry ^ borrow),
     577              :                          (unsigned char *) X,
     578              :                          (unsigned char *) T,
     579              :                          NULL,
     580              :                          AN_limbs * sizeof(mbedtls_mpi_uint));
     581       169823 : }
     582              : 
     583          452 : int mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi *X,
     584              :                                         const mbedtls_mpi *N)
     585              : {
     586          452 :     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     587              : 
     588          452 :     MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 1));
     589          452 :     MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(X, N->n * 2 * biL));
     590          452 :     MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N));
     591          452 :     MBEDTLS_MPI_CHK(mbedtls_mpi_shrink(X, N->n));
     592              : 
     593          452 : cleanup:
     594          452 :     return ret;
     595              : }
     596              : 
     597              : MBEDTLS_STATIC_TESTABLE
     598        41456 : void mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint *dest,
     599              :                                            const mbedtls_mpi_uint *table,
     600              :                                            size_t limbs,
     601              :                                            size_t count,
     602              :                                            size_t index)
     603              : {
     604       343920 :     for (size_t i = 0; i < count; i++, table += limbs) {
     605       302464 :         mbedtls_ct_condition_t assign = mbedtls_ct_uint_eq(i, index);
     606       302464 :         mbedtls_mpi_core_cond_assign(dest, table, limbs, assign);
     607              :     }
     608        41456 : }
     609              : 
     610              : /* Fill X with n_bytes random bytes.
     611              :  * X must already have room for those bytes.
     612              :  * The ordering of the bytes returned from the RNG is suitable for
     613              :  * deterministic ECDSA (see RFC 6979 §3.3 and the specification of
     614              :  * mbedtls_mpi_core_random()).
     615              :  */
     616          844 : int mbedtls_mpi_core_fill_random(
     617              :     mbedtls_mpi_uint *X, size_t X_limbs,
     618              :     size_t n_bytes,
     619              :     int (*f_rng)(void *, unsigned char *, size_t), void *p_rng)
     620              : {
     621          844 :     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     622          844 :     const size_t limbs = CHARS_TO_LIMBS(n_bytes);
     623          844 :     const size_t overhead = (limbs * ciL) - n_bytes;
     624              : 
     625          844 :     if (X_limbs < limbs) {
     626            0 :         return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
     627              :     }
     628              : 
     629          844 :     memset(X, 0, overhead);
     630          844 :     memset((unsigned char *) X + limbs * ciL, 0, (X_limbs - limbs) * ciL);
     631          844 :     MBEDTLS_MPI_CHK(f_rng(p_rng, (unsigned char *) X + overhead, n_bytes));
     632          844 :     mbedtls_mpi_core_bigendian_to_host(X, limbs);
     633              : 
     634          844 : cleanup:
     635          844 :     return ret;
     636              : }
     637              : 
     638          747 : int mbedtls_mpi_core_random(mbedtls_mpi_uint *X,
     639              :                             mbedtls_mpi_uint min,
     640              :                             const mbedtls_mpi_uint *N,
     641              :                             size_t limbs,
     642              :                             int (*f_rng)(void *, unsigned char *, size_t),
     643              :                             void *p_rng)
     644              : {
     645          747 :     mbedtls_ct_condition_t ge_lower = MBEDTLS_CT_TRUE, lt_upper = MBEDTLS_CT_FALSE;
     646          747 :     size_t n_bits = mbedtls_mpi_core_bitlen(N, limbs);
     647          747 :     size_t n_bytes = (n_bits + 7) / 8;
     648          747 :     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     649              : 
     650              :     /*
     651              :      * When min == 0, each try has at worst a probability 1/2 of failing
     652              :      * (the msb has a probability 1/2 of being 0, and then the result will
     653              :      * be < N), so after 30 tries failure probability is a most 2**(-30).
     654              :      *
     655              :      * When N is just below a power of 2, as is the case when generating
     656              :      * a random scalar on most elliptic curves, 1 try is enough with
     657              :      * overwhelming probability. When N is just above a power of 2,
     658              :      * as when generating a random scalar on secp224k1, each try has
     659              :      * a probability of failing that is almost 1/2.
     660              :      *
     661              :      * The probabilities are almost the same if min is nonzero but negligible
     662              :      * compared to N. This is always the case when N is crypto-sized, but
     663              :      * it's convenient to support small N for testing purposes. When N
     664              :      * is small, use a higher repeat count, otherwise the probability of
     665              :      * failure is macroscopic.
     666              :      */
     667          747 :     int count = (n_bytes > 4 ? 30 : 250);
     668              : 
     669              :     /*
     670              :      * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA)
     671              :      * when f_rng is a suitably parametrized instance of HMAC_DRBG:
     672              :      * - use the same byte ordering;
     673              :      * - keep the leftmost n_bits bits of the generated octet string;
     674              :      * - try until result is in the desired range.
     675              :      * This also avoids any bias, which is especially important for ECDSA.
     676              :      */
     677              :     do {
     678          768 :         MBEDTLS_MPI_CHK(mbedtls_mpi_core_fill_random(X, limbs,
     679              :                                                      n_bytes,
     680              :                                                      f_rng, p_rng));
     681          768 :         mbedtls_mpi_core_shift_r(X, limbs, 8 * n_bytes - n_bits);
     682              : 
     683          768 :         if (--count == 0) {
     684            0 :             ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
     685            0 :             goto cleanup;
     686              :         }
     687              : 
     688          768 :         ge_lower = mbedtls_mpi_core_uint_le_mpi(min, X, limbs);
     689          768 :         lt_upper = mbedtls_mpi_core_lt_ct(X, N, limbs);
     690          768 :     } while (mbedtls_ct_bool_and(ge_lower, lt_upper) == MBEDTLS_CT_FALSE);
     691              : 
     692          747 : cleanup:
     693          747 :     return ret;
     694              : }
     695              : 
     696         1006 : static size_t exp_mod_get_window_size(size_t Ebits)
     697              : {
     698              : #if MBEDTLS_MPI_WINDOW_SIZE >= 6
     699              :     return (Ebits > 671) ? 6 : (Ebits > 239) ? 5 : (Ebits >  79) ? 4 : 1;
     700              : #elif MBEDTLS_MPI_WINDOW_SIZE == 5
     701              :     return (Ebits > 239) ? 5 : (Ebits >  79) ? 4 : 1;
     702              : #elif MBEDTLS_MPI_WINDOW_SIZE > 1
     703         1006 :     return (Ebits >  79) ? MBEDTLS_MPI_WINDOW_SIZE : 1;
     704              : #else
     705              :     (void) Ebits;
     706              :     return 1;
     707              : #endif
     708              : }
     709              : 
     710          503 : size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs)
     711              : {
     712          503 :     const size_t wsize = exp_mod_get_window_size(E_limbs * biL);
     713          503 :     const size_t welem = ((size_t) 1) << wsize;
     714              : 
     715              :     /* How big does each part of the working memory pool need to be? */
     716          503 :     const size_t table_limbs   = welem * AN_limbs;
     717          503 :     const size_t select_limbs  = AN_limbs;
     718          503 :     const size_t temp_limbs    = 2 * AN_limbs + 1;
     719              : 
     720          503 :     return table_limbs + select_limbs + temp_limbs;
     721              : }
     722              : 
     723          503 : static void exp_mod_precompute_window(const mbedtls_mpi_uint *A,
     724              :                                       const mbedtls_mpi_uint *N,
     725              :                                       size_t AN_limbs,
     726              :                                       mbedtls_mpi_uint mm,
     727              :                                       const mbedtls_mpi_uint *RR,
     728              :                                       size_t welem,
     729              :                                       mbedtls_mpi_uint *Wtable,
     730              :                                       mbedtls_mpi_uint *temp)
     731              : {
     732              :     /* W[0] = 1 (in Montgomery presentation) */
     733          503 :     memset(Wtable, 0, AN_limbs * ciL);
     734          503 :     Wtable[0] = 1;
     735          503 :     mbedtls_mpi_core_montmul(Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp);
     736              : 
     737              :     /* W[1] = A (already in Montgomery presentation) */
     738          503 :     mbedtls_mpi_uint *W1 = Wtable + AN_limbs;
     739          503 :     memcpy(W1, A, AN_limbs * ciL);
     740              : 
     741              :     /* W[i+1] = W[i] * W[1], i >= 2 */
     742          503 :     mbedtls_mpi_uint *Wprev = W1;
     743          995 :     for (size_t i = 2; i < welem; i++) {
     744          492 :         mbedtls_mpi_uint *Wcur = Wprev + AN_limbs;
     745          492 :         mbedtls_mpi_core_montmul(Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp);
     746          492 :         Wprev = Wcur;
     747              :     }
     748          503 : }
     749              : 
     750              : #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C)
     751              : void (*mbedtls_safe_codepath_hook)(void) = NULL;
     752              : void (*mbedtls_unsafe_codepath_hook)(void) = NULL;
     753              : #endif
     754              : 
     755              : /*
     756              :  * This function calculates the indices of the exponent where the exponentiation algorithm should
     757              :  * start processing.
     758              :  *
     759              :  * Warning! If the parameter E_public has MBEDTLS_MPI_IS_PUBLIC as its value,
     760              :  * this function is not constant time with respect to the exponent (parameter E).
     761              :  */
     762          503 : static inline void exp_mod_calc_first_bit_optionally_safe(const mbedtls_mpi_uint *E,
     763              :                                                           size_t E_limbs,
     764              :                                                           int E_public,
     765              :                                                           size_t *E_limb_index,
     766              :                                                           size_t *E_bit_index)
     767              : {
     768          503 :     if (E_public == MBEDTLS_MPI_IS_PUBLIC) {
     769              :         /*
     770              :          * Skip leading zero bits.
     771              :          */
     772          345 :         size_t E_bits = mbedtls_mpi_core_bitlen(E, E_limbs);
     773          345 :         if (E_bits == 0) {
     774              :             /*
     775              :              * If E is 0 mbedtls_mpi_core_bitlen() returns 0. Even if that is the case, we will want
     776              :              * to represent it as a single 0 bit and as such the bitlength will be 1.
     777              :              */
     778            0 :             E_bits = 1;
     779              :         }
     780              : 
     781          345 :         *E_limb_index = E_bits / biL;
     782          345 :         *E_bit_index = E_bits % biL;
     783              : 
     784              : #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C)
     785              :         if (mbedtls_unsafe_codepath_hook != NULL) {
     786              :             mbedtls_unsafe_codepath_hook();
     787              :         }
     788              : #endif
     789              :     } else {
     790              :         /*
     791              :          * Here we need to be constant time with respect to E and can't do anything better than
     792              :          * start at the first allocated bit.
     793              :          */
     794          158 :         *E_limb_index = E_limbs;
     795          158 :         *E_bit_index = 0;
     796              : #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C)
     797              :         if (mbedtls_safe_codepath_hook != NULL) {
     798              :             mbedtls_safe_codepath_hook();
     799              :         }
     800              : #endif
     801              :     }
     802          503 : }
     803              : 
     804              : /*
     805              :  * Warning! If the parameter window_public has MBEDTLS_MPI_IS_PUBLIC as its value, this function is
     806              :  * not constant time with respect to the window parameter and consequently the exponent of the
     807              :  * exponentiation (parameter E of mbedtls_mpi_core_exp_mod_optionally_safe).
     808              :  */
     809        47321 : static inline void exp_mod_table_lookup_optionally_safe(mbedtls_mpi_uint *Wselect,
     810              :                                                         mbedtls_mpi_uint *Wtable,
     811              :                                                         size_t AN_limbs, size_t welem,
     812              :                                                         mbedtls_mpi_uint window,
     813              :                                                         int window_public)
     814              : {
     815        47321 :     if (window_public == MBEDTLS_MPI_IS_PUBLIC) {
     816         5865 :         memcpy(Wselect, Wtable + window * AN_limbs, AN_limbs * ciL);
     817              : #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C)
     818              :         if (mbedtls_unsafe_codepath_hook != NULL) {
     819              :             mbedtls_unsafe_codepath_hook();
     820              :         }
     821              : #endif
     822              :     } else {
     823              :         /* Select Wtable[window] without leaking window through
     824              :          * memory access patterns. */
     825        41456 :         mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
     826              :                                               AN_limbs, welem, window);
     827              : #if defined(MBEDTLS_TEST_HOOKS) && !defined(MBEDTLS_THREADING_C)
     828              :         if (mbedtls_safe_codepath_hook != NULL) {
     829              :             mbedtls_safe_codepath_hook();
     830              :         }
     831              : #endif
     832              :     }
     833        47321 : }
     834              : 
     835              : /* Exponentiation: X := A^E mod N.
     836              :  *
     837              :  * Warning! If the parameter E_public has MBEDTLS_MPI_IS_PUBLIC as its value,
     838              :  * this function is not constant time with respect to the exponent (parameter E).
     839              :  *
     840              :  * A must already be in Montgomery form.
     841              :  *
     842              :  * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero.
     843              :  *
     844              :  * RR must contain 2^{2*biL} mod N.
     845              :  *
     846              :  * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82
     847              :  * (The difference is that the body in our loop processes a single bit instead
     848              :  * of a full window.)
     849              :  */
     850          503 : static void mbedtls_mpi_core_exp_mod_optionally_safe(mbedtls_mpi_uint *X,
     851              :                                                      const mbedtls_mpi_uint *A,
     852              :                                                      const mbedtls_mpi_uint *N,
     853              :                                                      size_t AN_limbs,
     854              :                                                      const mbedtls_mpi_uint *E,
     855              :                                                      size_t E_limbs,
     856              :                                                      int E_public,
     857              :                                                      const mbedtls_mpi_uint *RR,
     858              :                                                      mbedtls_mpi_uint *T)
     859              : {
     860              :     /* We'll process the bits of E from most significant
     861              :      * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant
     862              :      * (limb_index=0, E_bit_index=0). */
     863          503 :     size_t E_limb_index = E_limbs;
     864          503 :     size_t E_bit_index = 0;
     865          503 :     exp_mod_calc_first_bit_optionally_safe(E, E_limbs, E_public,
     866              :                                            &E_limb_index, &E_bit_index);
     867              : 
     868          503 :     const size_t wsize = exp_mod_get_window_size(E_limb_index * biL);
     869          503 :     const size_t welem = ((size_t) 1) << wsize;
     870              : 
     871              :     /* This is how we will use the temporary storage T, which must have space
     872              :      * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */
     873          503 :     const size_t table_limbs  = welem * AN_limbs;
     874          503 :     const size_t select_limbs = AN_limbs;
     875              : 
     876              :     /* Pointers to specific parts of the temporary working memory pool */
     877          503 :     mbedtls_mpi_uint *const Wtable  = T;
     878          503 :     mbedtls_mpi_uint *const Wselect = Wtable  +  table_limbs;
     879          503 :     mbedtls_mpi_uint *const temp    = Wselect + select_limbs;
     880              : 
     881              :     /*
     882              :      * Window precomputation
     883              :      */
     884              : 
     885          503 :     const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N);
     886              : 
     887              :     /* Set Wtable[i] = A^i (in Montgomery representation) */
     888          503 :     exp_mod_precompute_window(A, N, AN_limbs,
     889              :                               mm, RR,
     890              :                               welem, Wtable, temp);
     891              : 
     892              :     /*
     893              :      * Fixed window exponentiation
     894              :      */
     895              : 
     896              :     /* X = 1 (in Montgomery presentation) initially */
     897          503 :     memcpy(X, Wtable, AN_limbs * ciL);
     898              : 
     899              :     /* At any given time, window contains window_bits bits from E.
     900              :      * window_bits can go up to wsize. */
     901          503 :     size_t window_bits = 0;
     902          503 :     mbedtls_mpi_uint window = 0;
     903              : 
     904              :     do {
     905              :         /* Square */
     906       120425 :         mbedtls_mpi_core_montmul(X, X, X, AN_limbs, N, AN_limbs, mm, temp);
     907              : 
     908              :         /* Move to the next bit of the exponent */
     909       120425 :         if (E_bit_index == 0) {
     910         1790 :             --E_limb_index;
     911         1790 :             E_bit_index = biL - 1;
     912              :         } else {
     913       118635 :             --E_bit_index;
     914              :         }
     915              :         /* Insert next exponent bit into window */
     916       120425 :         ++window_bits;
     917       120425 :         window <<= 1;
     918       120425 :         window |= (E[E_limb_index] >> E_bit_index) & 1;
     919              : 
     920              :         /* Clear window if it's full. Also clear the window at the end,
     921              :          * when we've finished processing the exponent. */
     922       120425 :         if (window_bits == wsize ||
     923        73184 :             (E_bit_index == 0 && E_limb_index == 0)) {
     924              : 
     925        47321 :             exp_mod_table_lookup_optionally_safe(Wselect, Wtable, AN_limbs, welem,
     926              :                                                  window, E_public);
     927              :             /* Multiply X by the selected element. */
     928        47321 :             mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm,
     929              :                                      temp);
     930        47321 :             window = 0;
     931        47321 :             window_bits = 0;
     932              :         }
     933       120425 :     } while (!(E_bit_index == 0 && E_limb_index == 0));
     934          503 : }
     935              : 
     936          158 : void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
     937              :                               const mbedtls_mpi_uint *A,
     938              :                               const mbedtls_mpi_uint *N, size_t AN_limbs,
     939              :                               const mbedtls_mpi_uint *E, size_t E_limbs,
     940              :                               const mbedtls_mpi_uint *RR,
     941              :                               mbedtls_mpi_uint *T)
     942              : {
     943          158 :     mbedtls_mpi_core_exp_mod_optionally_safe(X,
     944              :                                              A,
     945              :                                              N,
     946              :                                              AN_limbs,
     947              :                                              E,
     948              :                                              E_limbs,
     949              :                                              MBEDTLS_MPI_IS_SECRET,
     950              :                                              RR,
     951              :                                              T);
     952          158 : }
     953              : 
     954          345 : void mbedtls_mpi_core_exp_mod_unsafe(mbedtls_mpi_uint *X,
     955              :                                      const mbedtls_mpi_uint *A,
     956              :                                      const mbedtls_mpi_uint *N, size_t AN_limbs,
     957              :                                      const mbedtls_mpi_uint *E, size_t E_limbs,
     958              :                                      const mbedtls_mpi_uint *RR,
     959              :                                      mbedtls_mpi_uint *T)
     960              : {
     961          345 :     mbedtls_mpi_core_exp_mod_optionally_safe(X,
     962              :                                              A,
     963              :                                              N,
     964              :                                              AN_limbs,
     965              :                                              E,
     966              :                                              E_limbs,
     967              :                                              MBEDTLS_MPI_IS_PUBLIC,
     968              :                                              RR,
     969              :                                              T);
     970          345 : }
     971              : 
     972      4485961 : mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X,
     973              :                                           const mbedtls_mpi_uint *A,
     974              :                                           mbedtls_mpi_uint c,  /* doubles as carry */
     975              :                                           size_t limbs)
     976              : {
     977     25675484 :     for (size_t i = 0; i < limbs; i++) {
     978     21189523 :         mbedtls_mpi_uint s = A[i];
     979     21189523 :         mbedtls_mpi_uint t = s - c;
     980     21189523 :         c = (t > s);
     981     21189523 :         X[i] = t;
     982              :     }
     983              : 
     984      4485961 :     return c;
     985              : }
     986              : 
     987            0 : mbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
     988              :                                                       size_t limbs)
     989              : {
     990            0 :     volatile const mbedtls_mpi_uint *force_read_A = A;
     991            0 :     mbedtls_mpi_uint bits = 0;
     992              : 
     993            0 :     for (size_t i = 0; i < limbs; i++) {
     994            0 :         bits |= force_read_A[i];
     995              :     }
     996              : 
     997            0 :     return mbedtls_ct_bool(bits);
     998              : }
     999              : 
    1000          541 : void mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X,
    1001              :                                   const mbedtls_mpi_uint *A,
    1002              :                                   const mbedtls_mpi_uint *N,
    1003              :                                   size_t AN_limbs,
    1004              :                                   mbedtls_mpi_uint mm,
    1005              :                                   const mbedtls_mpi_uint *rr,
    1006              :                                   mbedtls_mpi_uint *T)
    1007              : {
    1008          541 :     mbedtls_mpi_core_montmul(X, A, rr, AN_limbs, N, AN_limbs, mm, T);
    1009          541 : }
    1010              : 
    1011          503 : void mbedtls_mpi_core_from_mont_rep(mbedtls_mpi_uint *X,
    1012              :                                     const mbedtls_mpi_uint *A,
    1013              :                                     const mbedtls_mpi_uint *N,
    1014              :                                     size_t AN_limbs,
    1015              :                                     mbedtls_mpi_uint mm,
    1016              :                                     mbedtls_mpi_uint *T)
    1017              : {
    1018          503 :     const mbedtls_mpi_uint Rinv = 1;    /* 1/R in Mont. rep => 1 */
    1019              : 
    1020          503 :     mbedtls_mpi_core_montmul(X, A, &Rinv, 1, N, AN_limbs, mm, T);
    1021          503 : }
    1022              : 
    1023              : /*
    1024              :  * Compute X = A - B mod N.
    1025              :  * Both A and B must be in [0, N) and so will the output.
    1026              :  */
    1027      5124608 : static void mpi_core_sub_mod(mbedtls_mpi_uint *X,
    1028              :                              const mbedtls_mpi_uint *A,
    1029              :                              const mbedtls_mpi_uint *B,
    1030              :                              const mbedtls_mpi_uint *N,
    1031              :                              size_t limbs)
    1032              : {
    1033      5124608 :     mbedtls_mpi_uint c = mbedtls_mpi_core_sub(X, A, B, limbs);
    1034      5124608 :     (void) mbedtls_mpi_core_add_if(X, N, limbs, (unsigned) c);
    1035      5124608 : }
    1036              : 
    1037              : /*
    1038              :  * Divide X by 2 mod N in place, assuming N is odd.
    1039              :  * The input must be in [0, N) and so will the output.
    1040              :  */
    1041              : MBEDTLS_STATIC_TESTABLE
    1042      5124608 : void mbedtls_mpi_core_div2_mod_odd(mbedtls_mpi_uint *X,
    1043              :                                    const mbedtls_mpi_uint *N,
    1044              :                                    size_t limbs)
    1045              : {
    1046              :     /* If X is odd, add N to make it even before shifting. */
    1047      5124608 :     unsigned odd = (unsigned) X[0] & 1;
    1048      5124608 :     mbedtls_mpi_uint c = mbedtls_mpi_core_add_if(X, N, limbs, odd);
    1049      5124608 :     mbedtls_mpi_core_shift_r(X, limbs, 1);
    1050      5124608 :     X[limbs - 1] |= c << (biL - 1);
    1051      5124608 : }
    1052              : 
    1053              : /*
    1054              :  * Constant-time GCD and modular inversion - odd modulus.
    1055              :  *
    1056              :  * Pre-conditions: see public documentation.
    1057              :  *
    1058              :  * See https://www.jstage.jst.go.jp/article/transinf/E106.D/9/E106.D_2022ICP0009/_pdf
    1059              :  *
    1060              :  * The paper gives two computationally equivalent algorithms: Alg 7 (readable)
    1061              :  * and Alg 8 (constant-time). We use a third version that's hopefully both:
    1062              :  *
    1063              :  *  u, v = A, N  # N is called p in the paper but doesn't have to be prime
    1064              :  *  q, r = 0, 1
    1065              :  *  repeat bits(A_limbs + N_limbs) times:
    1066              :  *      d = v - u  # t1 in Alg 7
    1067              :  *      t1 = (u and v both odd) ? u : d  # t1 in Alg 8
    1068              :  *      t2 = (u and v both odd) ? d : (u odd) ? v : u  # t2 in Alg 8
    1069              :  *      t2 >>= 1
    1070              :  *      swap = t1 > t2  # similar to s, z in Alg 8
    1071              :  *      u, v = (swap) ? t2, t1 : t1, t2
    1072              :  *
    1073              :  *      d = r - q mod N  # t2 in Alg 7
    1074              :  *      t1 = (u and v both odd) ? q : d  # t3 in Alg 8
    1075              :  *      t2 = (u and v both odd) ? d : (u odd) ? r : q  # t4 Alg 8
    1076              :  *      t2 /= 2 mod N  # see below (pre_com)
    1077              :  *      q, r = (swap) ? t2, t1 : t1, t2
    1078              :  *  return v, q  # v: GCD, see Alg 6; q: no mult by pre_com, see below
    1079              :  *
    1080              :  * The ternary operators in the above pseudo-code need to be realised in a
    1081              :  * constant-time fashion. We use conditional assign for t1, t2 and conditional
    1082              :  * swap for the final update. (Note: the similarity between branches of Alg 7
    1083              :  * are highlighted in tables 2 and 3 and the surrounding text.)
    1084              :  *
    1085              :  * Also, we re-order operations, grouping things related to the inverse, which
    1086              :  * facilitates making its computation optional, and requires fewer temporaries.
    1087              :  *
    1088              :  * The only actual change from the paper is dropping the trick with pre_com,
    1089              :  * which I think complicates things for no benefit.
    1090              :  * See the comment on the big I != NULL block below for details.
    1091              :  */
    1092         9733 : void mbedtls_mpi_core_gcd_modinv_odd(mbedtls_mpi_uint *G,
    1093              :                                      mbedtls_mpi_uint *I,
    1094              :                                      const mbedtls_mpi_uint *A,
    1095              :                                      size_t A_limbs,
    1096              :                                      const mbedtls_mpi_uint *N,
    1097              :                                      size_t N_limbs,
    1098              :                                      mbedtls_mpi_uint *T)
    1099              : {
    1100              :     /* GCD and modinv, names common to Alg 7 and Alg 8 */
    1101         9733 :     mbedtls_mpi_uint *u = T + 0 * N_limbs;
    1102         9733 :     mbedtls_mpi_uint *v = G;
    1103              : 
    1104              :     /* GCD and modinv, my name (t1, t2 from Alg 7) */
    1105         9733 :     mbedtls_mpi_uint *d = T + 1 * N_limbs;
    1106              : 
    1107              :     /* GCD and modinv, names from Alg 8 (note: t1, t2 from Alg 7 are d above) */
    1108         9733 :     mbedtls_mpi_uint *t1 = T + 2 * N_limbs;
    1109         9733 :     mbedtls_mpi_uint *t2 = T + 3 * N_limbs;
    1110              : 
    1111              :     /* modinv only, names common to Alg 7 and Alg 8 */
    1112         9733 :     mbedtls_mpi_uint *q = I;
    1113         9733 :     mbedtls_mpi_uint *r = I != NULL ? T + 4 * N_limbs : NULL;
    1114              : 
    1115              :     /*
    1116              :      * Initial values:
    1117              :      * u, v = A, N
    1118              :      * q, r = 0, 1
    1119              :      *
    1120              :      * We only write to G (aka v) after reading from inputs (A and N), which
    1121              :      * allows aliasing, except with N when I != NULL, as then we'll be operating
    1122              :      * mod N on q and r later - see the public documentation.
    1123              :      */
    1124         9733 :     if (A_limbs > N_limbs) {
    1125              :         /* Violating this precondition should not result in memory errors. */
    1126            0 :         A_limbs = N_limbs;
    1127              :     }
    1128         9733 :     memcpy(u, A, A_limbs * ciL);
    1129         9733 :     memset((char *) u + A_limbs * ciL, 0, (N_limbs - A_limbs) * ciL);
    1130              : 
    1131              :     /* Avoid possible UB with memcpy when src == dst. */
    1132         9733 :     if (v != N) {
    1133         9733 :         memcpy(v, N, N_limbs * ciL);
    1134              :     }
    1135              : 
    1136         9733 :     if (I != NULL) {
    1137         9729 :         memset(q, 0, N_limbs * ciL);
    1138              : 
    1139         9729 :         memset(r, 0, N_limbs * ciL);
    1140         9729 :         r[0] = 1;
    1141              :     }
    1142              : 
    1143              :     /*
    1144              :      * At each step, out of u, v, v - u we keep one, shift another, and discard
    1145              :      * the third, then update (u, v) with the ordered result.
    1146              :      * Then we mirror those actions with q, r, r - q mod N.
    1147              :      *
    1148              :      * Loop invariants:
    1149              :      *  u <= v                  (on entry: A <= N)
    1150              :      *  GCD(u, v) == GCD(A, N)  (on entry: trivial)
    1151              :      *  v = A * q mod N         (on entry: N = A * 0 mod N)
    1152              :      *  u = A * r mod N         (on entry: A = A * 1 mod N)
    1153              :      *  q, r in [0, N)          (on entry: 0, 1)
    1154              :      *
    1155              :      * On exit:
    1156              :      *  u = 0
    1157              :      *  v = GCD(A, N) = A * q mod N
    1158              :      *  if v == 1 then 1 = A * q mod N ie q is A's inverse mod N
    1159              :      *  r = 0
    1160              :      *
    1161              :      * The exit state is a fixed point of the loop's body.
    1162              :      * Alg 7 and Alg 8 use 2 * bitlen(N) iterations but Theorem 2 (above in the
    1163              :      * paper) says bitlen(A) + bitlen(N) is actually enough.
    1164              :      */
    1165      5146757 :     for (size_t i = 0; i < (A_limbs + N_limbs) * biL; i++) {
    1166              :         /* s, z in Alg 8 - use meaningful names instead */
    1167      5137024 :         mbedtls_ct_condition_t u_odd = mbedtls_ct_bool(u[0] & 1);
    1168      5137024 :         mbedtls_ct_condition_t v_odd = mbedtls_ct_bool(v[0] & 1);
    1169              : 
    1170              :         /* Other conditions that will be useful below */
    1171      5137024 :         mbedtls_ct_condition_t u_odd_v_odd = mbedtls_ct_bool_and(u_odd, v_odd);
    1172      5137024 :         mbedtls_ct_condition_t v_even = mbedtls_ct_bool_not(v_odd);
    1173      5137024 :         mbedtls_ct_condition_t u_odd_v_even = mbedtls_ct_bool_and(u_odd, v_even);
    1174              : 
    1175              :         /* This is called t1 in Alg 7 (no name in Alg 8).
    1176              :          * We know that u <= v so there is no carry */
    1177      5137024 :         (void) mbedtls_mpi_core_sub(d, v, u, N_limbs);
    1178              : 
    1179              :         /* t1 (the thing that's kept) can be d (default) or u (if t2 is d) */
    1180      5137024 :         memcpy(t1, d, N_limbs * ciL);
    1181      5137024 :         mbedtls_mpi_core_cond_assign(t1, u, N_limbs, u_odd_v_odd);
    1182              : 
    1183              :         /* t2 (the thing that's shifted) can be u (if even), or v (if even),
    1184              :          * or d (which is even if both u and v were odd) */
    1185      5137024 :         memcpy(t2, u, N_limbs * ciL);
    1186      5137024 :         mbedtls_mpi_core_cond_assign(t2, v, N_limbs, u_odd_v_even);
    1187      5137024 :         mbedtls_mpi_core_cond_assign(t2, d, N_limbs, u_odd_v_odd);
    1188              : 
    1189      5137024 :         mbedtls_mpi_core_shift_r(t2, N_limbs, 1); // t2 is even
    1190              : 
    1191              :         /* Update u, v and re-order them if needed */
    1192      5137024 :         memcpy(u, t1, N_limbs * ciL);
    1193      5137024 :         memcpy(v, t2, N_limbs * ciL);
    1194      5137024 :         mbedtls_ct_condition_t swap = mbedtls_mpi_core_lt_ct(v, u, N_limbs);
    1195      5137024 :         mbedtls_mpi_core_cond_swap(u, v, N_limbs, swap);
    1196              : 
    1197              :         /* Now, if modinv was requested, do the same with q, r, but:
    1198              :          * - decisions still based on u and v (their initial values);
    1199              :          * - operations are now mod N;
    1200              :          * - we re-use t1, t2 for what the paper calls t3, t4 in Alg 8.
    1201              :          *
    1202              :          * Here we slightly diverge from the paper and instead do the obvious
    1203              :          * thing that preserves the invariants involving q and r: mirror
    1204              :          * operations on u and v, ie also divide by 2 here (mod N).
    1205              :          *
    1206              :          * The paper uses a trick where it replaces division by 2 with
    1207              :          * multiplication by 2 here, and compensates in the end by multiplying
    1208              :          * by pre_com, which is probably intended as an optimisation.
    1209              :          *
    1210              :          * However I believe it's not actually an optimisation, since
    1211              :          * constant-time modular multiplication by 2 (left-shift + conditional
    1212              :          * subtract) is just as costly as constant-time modular division by 2
    1213              :          * (conditional add + right-shift). So, skip it and keep things simple.
    1214              :          */
    1215      5137024 :         if (I != NULL) {
    1216              :             /* This is called t2 in Alg 7 (no name in Alg 8). */
    1217      5124608 :             mpi_core_sub_mod(d, q, r, N, N_limbs);
    1218              : 
    1219              :             /* t3 (the thing that's kept) */
    1220      5124608 :             memcpy(t1, d, N_limbs * ciL);
    1221      5124608 :             mbedtls_mpi_core_cond_assign(t1, r, N_limbs, u_odd_v_odd);
    1222              : 
    1223              :             /* t4 (the thing that's shifted) */
    1224      5124608 :             memcpy(t2, r, N_limbs * ciL);
    1225      5124608 :             mbedtls_mpi_core_cond_assign(t2, q, N_limbs, u_odd_v_even);
    1226      5124608 :             mbedtls_mpi_core_cond_assign(t2, d, N_limbs, u_odd_v_odd);
    1227              : 
    1228      5124608 :             mbedtls_mpi_core_div2_mod_odd(t2, N, N_limbs);
    1229              : 
    1230              :             /* Update and possibly swap */
    1231      5124608 :             memcpy(r, t1, N_limbs * ciL);
    1232      5124608 :             memcpy(q, t2, N_limbs * ciL);
    1233      5124608 :             mbedtls_mpi_core_cond_swap(r, q, N_limbs, swap);
    1234              :         }
    1235              :     }
    1236              : 
    1237              :     /* G and I already hold the correct values by virtue of being aliased */
    1238         9733 : }
    1239              : 
    1240              : #endif /* MBEDTLS_BIGNUM_C */
        

Generated by: LCOV version 2.0-1