first pass

[PuTTY.git] / sshbn.h
diff --git a/sshbn.h b/sshbn.h

index 9366f614ae4cde0cb198626180758db235e4cee1..6ee97ee65785e70360631e818c6328fdd0677bf1 100644 (file)
--- a/sshbn.h
+++ b/sshbn.h
@@ -1,109 +1,220 @@
  /*
   * sshbn.h: the assorted conditional definitions of BignumInt and
- * multiply/divide macros used throughout the bignum code to treat
- * numbers as arrays of the most conveniently sized word for the
- * target machine. Exported so that other code (e.g. poly1305) can use
- * it too.
- */
-
-/*
- * Usage notes:
- *  * Do not call the DIVMOD_WORD macro with expressions such as array
- *    subscripts, as some implementations object to this (see below).
- *  * Note that none of the division methods below will cope if the
- *    quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
- *    to avoid this case.
- *    If this condition occurs, in the case of the x86 DIV instruction,
- *    an overflow exception will occur, which (according to a correspondent)
- *    will manifest on Windows as something like
- *      0xC0000095: Integer overflow
- *    The C variant won't give the right answer, either.
+ * multiply macros used throughout the bignum code to treat numbers as
+ * arrays of the most conveniently sized word for the target machine.
+ * Exported so that other code (e.g. poly1305) can use it too.
+ *
+ * This file must export, in whatever ifdef branch it ends up in:
+ *
+ *  - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
+ *    unsigned integer type which will be used as the base word size
+ *    for all bignum operations. BignumCarry is an unsigned integer
+ *    type used to hold the carry flag taken as input and output by
+ *    the BignumADC macro (see below).
+ *
+ *  - four constant macros: BIGNUM_INT_BITS, BIGNUM_INT_BYTES,
+ *    BIGNUM_TOP_BIT, BIGNUM_INT_MASK. These should be more or less
+ *    self-explanatory, but just in case, they give the number of bits
+ *    in BignumInt, the number of bytes that works out to, the
+ *    BignumInt value consisting of only the top bit, and the
+ *    BignumInt value with all bits set.
+ *
+ *  - four statement macros: BignumADC, BignumMUL, BignumMULADD,
+ *    BignumMULADD2. These do various kinds of multi-word arithmetic,
+ *    and all produce two output values.
+ *     * BignumADC(ret,retc,a,b,c) takes input BignumInt values a,b
+ *       and a BignumCarry c, and outputs a BignumInt ret = a+b+c and
+ *       a BignumCarry retc which is the carry off the top of that
+ *       addition.
+ *     * BignumMUL(rh,rl,a,b) returns the two halves of the
+ *       double-width product a*b.
+ *     * BignumMULADD(rh,rl,a,b,addend) returns the two halves of the
+ *       double-width value a*b + addend.
+ *     * BignumMULADD2(rh,rl,a,b,addend1,addend2) returns the two
+ *       halves of the double-width value a*b + addend1 + addend2.
+ *
+ * Every branch of the main ifdef below defines the type BignumInt and
+ * the value BIGNUM_INT_BITS. The other three constant macros are
+ * filled in by common code further down.
+ *
+ * Most branches also define a macro DEFINE_BIGNUMDBLINT containing a
+ * typedef statement which declares a type _twice_ the length of a
+ * BignumInt. This causes the common code further down to produce a
+ * default implementation of the four statement macros in terms of
+ * that double-width type, and also to defined BignumCarry to be
+ * BignumInt.
+ *
+ * However, if a particular compile target does not have a type twice
+ * the length of the BignumInt you want to use but it does provide
+ * some alternative means of doing add-with-carry and double-word
+ * multiply, then the ifdef branch in question can just define
+ * BignumCarry and the four statement macros itself, and that's fine
+ * too.
   */
  
  #if defined __SIZEOF_INT128__
-/* gcc and clang both provide a __uint128_t type on 64-bit targets
- * (and, when they do, indicate its presence by the above macro),
- * using the same 'two machine registers' kind of code generation that
- * 32-bit targets use for 64-bit ints. If we have one of these, we can
- * use a 64-bit BignumInt and a 128-bit BignumDblInt. */
-typedef unsigned long long BignumInt;
-typedef __uint128_t BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFFFFFFFFFULL
-#define BIGNUM_TOP_BIT   0x8000000000000000ULL
-#define BIGNUM_INT_BITS  64
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \
-    q = n / w; \
-    r = n % w; \
-} while (0)
-#elif defined __GNUC__ && defined __i386__
-typedef unsigned long BignumInt;
-typedef unsigned long long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#define DIVMOD_WORD(q, r, hi, lo, w) \
-    __asm__("div %2" : \
-           "=d" (r), "=a" (q) : \
-           "r" (w), "d" (hi), "a" (lo))
+
+  /*
+   * 64-bit BignumInt using gcc/clang style 128-bit BignumDblInt.
+   *
+   * gcc and clang both provide a __uint128_t type on 64-bit targets
+   * (and, when they do, indicate its presence by the above macro),
+   * using the same 'two machine registers' kind of code generation
+   * that 32-bit targets use for 64-bit ints.
+   */
+
+  typedef unsigned long long BignumInt;
+  #define BIGNUM_INT_BITS 64
+  #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
+
+#elif defined _MSC_VER && defined _M_AMD64
+
+  /*
+   * 64-bit BignumInt, using Visual Studio x86-64 compiler intrinsics.
+   *
+   * 64-bit Visual Studio doesn't provide very much in the way of help
+   * here: there's no int128 type, and also no inline assembler giving
+   * us direct access to the x86-64 MUL or ADC instructions. However,
+   * there are compiler intrinsics giving us that access, so we can
+   * use those - though it turns out we have to be a little careful,
+   * since they seem to generate wrong code if their pointer-typed
+   * output parameters alias their inputs. Hence all the internal temp
+   * variables inside the macros.
+   */
+
+  #include <intrin.h>
+  typedef unsigned char BignumCarry; /* the type _addcarry_u64 likes to use */
+  typedef unsigned __int64 BignumInt;
+  #define BIGNUM_INT_BITS 64
+  #define BignumADC(ret, retc, a, b, c) do                \
+      {                                                   \
+          BignumInt ADC_tmp;                              \
+          (retc) = _addcarry_u64(c, a, b, &ADC_tmp);      \
+          (ret) = ADC_tmp;                                \
+      } while (0)
+  #define BignumMUL(rh, rl, a, b) do              \
+      {                                           \
+          BignumInt MULADD_hi;                    \
+          (rl) = _umul128(a, b, &MULADD_hi);      \
+          (rh) = MULADD_hi;                       \
+      } while (0)
+  #define BignumMULADD(rh, rl, a, b, addend) do                           \
+      {                                                                   \
+          BignumInt MULADD_lo, MULADD_hi;                                 \
+          MULADD_lo = _umul128(a, b, &MULADD_hi);                         \
+          MULADD_hi += _addcarry_u64(0, MULADD_lo, (addend), &(rl));     \
+          (rh) = MULADD_hi;                                               \
+      } while (0)
+  #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do                \
+      {                                                                   \
+          BignumInt MULADD_lo1, MULADD_lo2, MULADD_hi;                    \
+          MULADD_lo1 = _umul128(a, b, &MULADD_hi);                        \
+          MULADD_hi += _addcarry_u64(0, MULADD_lo1, (addend1), &MULADD_lo2); \
+          MULADD_hi += _addcarry_u64(0, MULADD_lo2, (addend2), &(rl));    \
+          (rh) = MULADD_hi;                                               \
+      } while (0)
+
+#elif defined __GNUC__ || defined _LLP64 || __STDC__ >= 199901L
+
+  /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS 32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
+
  #elif defined _MSC_VER && defined _M_IX86
-typedef unsigned __int32 BignumInt;
-typedef unsigned __int64 BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-/* Note: MASM interprets array subscripts in the macro arguments as
- * assembler syntax, which gives the wrong answer. Don't supply them.
- * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
-#define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    __asm mov edx, hi \
-    __asm mov eax, lo \
-    __asm div w \
-    __asm mov r, edx \
-    __asm mov q, eax \
-} while(0)
+
+  /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS  32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
+
  #elif defined _LP64
-/* 64-bit architectures can do 32x32->64 chunks at a time */
-typedef unsigned int BignumInt;
-typedef unsigned long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFU
-#define BIGNUM_TOP_BIT   0x80000000U
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \
-    q = n / w; \
-    r = n % w; \
-} while (0)
-#elif defined _LLP64
-/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
-typedef unsigned long BignumInt;
-typedef unsigned long long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFFFFFUL
-#define BIGNUM_TOP_BIT   0x80000000UL
-#define BIGNUM_INT_BITS  32
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \
-    q = n / w; \
-    r = n % w; \
-} while (0)
+
+  /*
+   * 32-bit BignumInt, using unsigned long itself as BignumDblInt.
+   *
+   * Only for platforms where long is 64 bits, of course.
+   */
+
+  typedef unsigned int BignumInt;
+  #define BIGNUM_INT_BITS  32
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
+
  #else
-/* Fallback for all other cases */
-typedef unsigned short BignumInt;
-typedef unsigned long BignumDblInt;
-#define BIGNUM_INT_MASK  0xFFFFU
-#define BIGNUM_TOP_BIT   0x8000U
-#define BIGNUM_INT_BITS  16
-#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-#define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \
-    q = n / w; \
-    r = n % w; \
-} while (0)
+
+  /*
+   * 16-bit BignumInt, using unsigned long as BignumDblInt.
+   *
+   * This is the final fallback for real emergencies: C89 guarantees
+   * unsigned short/long to be at least the required sizes, so this
+   * should work on any C implementation at all. But it'll be
+   * noticeably slow, so if you find yourself in this case you
+   * probably want to move heaven and earth to find an alternative!
+   */
+
+  typedef unsigned short BignumInt;
+  #define BIGNUM_INT_BITS  16
+  #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
+
  #endif
  
+/*
+ * Common code across all branches of that ifdef: define the three
+ * easy constant macros in terms of BIGNUM_INT_BITS.
+ */
  #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
+#define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
+#define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
+
+/*
+ * Common code across _most_ branches of the ifdef: define a set of
+ * statement macros in terms of the BignumDblInt type provided. In
+ * this case, we also define BignumCarry to be the same thing as
+ * BignumInt, for simplicity.
+ */
+#ifdef DEFINE_BIGNUMDBLINT
+
+  typedef BignumInt BignumCarry;
+  #define BignumADC(ret, retc, a, b, c) do                        \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt ADC_temp = (BignumInt)(a);                 \
+          ADC_temp += (BignumInt)(b);                             \
+          ADC_temp += (c);                                        \
+          (ret) = (BignumInt)ADC_temp;                            \
+          (retc) = (BignumCarry)(ADC_temp >> BIGNUM_INT_BITS);    \
+      } while (0)
+  
+  #define BignumMUL(rh, rl, a, b) do                              \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+  
+  #define BignumMULADD(rh, rl, a, b, addend) do                   \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          MUL_temp += (BignumInt)(addend);                        \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+  
+  #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do        \
+      {                                                           \
+          DEFINE_BIGNUMDBLINT;                                    \
+          BignumDblInt MUL_temp = (BignumInt)(a);                 \
+          MUL_temp *= (BignumInt)(b);                             \
+          MUL_temp += (BignumInt)(addend1);                       \
+          MUL_temp += (BignumInt)(addend2);                       \
+          (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS);        \
+          (rl) = (BignumInt)(MUL_temp);                           \
+      } while (0)
+
+#endif /* DEFINE_BIGNUMDBLINT */