Improvements from Spyros Blanas to the MSVC optimisations of r6469:

author Jacob Nevins <jacobn@chiark.greenend.org.uk>

Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)

committer Jacob Nevins <jacobn@chiark.greenend.org.uk>

Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)
author Jacob Nevins <jacobn@chiark.greenend.org.uk>
Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)
committer Jacob Nevins <jacobn@chiark.greenend.org.uk>
Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)
diff --git a/sshbn.c b/sshbn.c

index 14cdd05e17e22b64258b9a718a1320310d6efad0..728b6fa08283c9567e8a4a0c6852bbca082c1ed3 100644 (file)
--- a/sshbn.c
+++ b/sshbn.c
@@ -9,6 +9,20 @@
  
  #include "misc.h"
  
+/*
+ * Usage notes:
+ *  * Do not call the DIVMOD_WORD macro with expressions such as array
+ *    subscripts, as some implementations object to this (see below).
+ *  * Note that none of the division methods below will cope if the
+ *    quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
+ *    to avoid this case.
+ *    If this condition occurs, in the case of the x86 DIV instruction,
+ *    an overflow exception will occur, which (according to a correspondent)
+ *    will manifest on Windows as something like
+ *      0xC0000095: Integer overflow
+ *    The C variant won't give the right answer, either.
+ */
+
  #if defined __GNUC__ && defined __i386__
  typedef unsigned long BignumInt;
  typedef unsigned long long BignumDblInt;
@@ -27,26 +41,16 @@ typedef unsigned __int64 BignumDblInt;
  #define BIGNUM_TOP_BIT   0x80000000UL
  #define BIGNUM_INT_BITS  32
  #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
-typedef struct {
-    unsigned __int32 quot;
-    unsigned __int32 remd;
-} msvc_quorem;
-static __declspec(naked) msvc_quorem __stdcall msvc_divmod(
-    unsigned __int32 hi,
-    unsigned __int32 lo,
-    unsigned __int32 w)
-{
-    __asm {
-       mov edx, dword ptr [esp+4]
-        mov eax, dword ptr [esp+8]
-        div dword ptr [esp+12]
-        ret 12
-    };
-}
+/* Note: MASM interprets array subscripts in the macro arguments as
+ * assembler syntax, which gives the wrong answer. Don't supply them.
+ * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
  #define DIVMOD_WORD(q, r, hi, lo, w) do { \
-    const msvc_quorem qr = msvc_divmod((hi), (lo), (w)); \
-    (q) = qr.quot; (r) = qr.remd; \
-} while (0)
+    __asm mov edx, hi \
+    __asm mov eax, lo \
+    __asm div w \
+    __asm mov r, edx \
+    __asm mov q, eax \
+} while(0)
  #else
  typedef unsigned short BignumInt;
  typedef unsigned long BignumDblInt;
@@ -230,7 +234,10 @@ static void internal_mod(BignumInt *a, int alen,
              */
             q = BIGNUM_INT_MASK;
         } else {
-           DIVMOD_WORD(q, r, h, a[i], m0);
+           /* Macro doesn't want an array subscript expression passed
+            * into it (see definition), so use a temporary. */
+           BignumInt tmplo = a[i];
+           DIVMOD_WORD(q, r, h, tmplo, m0);
  
             /* Refine our estimate of q by looking at
              h:a[i]:a[i+1] / m0:m1 */
author	Jacob Nevins <jacobn@chiark.greenend.org.uk>
	Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)
committer	Jacob Nevins <jacobn@chiark.greenend.org.uk>
	Tue, 6 Dec 2005 23:18:27 +0000 (23:18 +0000)