+ if (full_bitoffset < 0) {
+ /*
+ * If we find ourselves needing to shift q*m _right_, that
+ * means we've reached the bottom of the quotient. Clip q
+ * so that its right shift becomes zero, and if that means
+ * q becomes _actually_ zero, this loop is done.
+ */
+ if (full_bitoffset <= -BIGNUM_INT_BITS)
+ break;
+ q >>= -full_bitoffset;
+ full_bitoffset = 0;
+ if (!q)
+ break;
+#ifdef DIVISION_DEBUG
+ printf("now full_bitoffset=%d, q=%#0*llx\n",
+ full_bitoffset, BIGNUM_INT_BITS/4, (unsigned long long)q);
+#endif
+ }
+
+ wordoffset = full_bitoffset / BIGNUM_INT_BITS;
+ bitoffset = full_bitoffset % BIGNUM_INT_BITS;
+#ifdef DIVISION_DEBUG
+ printf("wordoffset=%d, bitoffset=%d\n", wordoffset, bitoffset);
+#endif
+
+ /* wordoffset as computed above is the offset between the LSWs
+ * of m and a. But in fact m and a are stored MSW-first, so we
+ * need to adjust it to be the offset between the actual array
+ * indices, and flip the sign too. */
+ wordoffset = alen - mlen - wordoffset;
+
+ if (bitoffset == 0) {
+ BignumCarry c = 1;
+ BignumInt prev_hi_word = 0;
+ for (k = mlen - 1; wordoffset+k >= i; k--) {
+ BignumInt mword = k<0 ? 0 : m[k];
+ BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
+#ifdef DIVISION_DEBUG
+ printf(" aligned sub: product word for m[%d] = %#0*llx\n",
+ k, BIGNUM_INT_BITS/4,
+ (unsigned long long)product);
+#endif
+#ifdef DIVISION_DEBUG
+ printf(" aligned sub: subtrahend for a[%d] = %#0*llx\n",
+ wordoffset+k, BIGNUM_INT_BITS/4,
+ (unsigned long long)product);
+#endif
+ BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c);
+ }
+ } else {
+ BignumInt add_word = 0;
+ BignumInt c = 1;
+ BignumInt prev_hi_word = 0;
+ for (k = mlen - 1; wordoffset+k >= i; k--) {
+ BignumInt mword = k<0 ? 0 : m[k];
+ BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
+#ifdef DIVISION_DEBUG
+ printf(" unaligned sub: product word for m[%d] = %#0*llx\n",
+ k, BIGNUM_INT_BITS/4,
+ (unsigned long long)product);
+#endif
+
+ add_word |= product << bitoffset;
+
+#ifdef DIVISION_DEBUG
+ printf(" unaligned sub: subtrahend for a[%d] = %#0*llx\n",
+ wordoffset+k,
+ BIGNUM_INT_BITS/4, (unsigned long long)add_word);
+#endif
+ BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c);
+
+ add_word = product >> (BIGNUM_INT_BITS - bitoffset);
+ }
+ }
+
+ if (quot) {
+#ifdef DIVISION_DEBUG
+ printf("adding quotient word %#0*llx << %d\n",
+ BIGNUM_INT_BITS/4, (unsigned long long)q, full_bitoffset);
+#endif
+ internal_add_shifted(quot, q, full_bitoffset);
+#ifdef DIVISION_DEBUG
+ {
+ int d;
+ printf("now quot=0x");
+ for (d = quot[0]; d > 0; d--)
+ printf("%0*llx", BIGNUM_INT_BITS/4,
+ (unsigned long long)quot[d]);
+ printf("\n");
+ }
+#endif
+ }
+ }
+
+#ifdef DIVISION_DEBUG
+ {
+ int d;
+ printf("end main loop, a=0x");
+ for (d = 0; d < alen; d++)
+ printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);
+ if (quot) {
+ printf(", quot=0x");
+ for (d = quot[0]; d > 0; d--)
+ printf("%0*llx", BIGNUM_INT_BITS/4,
+ (unsigned long long)quot[d]);
+ }
+ printf("\n");
+ }
+#endif
+
+ /*
+ * The above loop should terminate with the remaining value in a
+ * being strictly less than 2*m (if a >= 2*m then we should always
+ * have managed to get a nonzero q word), but we can't guarantee
+ * that it will be strictly less than m: consider a case where the
+ * remainder is 1, and another where the remainder is m-1. By the
+ * time a contains a value that's _about m_, you clearly can't
+ * distinguish those cases by looking at only the top word of a -
+ * you have to go all the way down to the bottom before you find
+ * out whether it's just less or just more than m.
+ *
+ * Hence, we now do a final fixup in which we subtract one last
+ * copy of m, or don't, accordingly. We should never have to
+ * subtract more than one copy of m here.
+ */
+ for (i = 0; i < alen; i++) {
+ /* Compare a with m, word by word, from the MSW down. As soon
+ * as we encounter a difference, we know whether we need the
+ * fixup. */
+ int mindex = mlen-alen+i;
+ BignumInt mword = mindex < 0 ? 0 : m[mindex];
+ if (a[i] < mword) {
+#ifdef DIVISION_DEBUG
+ printf("final fixup not needed, a < m\n");
+#endif
+ return;
+ } else if (a[i] > mword) {
+#ifdef DIVISION_DEBUG
+ printf("final fixup is needed, a > m\n");
+#endif
+ break;
+ }
+ /* If neither of those cases happened, the words are the same,
+ * so keep going and look at the next one. */
+ }
+#ifdef DIVISION_DEBUG
+ if (i == mlen) /* if we printed neither of the above diagnostics */
+ printf("final fixup is needed, a == m\n");
+#endif
+
+ /*
+ * If we got here without returning, then a >= m, so we must
+ * subtract m, and increment the quotient.
+ */
+ {
+ BignumCarry c = 1;
+ for (i = alen - 1; i >= 0; i--) {
+ int mindex = mlen-alen+i;
+ BignumInt mword = mindex < 0 ? 0 : m[mindex];
+ BignumADC(a[i], c, a[i], ~mword, c);
+ }
+ }
+ if (quot)
+ internal_add_shifted(quot, 1, 0);
+
+#ifdef DIVISION_DEBUG
+ {
+ int d;
+ printf("after final fixup, a=0x");
+ for (d = 0; d < alen; d++)
+ printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);
+ if (quot) {
+ printf(", quot=0x");
+ for (d = quot[0]; d > 0; d--)
+ printf("%0*llx", BIGNUM_INT_BITS/4,
+ (unsigned long long)quot[d]);
+ }
+ printf("\n");
+ }
+#endif
+}
+
+/*
+ * Compute (base ^ exp) % mod, the pedestrian way.
+ */
+Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
+{
+ BignumInt *a, *b, *n, *m, *scratch;
+ BignumInt recip;
+ int rshift;
+ int mlen, scratchlen, i, j;
+ Bignum base, result;
+
+ /*
+ * The most significant word of mod needs to be non-zero. It
+ * should already be, but let's make sure.
+ */
+ assert(mod[mod[0]] != 0);
+
+ /*
+ * Make sure the base is smaller than the modulus, by reducing
+ * it modulo the modulus if not.
+ */
+ base = bigmod(base_in, mod);
+
+ /* Allocate m of size mlen, copy mod to m */
+ /* We use big endian internally */
+ mlen = mod[0];
+ m = snewn(mlen, BignumInt);
+ for (j = 0; j < mlen; j++)
+ m[j] = mod[mod[0] - j];
+
+ /* Allocate n of size mlen, copy base to n */
+ n = snewn(mlen, BignumInt);
+ i = mlen - base[0];
+ for (j = 0; j < i; j++)
+ n[j] = 0;
+ for (j = 0; j < (int)base[0]; j++)
+ n[i + j] = base[base[0] - j];
+
+ /* Allocate a and b of size 2*mlen. Set a = 1 */
+ a = snewn(2 * mlen, BignumInt);
+ b = snewn(2 * mlen, BignumInt);
+ for (i = 0; i < 2 * mlen; i++)
+ a[i] = 0;
+ a[2 * mlen - 1] = 1;
+
+ /* Scratch space for multiplies */
+ scratchlen = mul_compute_scratch(mlen);
+ scratch = snewn(scratchlen, BignumInt);
+
+ /* Skip leading zero bits of exp. */
+ i = 0;
+ j = BIGNUM_INT_BITS-1;
+ while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {
+ j--;
+ if (j < 0) {
+ i++;
+ j = BIGNUM_INT_BITS-1;