#!/usr/bin/env python
import sys
+import string
+from collections import namedtuple
-class Output(object):
- def __init__(self, bignum_int_bits):
- self.bignum_int_bits = bignum_int_bits
- self.text = ""
- self.vars = []
- def stmt(self, statement):
- self.text += " %s;\n" % statement
- def register_var(self, var):
- self.vars.append(var)
- def finalise(self):
- for var in self.vars:
- assert var.maxval == 0, "Variable not clear: %s" % var.name
- return self.text
-
-class Variable(object):
- def __init__(self, out, name):
- self.out = out
- self.maxval = 0
- self.name = name
- self.placeval = None
- self.out.stmt("BignumDblInt %s" % (self.name))
- self.out.register_var(self)
- def clear(self, placeval):
- self.maxval = 0
- self.placeval = placeval
- self.out.stmt("%s = 0" % (self.name))
- def set_word(self, name, limit=None):
- if limit is not None:
- self.maxval = limit-1
- else:
- self.maxval = (1 << self.out.bignum_int_bits) - 1
- assert self.maxval < (1 << 2*self.out.bignum_int_bits)
- self.out.stmt("%s = %s" % (self.name, name))
- def add_word(self, name, limit=None):
- if limit is not None:
- self.maxval += limit-1
- else:
- self.maxval += (1 << self.out.bignum_int_bits) - 1
- assert self.maxval < (1 << 2*self.out.bignum_int_bits)
- self.out.stmt("%s += %s" % (self.name, name))
- def add_input_word(self, fmt, wordpos, limit=None):
- assert self.placeval == wordpos * self.out.bignum_int_bits
- self.add_word(fmt % wordpos, limit)
- def set_to_product(self, a, b, placeval):
- self.maxval = ((1 << self.out.bignum_int_bits) - 1) ** 2
- assert self.maxval < (1 << 2*self.out.bignum_int_bits)
- self.out.stmt("%s = (BignumDblInt)(%s) * (%s)" % (self.name, a, b))
- self.placeval = placeval
- def add_bottom_half(self, srcvar):
- self.add_word("%s & BIGNUM_INT_MASK" % (srcvar.name))
- def add_top_half(self, srcvar):
- self.add_word("%s >> %d" % (srcvar.name, self.out.bignum_int_bits))
- def unload_into(self, topvar, botvar):
- assert botvar.placeval == self.placeval
- botvar.add_bottom_half(self)
- assert topvar.placeval == self.placeval + self.out.bignum_int_bits
- topvar.add_top_half(self)
- self.maxval = 0
- def output_word(self, bitpos, bits, destfmt, destwordpos):
- assert bitpos == 0
- assert self.placeval == destwordpos * self.out.bignum_int_bits
- dest = destfmt % destwordpos
- if bits == self.out.bignum_int_bits:
- self.out.stmt("%s = %s" % (dest, self.name))
- else:
- self.out.stmt("%s = %s & (((BignumInt)1 << %d)-1)" %
- (dest, self.name, bits))
- def transfer_to_next_acc(self, bitpos, bits, pow5, destvar):
- destbitpos = self.placeval + bitpos - 130 * pow5 - destvar.placeval
- #print "transfer", "*%d" % 5**pow5, self.name, self.placeval, bitpos, destvar.name, destvar.placeval, destbitpos, bits
- assert 0 <= bitpos < bitpos+bits <= self.out.bignum_int_bits
- assert 0 <= destbitpos < destbitpos+bits <= self.out.bignum_int_bits
- expr = self.name
- if bitpos > 0:
- expr = "(%s >> %d)" % (expr, bitpos)
- expr = "(%s & (((BignumInt)1 << %d)-1))" % (expr, bits)
- self.out.stmt("%s += %s * ((BignumDblInt)%d << %d)" %
- (destvar.name, expr, 5**pow5, destbitpos))
- destvar.maxval += (((1 << bits)-1) << destbitpos) * (5**pow5)
- def shift_down_from(self, top):
- if top is not None:
- self.out.stmt("%s = %s + (%s >> %d)" %
- (self.name, top.name, self.name,
- self.out.bignum_int_bits))
- topmaxval = top.maxval
- else:
- self.out.stmt("%s >>= %d" % (self.name, self.out.bignum_int_bits))
- topmaxval = 0
- self.maxval = topmaxval + self.maxval >> self.out.bignum_int_bits
- assert self.maxval < (1 << 2*self.out.bignum_int_bits)
- if top is not None:
- assert self.placeval + self.out.bignum_int_bits == top.placeval
- top.clear(top.placeval + self.out.bignum_int_bits)
- self.placeval += self.out.bignum_int_bits
-
-def gen_add(bignum_int_bits):
- out = Output(bignum_int_bits)
-
- inbits = 130
- inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits
+class Multiprecision(object):
+ def __init__(self, target, minval, maxval, words):
+ self.target = target
+ self.minval = minval
+ self.maxval = maxval
+ self.words = words
+ assert 0 <= self.minval
+ assert self.minval <= self.maxval
+ assert self.target.nwords(self.maxval) == len(words)
+ def getword(self, n):
+ return self.words[n] if n < len(self.words) else "0"
+
+ def __add__(self, rhs):
+ newmin = self.minval + rhs.minval
+ newmax = self.maxval + rhs.maxval
+ nwords = self.target.nwords(newmax)
+ words = []
+
+ addfn = self.target.add
+ for i in range(nwords):
+ words.append(addfn(self.getword(i), rhs.getword(i)))
+ addfn = self.target.adc
+
+ return Multiprecision(self.target, newmin, newmax, words)
+
+ def __mul__(self, rhs):
+ newmin = self.minval * rhs.minval
+ newmax = self.maxval * rhs.maxval
+ nwords = self.target.nwords(newmax)
+ words = []
+
+ # There are basically two strategies we could take for
+ # multiplying two multiprecision integers. One is to enumerate
+ # the space of pairs of word indices in lexicographic order,
+ # essentially computing a*b[i] for each i and adding them
+ # together; the other is to enumerate in diagonal order,
+ # computing everything together that belongs at a particular
+ # output word index.
+ #
+ # For the moment, I've gone for the former.
+
+ sprev = []
+ for i, sword in enumerate(self.words):
+ rprev = None
+ sthis = sprev[:i]
+ for j, rword in enumerate(rhs.words):
+ prevwords = []
+ if i+j < len(sprev):
+ prevwords.append(sprev[i+j])
+ if rprev is not None:
+ prevwords.append(rprev)
+ vhi, vlo = self.target.muladd(sword, rword, *prevwords)
+ sthis.append(vlo)
+ rprev = vhi
+ sthis.append(rprev)
+ sprev = sthis
+
+ # Remove unneeded words from the top of the output, if we can
+ # prove by range analysis that they'll always be zero.
+ sprev = sprev[:self.target.nwords(newmax)]
+
+ return Multiprecision(self.target, newmin, newmax, sprev)
+
+ def extract_bits(self, start, bits=None):
+ if bits is None:
+ bits = (self.maxval >> start).bit_length()
+
+ # Overly thorough range analysis: if min and max have the same
+ # *quotient* by 2^bits, then the result of reducing anything
+ # in the range [min,max] mod 2^bits has to fall within the
+ # obvious range. But if they have different quotients, then
+ # you can wrap round the modulus and so any value mod 2^bits
+ # is possible.
+ newmin = self.minval >> start
+ newmax = self.maxval >> start
+ if (newmin >> bits) != (newmax >> bits):
+ newmin = 0
+ newmax = (1 << bits) - 1
+
+ nwords = self.target.nwords(newmax)
+ words = []
+ for i in range(nwords):
+ srcpos = i * self.target.bits + start
+ maxbits = min(self.target.bits, start + bits - srcpos)
+ wordindex = srcpos / self.target.bits
+ if srcpos % self.target.bits == 0:
+ word = self.getword(srcpos / self.target.bits)
+ elif (wordindex+1 >= len(self.words) or
+ srcpos % self.target.bits + maxbits < self.target.bits):
+ word = self.target.new_value(
+ "(%%s) >> %d" % (srcpos % self.target.bits),
+ self.getword(srcpos / self.target.bits))
+ else:
+ word = self.target.new_value(
+ "((%%s) >> %d) | ((%%s) << %d)" % (
+ srcpos % self.target.bits,
+ self.target.bits - (srcpos % self.target.bits)),
+ self.getword(srcpos / self.target.bits),
+ self.getword(srcpos / self.target.bits + 1))
+ if maxbits < self.target.bits and maxbits < bits:
+ word = self.target.new_value(
+ "(%%s) & ((((BignumInt)1) << %d)-1)" % maxbits,
+ word)
+ words.append(word)
+
+ return Multiprecision(self.target, newmin, newmax, words)
+
+# Each Statement has a list of variables it reads, and a list of ones
+# it writes. 'forms' is a list of multiple actual C statements it
+# could be generated as, depending on which of its output variables is
+# actually used (e.g. no point calling BignumADC if the generated
+# carry in a particular case is unused, or BignumMUL if nobody needs
+# the top half). It is indexed by a bitmap whose bits correspond to
+# the entries in wvars, with wvars[0] the MSB and wvars[-1] the LSB.
+Statement = namedtuple("Statement", "rvars wvars forms")
+
+class CodegenTarget(object):
+ def __init__(self, bits):
+ self.bits = bits
+ self.valindex = 0
+ self.stmts = []
+ self.generators = {}
+ self.bv_words = (130 + self.bits - 1) / self.bits
+ self.carry_index = 0
+
+ def nwords(self, maxval):
+ return (maxval.bit_length() + self.bits - 1) / self.bits
+
+ def stmt(self, stmt, needed=False):
+ index = len(self.stmts)
+ self.stmts.append([needed, stmt])
+ for val in stmt.wvars:
+ self.generators[val] = index
+
+ def new_value(self, formatstr=None, *deps):
+ name = "v%d" % self.valindex
+ self.valindex += 1
+ if formatstr is not None:
+ self.stmt(Statement(
+ rvars=deps, wvars=[name],
+ forms=[None, name + " = " + formatstr % deps]))
+ return name
+
+ def bigval_input(self, name, bits):
+ words = (bits + self.bits - 1) / self.bits
+ # Expect not to require an entire extra word
+ assert words == self.bv_words
+
+ return Multiprecision(self, 0, (1<<bits)-1, [
+ self.new_value("%s->w[%d]" % (name, i)) for i in range(words)])
+
+ def const(self, value):
+ # We only support constants small enough to both fit in a
+ # BignumInt (of any size supported) _and_ be expressible in C
+ # with no weird integer literal syntax like a trailing LL.
+ #
+ # Supporting larger constants would be possible - you could
+ # break 'value' up into word-sized pieces on the Python side,
+ # and generate a legal C expression for each piece by
+ # splitting it further into pieces within the
+ # standards-guaranteed 'unsigned long' limit of 32 bits and
+ # then casting those to BignumInt before combining them with
+ # shifts. But it would be a lot of effort, and since the
+ # application for this code doesn't even need it, there's no
+ # point in bothering.
+ assert value < 2**16
+ return Multiprecision(self, value, value, ["%d" % value])
+
+ def current_carry(self):
+ return "carry%d" % self.carry_index
+
+ def add(self, a1, a2):
+ ret = self.new_value()
+ adcform = "BignumADC(%s, carry, %s, %s, 0)" % (ret, a1, a2)
+ plainform = "%s = %s + %s" % (ret, a1, a2)
+ self.carry_index += 1
+ carryout = self.current_carry()
+ self.stmt(Statement(
+ rvars=[a1,a2], wvars=[ret,carryout],
+ forms=[None, adcform, plainform, adcform]))
+ return ret
+
+ def adc(self, a1, a2):
+ ret = self.new_value()
+ adcform = "BignumADC(%s, carry, %s, %s, carry)" % (ret, a1, a2)
+ plainform = "%s = %s + %s + carry" % (ret, a1, a2)
+ carryin = self.current_carry()
+ self.carry_index += 1
+ carryout = self.current_carry()
+ self.stmt(Statement(
+ rvars=[a1,a2,carryin], wvars=[ret,carryout],
+ forms=[None, adcform, plainform, adcform]))
+ return ret
+
+ def muladd(self, m1, m2, *addends):
+ rlo = self.new_value()
+ rhi = self.new_value()
+ wideform = "BignumMUL%s(%s)" % (
+ { 0:"", 1:"ADD", 2:"ADD2" }[len(addends)],
+ ", ".join([rhi, rlo, m1, m2] + list(addends)))
+ narrowform = " + ".join(["%s = %s * %s" % (rlo, m1, m2)] +
+ list(addends))
+ self.stmt(Statement(
+ rvars=[m1,m2]+list(addends), wvars=[rhi,rlo],
+ forms=[None, narrowform, wideform, wideform]))
+ return rhi, rlo
+
+ def write_bigval(self, name, val):
+ for i in range(self.bv_words):
+ word = val.getword(i)
+ self.stmt(Statement(
+ rvars=[word], wvars=[],
+ forms=["%s->w[%d] = %s" % (name, i, word)]),
+ needed=True)
+
+ def compute_needed(self):
+ used_vars = set()
+
+ self.queue = [stmt for (needed,stmt) in self.stmts if needed]
+ while len(self.queue) > 0:
+ stmt = self.queue.pop(0)
+ deps = []
+ for var in stmt.rvars:
+ if var[0] in string.digits:
+ continue # constant
+ deps.append(self.generators[var])
+ used_vars.add(var)
+ for index in deps:
+ if not self.stmts[index][0]:
+ self.stmts[index][0] = True
+ self.queue.append(self.stmts[index][1])
+
+ forms = []
+ for i, (needed, stmt) in enumerate(self.stmts):
+ if needed:
+ formindex = 0
+ for (j, var) in enumerate(stmt.wvars):
+ formindex *= 2
+ if var in used_vars:
+ formindex += 1
+ forms.append(stmt.forms[formindex])
+
+ # Now we must check whether this form of the statement
+ # also writes some variables we _don't_ actually need
+ # (e.g. if you only wanted the top half from a mul, or
+ # only the carry from an adc, you'd be forced to
+ # generate the other output too). Easiest way to do
+ # this is to look for an identical statement form
+ # later in the array.
+ maxindex = max(i for i in range(len(stmt.forms))
+ if stmt.forms[i] == stmt.forms[formindex])
+ extra_vars = maxindex & ~formindex
+ bitpos = 0
+ while extra_vars != 0:
+ if extra_vars & (1 << bitpos):
+ extra_vars &= ~(1 << bitpos)
+ var = stmt.wvars[-1-bitpos]
+ used_vars.add(var)
+ # Also, write out a cast-to-void for each
+ # subsequently unused value, to prevent gcc
+ # warnings when the output code is compiled.
+ forms.append("(void)" + var)
+ bitpos += 1
+
+ used_carry = any(v.startswith("carry") for v in used_vars)
+ used_vars = [v for v in used_vars if v.startswith("v")]
+ used_vars.sort(key=lambda v: int(v[1:]))
+
+ return used_carry, used_vars, forms
+
+ def text(self):
+ used_carry, values, forms = self.compute_needed()
+
+ ret = ""
+ while len(values) > 0:
+ prefix, sep, suffix = " BignumInt ", ", ", ";"
+ currline = values.pop(0)
+ while (len(values) > 0 and
+ len(prefix+currline+sep+values[0]+suffix) < 79):
+ currline += sep + values.pop(0)
+ ret += prefix + currline + suffix + "\n"
+ if used_carry:
+ ret += " BignumCarry carry;\n"
+ if ret != "":
+ ret += "\n"
+ for stmtform in forms:
+ ret += " %s;\n" % stmtform
+ return ret
+
+def gen_add(target):
# This is an addition _without_ reduction mod p, so that it can be
# used both during accumulation of the polynomial and for adding
# on the encrypted nonce at the end (which is mod 2^128, not mod
# Because one of the inputs will have come from our
# not-completely-reducing multiplication function, we expect up to
# 3 extra bits of input.
- acclo = Variable(out, "acclo")
-
- acclo.clear(0)
-
- for wordpos in range(inwords):
- limit = min(1 << bignum_int_bits, 1 << (130 - wordpos*bignum_int_bits))
- acclo.add_input_word("a->w[%d]", wordpos, limit)
- acclo.add_input_word("b->w[%d]", wordpos, limit)
- acclo.output_word(0, bignum_int_bits, "r->w[%d]", wordpos)
- acclo.shift_down_from(None)
-
- return out.finalise()
-def gen_mul_1305(bignum_int_bits):
- out = Output(bignum_int_bits)
-
- inbits = 130
- inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits
+ a = target.bigval_input("a", 133)
+ b = target.bigval_input("b", 133)
+ ret = a + b
+ target.write_bigval("r", ret)
+ return """\
+static void bigval_add(bigval *r, const bigval *a, const bigval *b)
+{
+%s}
+\n""" % target.text()
+def gen_mul(target):
# The inputs are not 100% reduced mod p. Specifically, we can get
# a full 130-bit number from the pow5==0 pass, and then a 130-bit
# number times 5 from the pow5==1 pass, plus a possible carry. The
# total of that can be easily bounded above by 2^130 * 8, so we
# need to assume we're multiplying two 133-bit numbers.
- outbits = (inbits + 3) * 2
- outwords = (outbits + bignum_int_bits - 1) / bignum_int_bits + 1
-
- tmp = Variable(out, "tmp")
- acclo = Variable(out, "acclo")
- acchi = Variable(out, "acchi")
- acc2lo = Variable(out, "acc2lo")
-
- pow5, bits_at_pow5 = 0, inbits
-
- acclo.clear(0)
- acchi.clear(bignum_int_bits)
- bits_needed_in_acc2 = bignum_int_bits
-
- for outwordpos in range(outwords):
- for a in range(inwords):
- b = outwordpos - a
- if 0 <= b < inwords:
- tmp.set_to_product("a->w[%d]" % a, "b->w[%d]" % b,
- outwordpos * bignum_int_bits)
- tmp.unload_into(acchi, acclo)
-
- bits_in_word = bignum_int_bits
- bitpos = 0
- #print "begin output"
- while bits_in_word > 0:
- chunk = min(bits_in_word, bits_at_pow5)
- if pow5 > 0:
- chunk = min(chunk, bits_needed_in_acc2)
- if pow5 == 0:
- acclo.output_word(bitpos, chunk, "r->w[%d]", outwordpos)
- else:
- acclo.transfer_to_next_acc(bitpos, chunk, pow5, acc2lo)
- bits_needed_in_acc2 -= chunk
- if bits_needed_in_acc2 == 0:
- assert acc2lo.placeval % bignum_int_bits == 0
- other_outwordpos = acc2lo.placeval / bignum_int_bits
- acc2lo.add_input_word("r->w[%d]", other_outwordpos)
- acc2lo.output_word(bitpos, bignum_int_bits, "r->w[%d]",
- other_outwordpos)
- acc2lo.shift_down_from(None)
- bits_needed_in_acc2 = bignum_int_bits
- bits_in_word -= chunk
- bits_at_pow5 -= chunk
- bitpos += chunk
- if bits_at_pow5 == 0:
- if pow5 > 0:
- assert acc2lo.placeval % bignum_int_bits == 0
- other_outwordpos = acc2lo.placeval / bignum_int_bits
- acc2lo.add_input_word("r->w[%d]", other_outwordpos)
- acc2lo.output_word(0, bignum_int_bits, "r->w[%d]",
- other_outwordpos)
- pow5 += 1
- bits_at_pow5 = inbits
- acc2lo.clear(0)
- bits_needed_in_acc2 = bignum_int_bits
- acclo.shift_down_from(acchi)
-
- while acc2lo.maxval > 0:
- other_outwordpos = acc2lo.placeval / bignum_int_bits
- bitsleft = inbits - other_outwordpos * bignum_int_bits
- limit = 1<<bitsleft if bitsleft < bignum_int_bits else None
- acc2lo.add_input_word("r->w[%d]", other_outwordpos, limit=limit)
- acc2lo.output_word(0, bignum_int_bits, "r->w[%d]", other_outwordpos)
- acc2lo.shift_down_from(None)
-
- return out.finalise()
-
-def gen_final_reduce_1305(bignum_int_bits):
- out = Output(bignum_int_bits)
-
- inbits = 130
- inwords = (inbits + bignum_int_bits - 1) / bignum_int_bits
-
- # We take our input number n, and compute k = 5 + 5*(n >> 130).
+
+ a = target.bigval_input("a", 133)
+ b = target.bigval_input("b", 133)
+ ab = a * b
+ ab0 = ab.extract_bits(0, 130)
+ ab1 = ab.extract_bits(130, 130)
+ ab2 = ab.extract_bits(260)
+ ab1_5 = target.const(5) * ab1
+ ab2_25 = target.const(25) * ab2
+ ret = ab0 + ab1_5 + ab2_25
+ target.write_bigval("r", ret)
+ return """\
+static void bigval_mul_mod_p(bigval *r, const bigval *a, const bigval *b)
+{
+%s}
+\n""" % target.text()
+
+def gen_final_reduce(target):
+ # We take our input number n, and compute k = n + 5*(n >> 130).
# Then k >> 130 is precisely the multiple of p that needs to be
# subtracted from n to reduce it to strictly less than p.
- acclo = Variable(out, "acclo")
-
- acclo.clear(0)
- # Hopefully all the bits we're shifting down fit in the same word.
- assert 130 / bignum_int_bits == (130 + 3 - 1) / bignum_int_bits
- acclo.add_word("5 * ((n->w[%d] >> %d) + 1)" %
- (130 / bignum_int_bits, 130 % bignum_int_bits),
- limit = 5 * (7 + 1))
- for wordpos in range(inwords):
- acclo.add_input_word("n->w[%d]", wordpos)
- # Notionally, we could call acclo.output_word here to store
- # our adjusted value k. But we don't need to, because all we
- # actually want is the very top word of it.
- if wordpos == 130 / bignum_int_bits:
- break
- acclo.shift_down_from(None)
-
- # Now we can find the right multiple of p to subtract. We actually
- # subtract it by adding 5 times it, and then finally discarding
- # the top bits of the output.
-
- # Hopefully all the bits we're shifting down fit in the same word.
- assert 130 / bignum_int_bits == (130 + 3 - 1) / bignum_int_bits
- acclo.set_word("5 * (acclo >> %d)" % (130 % bignum_int_bits),
- limit = 5 * (7 + 1))
- acclo.placeval = 0
- for wordpos in range(inwords):
- acclo.add_input_word("n->w[%d]", wordpos)
- acclo.output_word(0, bignum_int_bits, "n->w[%d]", wordpos)
- acclo.shift_down_from(None)
-
- out.stmt("n->w[%d] &= (1 << %d) - 1" %
- (130 / bignum_int_bits, 130 % bignum_int_bits))
-
- # Here we don't call out.finalise(), because that will complain
- # that there are bits of output we never dealt with. This is true,
- # but all the bits in question are above 2^130, so they're bits
- # we're discarding anyway.
- return out.text # not out.finalise()
-
-ops = { "mul" : gen_mul_1305,
- "add" : gen_add,
- "final_reduce" : gen_final_reduce_1305 }
-
-args = sys.argv[1:]
-if len(args) != 2 or args[0] not in ops:
- sys.stderr.write("usage: make1305.py (%s) <bits>\n" % (" | ".join(sorted(ops))))
- sys.exit(1)
-
-sys.stdout.write(" /* ./contrib/make1305.py %s %s */\n" % tuple(args))
-s = ops[args[0]](int(args[1]))
-sys.stdout.write(s)
+ a = target.bigval_input("n", 133)
+ a1 = a.extract_bits(130, 130)
+ k = a + target.const(5) * a1
+ q = k.extract_bits(130)
+ adjusted = a + target.const(5) * q
+ ret = adjusted.extract_bits(0, 130)
+ target.write_bigval("n", ret)
+ return """\
+static void bigval_final_reduce(bigval *n)
+{
+%s}
+\n""" % target.text()
+
+pp_keyword = "#if"
+for bits in [16, 32, 64]:
+ sys.stdout.write("%s BIGNUM_INT_BITS == %d\n\n" % (pp_keyword, bits))
+ pp_keyword = "#elif"
+ sys.stdout.write(gen_add(CodegenTarget(bits)))
+ sys.stdout.write(gen_mul(CodegenTarget(bits)))
+ sys.stdout.write(gen_final_reduce(CodegenTarget(bits)))
+sys.stdout.write("""#else
+#error Add another bit count to contrib/make1305.py and rerun it
+#endif
+""")
/*
* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
- * big-endian arrays of 'len' BignumInts. Returns a BignumInt carried
- * off the top.
+ * big-endian arrays of 'len' BignumInts. Returns the carry off the
+ * top.
*/
-static BignumInt internal_add(const BignumInt *a, const BignumInt *b,
- BignumInt *c, int len)
+static BignumCarry internal_add(const BignumInt *a, const BignumInt *b,
+ BignumInt *c, int len)
{
int i;
- BignumDblInt carry = 0;
+ BignumCarry carry = 0;
- for (i = len-1; i >= 0; i--) {
- carry += (BignumDblInt)a[i] + b[i];
- c[i] = (BignumInt)carry;
- carry >>= BIGNUM_INT_BITS;
- }
+ for (i = len-1; i >= 0; i--)
+ BignumADC(c[i], carry, a[i], b[i], carry);
return (BignumInt)carry;
}
BignumInt *c, int len)
{
int i;
- BignumDblInt carry = 1;
+ BignumCarry carry = 1;
- for (i = len-1; i >= 0; i--) {
- carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
- c[i] = (BignumInt)carry;
- carry >>= BIGNUM_INT_BITS;
- }
+ for (i = len-1; i >= 0; i--)
+ BignumADC(c[i], carry, a[i], ~b[i], carry);
}
/*
int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
int midlen = botlen + 1;
- BignumDblInt carry;
+ BignumCarry carry;
#ifdef KARA_DEBUG
int i;
#endif
i = 2*len - botlen - 2*midlen - 1;
while (carry) {
assert(i >= 0);
- carry += c[i];
- c[i] = (BignumInt)carry;
- carry >>= BIGNUM_INT_BITS;
+ BignumADC(c[i], carry, c[i], 0, carry);
i--;
}
#ifdef KARA_DEBUG
} else {
int i;
BignumInt carry;
- BignumDblInt t;
const BignumInt *ap, *bp;
BignumInt *cp, *cps;
for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
carry = 0;
- for (cp = cps, bp = b + len; cp--, bp-- > b ;) {
- t = (MUL_WORD(*ap, *bp) + carry) + *cp;
- *cp = (BignumInt) t;
- carry = (BignumInt)(t >> BIGNUM_INT_BITS);
- }
+ for (cp = cps, bp = b + len; cp--, bp-- > b ;)
+ BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
*cp = carry;
}
}
} else {
int i;
BignumInt carry;
- BignumDblInt t;
const BignumInt *ap, *bp;
BignumInt *cp, *cps;
for (cps = c + len, ap = a + len; ap-- > a; cps--) {
carry = 0;
- for (cp = cps, bp = b + len; bp--, cp-- > c ;) {
- t = (MUL_WORD(*ap, *bp) + carry) + *cp;
- *cp = (BignumInt) t;
- carry = (BignumInt)(t >> BIGNUM_INT_BITS);
- }
+ for (cp = cps, bp = b + len; bp--, cp-- > c ;)
+ BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);
}
}
}
{
int word = 1 + (shift / BIGNUM_INT_BITS);
int bshift = shift % BIGNUM_INT_BITS;
- BignumDblInt addend;
-
- addend = (BignumDblInt)n << bshift;
-
- while (addend) {
+ BignumInt addendh, addendl;
+ BignumCarry carry;
+
+ addendl = n << bshift;
+ addendh = (bshift == 0 ? 0 : n >> (BIGNUM_INT_BITS - bshift));
+
+ assert(word <= number[0]);
+ BignumADC(number[word], carry, number[word], addendl, 0);
+ word++;
+ if (!addendh && !carry)
+ return;
+ assert(word <= number[0]);
+ BignumADC(number[word], carry, number[word], addendh, carry);
+ word++;
+ while (carry) {
assert(word <= number[0]);
- addend += number[word];
- number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
- addend >>= BIGNUM_INT_BITS;
+ BignumADC(number[word], carry, number[word], 0, carry);
word++;
}
}
static BignumInt reciprocal_word(BignumInt d)
{
- BignumInt dshort, recip;
- BignumDblInt product;
+ BignumInt dshort, recip, prodh, prodl;
int corrections;
/*
* iteration, and the initial division above already gave us half
* the output word, so it's only worth doing one iteration.
*/
- product = MUL_WORD(recip, d);
- product += recip;
- product = -product; /* the 2K shifts just off the top */
- product &= (((BignumDblInt)BIGNUM_INT_MASK << BIGNUM_INT_BITS) +
- BIGNUM_INT_MASK);
- product >>= BIGNUM_INT_BITS;
- product = MUL_WORD(product, recip);
- product >>= (BIGNUM_INT_BITS-1);
- recip = (BignumInt)product;
+ BignumMULADD(prodh, prodl, recip, d, recip);
+ prodl = ~prodl;
+ prodh = ~prodh;
+ {
+ BignumCarry c;
+ BignumADC(prodl, c, prodl, 1, 0);
+ prodh += c;
+ }
+ BignumMUL(prodh, prodl, prodh, recip);
+ recip = (prodh << 1) | (prodl >> (BIGNUM_INT_BITS-1));
/*
* Now make sure we have the best possible reciprocal estimate,
* way - not enough to bother with any better-thought-out kind of
* correction loop.
*/
- product = MUL_WORD(recip, d);
- product += recip;
+ BignumMULADD(prodh, prodl, recip, d, recip);
corrections = 0;
- if (product >= ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1))) {
+ if (prodh >= BIGNUM_TOP_BIT) {
do {
- product -= d;
+ BignumCarry c = 1;
+ BignumADC(prodl, c, prodl, ~d, c); prodh += BIGNUM_INT_MASK + c;
recip--;
corrections++;
- } while (product >= ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1)));
+ } while (prodh >= ((BignumInt)1 << (BIGNUM_INT_BITS-1)));
} else {
- while (product < ((BignumDblInt)1 << (2*BIGNUM_INT_BITS-1)) - d) {
- product += d;
+ while (1) {
+ BignumInt newprodh, newprodl;
+ BignumCarry c = 0;
+ BignumADC(newprodl, c, prodl, d, c); newprodh = prodh + c;
+ if (newprodh >= BIGNUM_TOP_BIT)
+ break;
+ prodh = newprodh;
+ prodl = newprodl;
recip++;
corrections++;
}
* here.
*/
for (i = 0; i <= alen - mlen ;) {
- BignumDblInt product, subtmp, t;
+ BignumInt product;
BignumInt aword, q;
int shift, full_bitoffset, bitoffset, wordoffset;
if (shift > 0 && i+1 < alen)
aword |= a[i+1] >> (BIGNUM_INT_BITS - shift);
- t = MUL_WORD(recip, aword);
- q = (BignumInt)(t >> BIGNUM_INT_BITS);
+ {
+ BignumInt unused;
+ BignumMUL(q, unused, recip, aword);
+ (void)unused;
+ }
#ifdef DIVISION_DEBUG
printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n",
wordoffset = alen - mlen - wordoffset;
if (bitoffset == 0) {
- BignumInt c = 1;
+ BignumCarry c = 1;
BignumInt prev_hi_word = 0;
for (k = mlen - 1; wordoffset+k >= i; k--) {
BignumInt mword = k<0 ? 0 : m[k];
- product = MUL_WORD(q, mword);
- product += prev_hi_word;
- prev_hi_word = product >> BIGNUM_INT_BITS;
+ BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
#ifdef DIVISION_DEBUG
printf(" aligned sub: product word for m[%d] = %#0*llx\n",
k, BIGNUM_INT_BITS/4,
- (unsigned long long)(BignumInt)product);
+ (unsigned long long)product);
#endif
#ifdef DIVISION_DEBUG
printf(" aligned sub: subtrahend for a[%d] = %#0*llx\n",
wordoffset+k, BIGNUM_INT_BITS/4,
- (unsigned long long)(BignumInt)product);
+ (unsigned long long)product);
#endif
- subtmp = (BignumDblInt)a[wordoffset+k] +
- ((BignumInt)product ^ BIGNUM_INT_MASK) + c;
- a[wordoffset+k] = (BignumInt)subtmp;
- c = subtmp >> BIGNUM_INT_BITS;
+ BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c);
}
} else {
BignumInt add_word = 0;
BignumInt prev_hi_word = 0;
for (k = mlen - 1; wordoffset+k >= i; k--) {
BignumInt mword = k<0 ? 0 : m[k];
- product = MUL_WORD(q, mword);
- product += prev_hi_word;
- prev_hi_word = product >> BIGNUM_INT_BITS;
+ BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);
#ifdef DIVISION_DEBUG
printf(" unaligned sub: product word for m[%d] = %#0*llx\n",
k, BIGNUM_INT_BITS/4,
- (unsigned long long)(BignumInt)product);
+ (unsigned long long)product);
#endif
- add_word |= (BignumInt)product << bitoffset;
+ add_word |= product << bitoffset;
#ifdef DIVISION_DEBUG
printf(" unaligned sub: subtrahend for a[%d] = %#0*llx\n",
wordoffset+k,
BIGNUM_INT_BITS/4, (unsigned long long)add_word);
#endif
- subtmp = (BignumDblInt)a[wordoffset+k] +
- (add_word ^ BIGNUM_INT_MASK) + c;
- a[wordoffset+k] = (BignumInt)subtmp;
- c = subtmp >> BIGNUM_INT_BITS;
+ BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c);
- add_word = (BignumInt)product >> (BIGNUM_INT_BITS - bitoffset);
+ add_word = product >> (BIGNUM_INT_BITS - bitoffset);
}
}
* subtract m, and increment the quotient.
*/
{
- BignumInt c = 1;
+ BignumCarry c = 1;
for (i = alen - 1; i >= 0; i--) {
int mindex = mlen-alen+i;
BignumInt mword = mindex < 0 ? 0 : m[mindex];
- BignumDblInt subtmp = (BignumDblInt)a[i] +
- ((BignumInt)mword ^ BIGNUM_INT_MASK) + c;
- a[i] = (BignumInt)subtmp;
- c = subtmp >> BIGNUM_INT_BITS;
+ BignumADC(a[i], c, a[i], ~mword, c);
}
}
if (quot)
/* now add in the addend, if any */
if (addend) {
- BignumDblInt carry = 0;
+ BignumCarry carry = 0;
for (i = 1; i <= rlen; i++) {
- carry += (i <= (int)ret[0] ? ret[i] : 0);
- carry += (i <= (int)addend[0] ? addend[i] : 0);
- ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
- carry >>= BIGNUM_INT_BITS;
+ BignumInt retword = (i <= (int)ret[0] ? ret[i] : 0);
+ BignumInt addword = (i <= (int)addend[0] ? addend[i] : 0);
+ BignumADC(ret[i], carry, retword, addword, carry);
if (ret[i] != 0 && i > maxspot)
maxspot = i;
}
int rlen = (alen > blen ? alen : blen) + 1;
int i, maxspot;
Bignum ret;
- BignumDblInt carry;
+ BignumCarry carry;
ret = newbn(rlen);
carry = 0;
maxspot = 0;
for (i = 1; i <= rlen; i++) {
- carry += (i <= (int)a[0] ? a[i] : 0);
- carry += (i <= (int)b[0] ? b[i] : 0);
- ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
- carry >>= BIGNUM_INT_BITS;
+ BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
+ BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
+ BignumADC(ret[i], carry, aword, bword, carry);
if (ret[i] != 0 && i > maxspot)
maxspot = i;
}
int rlen = (alen > blen ? alen : blen);
int i, maxspot;
Bignum ret;
- BignumDblInt carry;
+ BignumCarry carry;
ret = newbn(rlen);
carry = 1;
maxspot = 0;
for (i = 1; i <= rlen; i++) {
- carry += (i <= (int)a[0] ? a[i] : 0);
- carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
- ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
- carry >>= BIGNUM_INT_BITS;
+ BignumInt aword = (i <= (int)a[0] ? a[i] : 0);
+ BignumInt bword = (i <= (int)b[0] ? b[i] : 0);
+ BignumADC(ret[i], carry, aword, ~bword, carry);
if (ret[i] != 0 && i > maxspot)
maxspot = i;
}
}
/*
- * Convert a (max 32-bit) long into a bignum.
+ * Convert an unsigned long into a bignum.
*/
-Bignum bignum_from_long(unsigned long nn)
+Bignum bignum_from_long(unsigned long n)
{
+ const int maxwords =
+ (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
Bignum ret;
- BignumDblInt n = nn;
+ int i;
+
+ ret = newbn(maxwords);
+ ret[0] = 0;
+ for (i = 0; i < maxwords; i++) {
+ ret[i+1] = n >> (i * BIGNUM_INT_BITS);
+ if (ret[i+1] != 0)
+ ret[0] = i+1;
+ }
- ret = newbn(3);
- ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
- ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
- ret[3] = 0;
- ret[0] = (ret[2] ? 2 : 1);
return ret;
}
/*
* Add a long to a bignum.
*/
-Bignum bignum_add_long(Bignum number, unsigned long addendx)
+Bignum bignum_add_long(Bignum number, unsigned long n)
{
- Bignum ret = newbn(number[0] + 1);
- int i, maxspot = 0;
- BignumDblInt carry = 0, addend = addendx;
+ const int maxwords =
+ (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);
+ Bignum ret;
+ int words, i;
+ BignumCarry carry;
- for (i = 1; i <= (int)ret[0]; i++) {
- carry += addend & BIGNUM_INT_MASK;
- carry += (i <= (int)number[0] ? number[i] : 0);
- addend >>= BIGNUM_INT_BITS;
- ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
- carry >>= BIGNUM_INT_BITS;
- if (ret[i] != 0)
- maxspot = i;
+ words = number[0];
+ if (words < maxwords)
+ words = maxwords;
+ words++;
+ ret = newbn(words);
+
+ carry = 0;
+ ret[0] = 0;
+ for (i = 0; i < words; i++) {
+ BignumInt nword = (i < maxwords ? n >> (i * BIGNUM_INT_BITS) : 0);
+ BignumInt numword = (i < number[0] ? number[i+1] : 0);
+ BignumADC(ret[i+1], carry, numword, nword, carry);
+ if (ret[i+1] != 0)
+ ret[0] = i+1;
}
- ret[0] = maxspot;
return ret;
}
*/
unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
{
- BignumDblInt mod, r;
+ unsigned long mod = modulus, r = 0;
+ /* Precompute (BIGNUM_INT_MASK+1) % mod */
+ unsigned long base_r = (BIGNUM_INT_MASK - modulus + 1) % mod;
int i;
- r = 0;
- mod = modulus;
- for (i = number[0]; i > 0; i--)
- r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
+ for (i = number[0]; i > 0; i--) {
+ /*
+ * Conceptually, ((r << BIGNUM_INT_BITS) + number[i]) % mod
+ */
+ r = ((r * base_r) + (number[i] % mod)) % mod;
+ }
return (unsigned short) r;
}
{
int ndigits, ndigit;
int i, iszero;
- BignumDblInt carry;
+ BignumInt carry;
char *ret;
BignumInt *workspace;
iszero = 1;
carry = 0;
for (i = 0; i < (int)x[0]; i++) {
- carry = (carry << BIGNUM_INT_BITS) + workspace[i];
- workspace[i] = (BignumInt) (carry / 10);
+ /*
+ * Conceptually, we want to compute
+ *
+ * (carry << BIGNUM_INT_BITS) + workspace[i]
+ * -----------------------------------------
+ * 10
+ *
+ * but we don't have an integer type longer than BignumInt
+ * to work with. So we have to do it in pieces.
+ */
+
+ BignumInt q, r;
+ q = workspace[i] / 10;
+ r = workspace[i] % 10;
+
+ /* I want (BIGNUM_INT_MASK+1)/10 but can't say so directly! */
+ q += carry * ((BIGNUM_INT_MASK-9) / 10 + 1);
+ r += carry * ((BIGNUM_INT_MASK-9) % 10);
+
+ q += r / 10;
+ r %= 10;
+
+ workspace[i] = q;
+ carry = r;
+
if (workspace[i])
iszero = 0;
- carry %= 10;
}
ret[--ndigit] = (char) (carry + '0');
} while (!iszero);
}
/*
- * Addition of bigvals, not mod p.
+ * Core functions to do arithmetic mod p = 2^130-5. The whole
+ * collection of these, up to and including the surrounding #if, are
+ * generated automatically for various sizes of BignumInt by
+ * contrib/make1305.py.
*/
+
+#if BIGNUM_INT_BITS == 16
+
static void bigval_add(bigval *r, const bigval *a, const bigval *b)
{
-#if BIGNUM_INT_BITS == 64
- /* ./contrib/make1305.py add 64 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += a->w[0];
- acclo += b->w[0];
- r->w[0] = acclo;
- acclo >>= 64;
- acclo += a->w[1];
- acclo += b->w[1];
- r->w[1] = acclo;
- acclo >>= 64;
- acclo += a->w[2];
- acclo += b->w[2];
- r->w[2] = acclo;
- acclo >>= 64;
-#elif BIGNUM_INT_BITS == 32
- /* ./contrib/make1305.py add 32 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += a->w[0];
- acclo += b->w[0];
- r->w[0] = acclo;
- acclo >>= 32;
- acclo += a->w[1];
- acclo += b->w[1];
- r->w[1] = acclo;
- acclo >>= 32;
- acclo += a->w[2];
- acclo += b->w[2];
- r->w[2] = acclo;
- acclo >>= 32;
- acclo += a->w[3];
- acclo += b->w[3];
- r->w[3] = acclo;
- acclo >>= 32;
- acclo += a->w[4];
- acclo += b->w[4];
- r->w[4] = acclo;
- acclo >>= 32;
-#elif BIGNUM_INT_BITS == 16
- /* ./contrib/make1305.py add 16 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += a->w[0];
- acclo += b->w[0];
- r->w[0] = acclo;
- acclo >>= 16;
- acclo += a->w[1];
- acclo += b->w[1];
- r->w[1] = acclo;
- acclo >>= 16;
- acclo += a->w[2];
- acclo += b->w[2];
- r->w[2] = acclo;
- acclo >>= 16;
- acclo += a->w[3];
- acclo += b->w[3];
- r->w[3] = acclo;
- acclo >>= 16;
- acclo += a->w[4];
- acclo += b->w[4];
- r->w[4] = acclo;
- acclo >>= 16;
- acclo += a->w[5];
- acclo += b->w[5];
- r->w[5] = acclo;
- acclo >>= 16;
- acclo += a->w[6];
- acclo += b->w[6];
- r->w[6] = acclo;
- acclo >>= 16;
- acclo += a->w[7];
- acclo += b->w[7];
- r->w[7] = acclo;
- acclo >>= 16;
- acclo += a->w[8];
- acclo += b->w[8];
- r->w[8] = acclo;
- acclo >>= 16;
-#else
-#error Run contrib/make1305.py again with a different bit count
-#endif
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14;
+ BignumInt v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = a->w[3];
+ v4 = a->w[4];
+ v5 = a->w[5];
+ v6 = a->w[6];
+ v7 = a->w[7];
+ v8 = a->w[8];
+ v9 = b->w[0];
+ v10 = b->w[1];
+ v11 = b->w[2];
+ v12 = b->w[3];
+ v13 = b->w[4];
+ v14 = b->w[5];
+ v15 = b->w[6];
+ v16 = b->w[7];
+ v17 = b->w[8];
+ BignumADC(v18, carry, v0, v9, 0);
+ BignumADC(v19, carry, v1, v10, carry);
+ BignumADC(v20, carry, v2, v11, carry);
+ BignumADC(v21, carry, v3, v12, carry);
+ BignumADC(v22, carry, v4, v13, carry);
+ BignumADC(v23, carry, v5, v14, carry);
+ BignumADC(v24, carry, v6, v15, carry);
+ BignumADC(v25, carry, v7, v16, carry);
+ v26 = v8 + v17 + carry;
+ r->w[0] = v18;
+ r->w[1] = v19;
+ r->w[2] = v20;
+ r->w[3] = v21;
+ r->w[4] = v22;
+ r->w[5] = v23;
+ r->w[6] = v24;
+ r->w[7] = v25;
+ r->w[8] = v26;
}
-/*
- * Multiplication of bigvals mod p. Uses r as temporary storage, so
- * don't pass r aliasing a or b.
- */
static void bigval_mul_mod_p(bigval *r, const bigval *a, const bigval *b)
{
-#if BIGNUM_INT_BITS == 64
- /* ./contrib/make1305.py mul 64 */
- BignumDblInt tmp;
- BignumDblInt acclo;
- BignumDblInt acchi;
- BignumDblInt acc2lo;
- acclo = 0;
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- r->w[0] = acclo;
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- r->w[1] = acclo;
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- r->w[2] = acclo & (((BignumInt)1 << 2)-1);
- acc2lo = 0;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 62)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 62);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 64;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 62)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 64;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 62);
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 64;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 0);
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo = 0;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 60)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- acc2lo += (acclo & (((BignumInt)1 << 4)-1)) * ((BignumDblInt)25 << 60);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 64;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 60)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 64);
- acchi = 0;
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 64;
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo >>= 64;
-#elif BIGNUM_INT_BITS == 32
- /* ./contrib/make1305.py mul 32 */
- BignumDblInt tmp;
- BignumDblInt acclo;
- BignumDblInt acchi;
- BignumDblInt acc2lo;
- acclo = 0;
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- r->w[0] = acclo;
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- r->w[1] = acclo;
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- r->w[2] = acclo;
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- r->w[3] = acclo;
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- r->w[4] = acclo & (((BignumInt)1 << 2)-1);
- acc2lo = 0;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 30)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 30);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 32;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 30)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 30);
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 32;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 30)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 30);
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo >>= 32;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 30)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 32;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 30);
- acc2lo += r->w[3];
- r->w[3] = acc2lo;
- acc2lo >>= 32;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 0);
- acc2lo += r->w[4];
- r->w[4] = acc2lo;
- acc2lo = 0;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 28)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- acc2lo += (acclo & (((BignumInt)1 << 4)-1)) * ((BignumDblInt)25 << 28);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 32;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 28)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 32);
- acchi = 0;
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 32;
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo >>= 32;
- acc2lo += r->w[3];
- r->w[3] = acc2lo;
- acc2lo >>= 32;
- acc2lo += r->w[4];
- r->w[4] = acc2lo;
- acc2lo >>= 32;
-#elif BIGNUM_INT_BITS == 16
- /* ./contrib/make1305.py mul 16 */
- BignumDblInt tmp;
- BignumDblInt acclo;
- BignumDblInt acchi;
- BignumDblInt acc2lo;
- acclo = 0;
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[0] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[1] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[2] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[3] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[4] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[5] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[6] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[7] = acclo;
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[0]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[0]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- r->w[8] = acclo & (((BignumInt)1 << 2)-1);
- acc2lo = 0;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[1]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[1]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[2]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[2]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[3]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[3]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[4]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[4]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[3];
- r->w[3] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[5]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[5]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[4];
- r->w[4] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[6]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[6]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[5];
- r->w[5] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[7]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[7]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[6];
- r->w[6] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 14)-1)) * ((BignumDblInt)5 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- tmp = (BignumDblInt)(a->w[8]) * (b->w[8]);
- acclo += tmp & BIGNUM_INT_MASK;
- acchi += tmp >> 16;
- acc2lo += (acclo & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 14);
- acc2lo += r->w[7];
- r->w[7] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 2) & (((BignumInt)1 << 2)-1)) * ((BignumDblInt)5 << 0);
- acc2lo += r->w[8];
- r->w[8] = acc2lo;
- acc2lo = 0;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 12)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- acc2lo += (acclo & (((BignumInt)1 << 4)-1)) * ((BignumDblInt)25 << 12);
- acc2lo += r->w[0];
- r->w[0] = acc2lo;
- acc2lo >>= 16;
- acc2lo += ((acclo >> 4) & (((BignumInt)1 << 12)-1)) * ((BignumDblInt)25 << 0);
- acclo = acchi + (acclo >> 16);
- acchi = 0;
- acc2lo += r->w[1];
- r->w[1] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[2];
- r->w[2] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[3];
- r->w[3] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[4];
- r->w[4] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[5];
- r->w[5] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[6];
- r->w[6] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[7];
- r->w[7] = acc2lo;
- acc2lo >>= 16;
- acc2lo += r->w[8];
- r->w[8] = acc2lo;
- acc2lo >>= 16;
-#else
-#error Run contrib/make1305.py again with a different bit count
-#endif
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14;
+ BignumInt v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27;
+ BignumInt v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40;
+ BignumInt v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53;
+ BignumInt v54, v55, v56, v57, v58, v59, v60, v61, v62, v63, v64, v65, v66;
+ BignumInt v67, v68, v69, v70, v71, v72, v73, v74, v75, v76, v77, v78, v79;
+ BignumInt v80, v81, v82, v83, v84, v85, v86, v87, v88, v89, v90, v91, v92;
+ BignumInt v93, v94, v95, v96, v97, v98, v99, v100, v101, v102, v103, v104;
+ BignumInt v105, v106, v107, v108, v109, v110, v111, v112, v113, v114;
+ BignumInt v115, v116, v117, v118, v119, v120, v121, v122, v123, v124;
+ BignumInt v125, v126, v127, v128, v129, v130, v131, v132, v133, v134;
+ BignumInt v135, v136, v137, v138, v139, v140, v141, v142, v143, v144;
+ BignumInt v145, v146, v147, v148, v149, v150, v151, v152, v153, v154;
+ BignumInt v155, v156, v157, v158, v159, v160, v161, v162, v163, v164;
+ BignumInt v165, v166, v167, v168, v169, v170, v171, v172, v173, v174;
+ BignumInt v175, v176, v177, v178, v180, v181, v182, v183, v184, v185;
+ BignumInt v186, v187, v188, v189, v190, v191, v192, v193, v194, v195;
+ BignumInt v196, v197, v198, v199, v200, v201, v202, v203, v204, v205;
+ BignumInt v206, v207, v208, v210, v212, v213, v214, v215, v216, v217;
+ BignumInt v218, v219, v220, v221, v222, v223, v224, v225, v226, v227;
+ BignumInt v228, v229;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = a->w[3];
+ v4 = a->w[4];
+ v5 = a->w[5];
+ v6 = a->w[6];
+ v7 = a->w[7];
+ v8 = a->w[8];
+ v9 = b->w[0];
+ v10 = b->w[1];
+ v11 = b->w[2];
+ v12 = b->w[3];
+ v13 = b->w[4];
+ v14 = b->w[5];
+ v15 = b->w[6];
+ v16 = b->w[7];
+ v17 = b->w[8];
+ BignumMUL(v19, v18, v0, v9);
+ BignumMULADD(v21, v20, v0, v10, v19);
+ BignumMULADD(v23, v22, v0, v11, v21);
+ BignumMULADD(v25, v24, v0, v12, v23);
+ BignumMULADD(v27, v26, v0, v13, v25);
+ BignumMULADD(v29, v28, v0, v14, v27);
+ BignumMULADD(v31, v30, v0, v15, v29);
+ BignumMULADD(v33, v32, v0, v16, v31);
+ BignumMULADD(v35, v34, v0, v17, v33);
+ BignumMULADD(v37, v36, v1, v9, v20);
+ BignumMULADD2(v39, v38, v1, v10, v22, v37);
+ BignumMULADD2(v41, v40, v1, v11, v24, v39);
+ BignumMULADD2(v43, v42, v1, v12, v26, v41);
+ BignumMULADD2(v45, v44, v1, v13, v28, v43);
+ BignumMULADD2(v47, v46, v1, v14, v30, v45);
+ BignumMULADD2(v49, v48, v1, v15, v32, v47);
+ BignumMULADD2(v51, v50, v1, v16, v34, v49);
+ BignumMULADD2(v53, v52, v1, v17, v35, v51);
+ BignumMULADD(v55, v54, v2, v9, v38);
+ BignumMULADD2(v57, v56, v2, v10, v40, v55);
+ BignumMULADD2(v59, v58, v2, v11, v42, v57);
+ BignumMULADD2(v61, v60, v2, v12, v44, v59);
+ BignumMULADD2(v63, v62, v2, v13, v46, v61);
+ BignumMULADD2(v65, v64, v2, v14, v48, v63);
+ BignumMULADD2(v67, v66, v2, v15, v50, v65);
+ BignumMULADD2(v69, v68, v2, v16, v52, v67);
+ BignumMULADD2(v71, v70, v2, v17, v53, v69);
+ BignumMULADD(v73, v72, v3, v9, v56);
+ BignumMULADD2(v75, v74, v3, v10, v58, v73);
+ BignumMULADD2(v77, v76, v3, v11, v60, v75);
+ BignumMULADD2(v79, v78, v3, v12, v62, v77);
+ BignumMULADD2(v81, v80, v3, v13, v64, v79);
+ BignumMULADD2(v83, v82, v3, v14, v66, v81);
+ BignumMULADD2(v85, v84, v3, v15, v68, v83);
+ BignumMULADD2(v87, v86, v3, v16, v70, v85);
+ BignumMULADD2(v89, v88, v3, v17, v71, v87);
+ BignumMULADD(v91, v90, v4, v9, v74);
+ BignumMULADD2(v93, v92, v4, v10, v76, v91);
+ BignumMULADD2(v95, v94, v4, v11, v78, v93);
+ BignumMULADD2(v97, v96, v4, v12, v80, v95);
+ BignumMULADD2(v99, v98, v4, v13, v82, v97);
+ BignumMULADD2(v101, v100, v4, v14, v84, v99);
+ BignumMULADD2(v103, v102, v4, v15, v86, v101);
+ BignumMULADD2(v105, v104, v4, v16, v88, v103);
+ BignumMULADD2(v107, v106, v4, v17, v89, v105);
+ BignumMULADD(v109, v108, v5, v9, v92);
+ BignumMULADD2(v111, v110, v5, v10, v94, v109);
+ BignumMULADD2(v113, v112, v5, v11, v96, v111);
+ BignumMULADD2(v115, v114, v5, v12, v98, v113);
+ BignumMULADD2(v117, v116, v5, v13, v100, v115);
+ BignumMULADD2(v119, v118, v5, v14, v102, v117);
+ BignumMULADD2(v121, v120, v5, v15, v104, v119);
+ BignumMULADD2(v123, v122, v5, v16, v106, v121);
+ BignumMULADD2(v125, v124, v5, v17, v107, v123);
+ BignumMULADD(v127, v126, v6, v9, v110);
+ BignumMULADD2(v129, v128, v6, v10, v112, v127);
+ BignumMULADD2(v131, v130, v6, v11, v114, v129);
+ BignumMULADD2(v133, v132, v6, v12, v116, v131);
+ BignumMULADD2(v135, v134, v6, v13, v118, v133);
+ BignumMULADD2(v137, v136, v6, v14, v120, v135);
+ BignumMULADD2(v139, v138, v6, v15, v122, v137);
+ BignumMULADD2(v141, v140, v6, v16, v124, v139);
+ BignumMULADD2(v143, v142, v6, v17, v125, v141);
+ BignumMULADD(v145, v144, v7, v9, v128);
+ BignumMULADD2(v147, v146, v7, v10, v130, v145);
+ BignumMULADD2(v149, v148, v7, v11, v132, v147);
+ BignumMULADD2(v151, v150, v7, v12, v134, v149);
+ BignumMULADD2(v153, v152, v7, v13, v136, v151);
+ BignumMULADD2(v155, v154, v7, v14, v138, v153);
+ BignumMULADD2(v157, v156, v7, v15, v140, v155);
+ BignumMULADD2(v159, v158, v7, v16, v142, v157);
+ BignumMULADD2(v161, v160, v7, v17, v143, v159);
+ BignumMULADD(v163, v162, v8, v9, v146);
+ BignumMULADD2(v165, v164, v8, v10, v148, v163);
+ BignumMULADD2(v167, v166, v8, v11, v150, v165);
+ BignumMULADD2(v169, v168, v8, v12, v152, v167);
+ BignumMULADD2(v171, v170, v8, v13, v154, v169);
+ BignumMULADD2(v173, v172, v8, v14, v156, v171);
+ BignumMULADD2(v175, v174, v8, v15, v158, v173);
+ BignumMULADD2(v177, v176, v8, v16, v160, v175);
+ v178 = v8 * v17 + v161 + v177;
+ v180 = (v162) & ((((BignumInt)1) << 2)-1);
+ v181 = ((v162) >> 2) | ((v164) << 14);
+ v182 = ((v164) >> 2) | ((v166) << 14);
+ v183 = ((v166) >> 2) | ((v168) << 14);
+ v184 = ((v168) >> 2) | ((v170) << 14);
+ v185 = ((v170) >> 2) | ((v172) << 14);
+ v186 = ((v172) >> 2) | ((v174) << 14);
+ v187 = ((v174) >> 2) | ((v176) << 14);
+ v188 = ((v176) >> 2) | ((v178) << 14);
+ v189 = (v178) >> 2;
+ v190 = (v189) & ((((BignumInt)1) << 2)-1);
+ v191 = (v178) >> 4;
+ BignumMUL(v193, v192, 5, v181);
+ BignumMULADD(v195, v194, 5, v182, v193);
+ BignumMULADD(v197, v196, 5, v183, v195);
+ BignumMULADD(v199, v198, 5, v184, v197);
+ BignumMULADD(v201, v200, 5, v185, v199);
+ BignumMULADD(v203, v202, 5, v186, v201);
+ BignumMULADD(v205, v204, 5, v187, v203);
+ BignumMULADD(v207, v206, 5, v188, v205);
+ v208 = 5 * v190 + v207;
+ v210 = 25 * v191;
+ BignumADC(v212, carry, v18, v192, 0);
+ BignumADC(v213, carry, v36, v194, carry);
+ BignumADC(v214, carry, v54, v196, carry);
+ BignumADC(v215, carry, v72, v198, carry);
+ BignumADC(v216, carry, v90, v200, carry);
+ BignumADC(v217, carry, v108, v202, carry);
+ BignumADC(v218, carry, v126, v204, carry);
+ BignumADC(v219, carry, v144, v206, carry);
+ v220 = v180 + v208 + carry;
+ BignumADC(v221, carry, v212, v210, 0);
+ BignumADC(v222, carry, v213, 0, carry);
+ BignumADC(v223, carry, v214, 0, carry);
+ BignumADC(v224, carry, v215, 0, carry);
+ BignumADC(v225, carry, v216, 0, carry);
+ BignumADC(v226, carry, v217, 0, carry);
+ BignumADC(v227, carry, v218, 0, carry);
+ BignumADC(v228, carry, v219, 0, carry);
+ v229 = v220 + 0 + carry;
+ r->w[0] = v221;
+ r->w[1] = v222;
+ r->w[2] = v223;
+ r->w[3] = v224;
+ r->w[4] = v225;
+ r->w[5] = v226;
+ r->w[6] = v227;
+ r->w[7] = v228;
+ r->w[8] = v229;
}
static void bigval_final_reduce(bigval *n)
{
-#if BIGNUM_INT_BITS == 64
- /* ./contrib/make1305.py final_reduce 64 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += 5 * ((n->w[2] >> 2) + 1);
- acclo += n->w[0];
- acclo >>= 64;
- acclo += n->w[1];
- acclo >>= 64;
- acclo += n->w[2];
- acclo = 5 * (acclo >> 2);
- acclo += n->w[0];
- n->w[0] = acclo;
- acclo >>= 64;
- acclo += n->w[1];
- n->w[1] = acclo;
- acclo >>= 64;
- acclo += n->w[2];
- n->w[2] = acclo;
- acclo >>= 64;
- n->w[2] &= (1 << 2) - 1;
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v12, v13, v14, v15;
+ BignumInt v16, v17, v18, v19, v20, v21, v22, v24, v25, v26, v27, v28, v29;
+ BignumInt v30, v31, v32, v33;
+ BignumCarry carry;
+
+ v0 = n->w[0];
+ v1 = n->w[1];
+ v2 = n->w[2];
+ v3 = n->w[3];
+ v4 = n->w[4];
+ v5 = n->w[5];
+ v6 = n->w[6];
+ v7 = n->w[7];
+ v8 = n->w[8];
+ v9 = (v8) >> 2;
+ v10 = 5 * v9;
+ BignumADC(v12, carry, v0, v10, 0);
+ (void)v12;
+ BignumADC(v13, carry, v1, 0, carry);
+ (void)v13;
+ BignumADC(v14, carry, v2, 0, carry);
+ (void)v14;
+ BignumADC(v15, carry, v3, 0, carry);
+ (void)v15;
+ BignumADC(v16, carry, v4, 0, carry);
+ (void)v16;
+ BignumADC(v17, carry, v5, 0, carry);
+ (void)v17;
+ BignumADC(v18, carry, v6, 0, carry);
+ (void)v18;
+ BignumADC(v19, carry, v7, 0, carry);
+ (void)v19;
+ v20 = v8 + 0 + carry;
+ v21 = (v20) >> 2;
+ v22 = 5 * v21;
+ BignumADC(v24, carry, v0, v22, 0);
+ BignumADC(v25, carry, v1, 0, carry);
+ BignumADC(v26, carry, v2, 0, carry);
+ BignumADC(v27, carry, v3, 0, carry);
+ BignumADC(v28, carry, v4, 0, carry);
+ BignumADC(v29, carry, v5, 0, carry);
+ BignumADC(v30, carry, v6, 0, carry);
+ BignumADC(v31, carry, v7, 0, carry);
+ v32 = v8 + 0 + carry;
+ v33 = (v32) & ((((BignumInt)1) << 2)-1);
+ n->w[0] = v24;
+ n->w[1] = v25;
+ n->w[2] = v26;
+ n->w[3] = v27;
+ n->w[4] = v28;
+ n->w[5] = v29;
+ n->w[6] = v30;
+ n->w[7] = v31;
+ n->w[8] = v33;
+}
+
#elif BIGNUM_INT_BITS == 32
- /* ./contrib/make1305.py final_reduce 32 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += 5 * ((n->w[4] >> 2) + 1);
- acclo += n->w[0];
- acclo >>= 32;
- acclo += n->w[1];
- acclo >>= 32;
- acclo += n->w[2];
- acclo >>= 32;
- acclo += n->w[3];
- acclo >>= 32;
- acclo += n->w[4];
- acclo = 5 * (acclo >> 2);
- acclo += n->w[0];
- n->w[0] = acclo;
- acclo >>= 32;
- acclo += n->w[1];
- n->w[1] = acclo;
- acclo >>= 32;
- acclo += n->w[2];
- n->w[2] = acclo;
- acclo >>= 32;
- acclo += n->w[3];
- n->w[3] = acclo;
- acclo >>= 32;
- acclo += n->w[4];
- n->w[4] = acclo;
- acclo >>= 32;
- n->w[4] &= (1 << 2) - 1;
-#elif BIGNUM_INT_BITS == 16
- /* ./contrib/make1305.py final_reduce 16 */
- BignumDblInt acclo;
- acclo = 0;
- acclo += 5 * ((n->w[8] >> 2) + 1);
- acclo += n->w[0];
- acclo >>= 16;
- acclo += n->w[1];
- acclo >>= 16;
- acclo += n->w[2];
- acclo >>= 16;
- acclo += n->w[3];
- acclo >>= 16;
- acclo += n->w[4];
- acclo >>= 16;
- acclo += n->w[5];
- acclo >>= 16;
- acclo += n->w[6];
- acclo >>= 16;
- acclo += n->w[7];
- acclo >>= 16;
- acclo += n->w[8];
- acclo = 5 * (acclo >> 2);
- acclo += n->w[0];
- n->w[0] = acclo;
- acclo >>= 16;
- acclo += n->w[1];
- n->w[1] = acclo;
- acclo >>= 16;
- acclo += n->w[2];
- n->w[2] = acclo;
- acclo >>= 16;
- acclo += n->w[3];
- n->w[3] = acclo;
- acclo >>= 16;
- acclo += n->w[4];
- n->w[4] = acclo;
- acclo >>= 16;
- acclo += n->w[5];
- n->w[5] = acclo;
- acclo >>= 16;
- acclo += n->w[6];
- n->w[6] = acclo;
- acclo >>= 16;
- acclo += n->w[7];
- n->w[7] = acclo;
- acclo >>= 16;
- acclo += n->w[8];
- n->w[8] = acclo;
- acclo >>= 16;
- n->w[8] &= (1 << 2) - 1;
+
+static void bigval_add(bigval *r, const bigval *a, const bigval *b)
+{
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = a->w[3];
+ v4 = a->w[4];
+ v5 = b->w[0];
+ v6 = b->w[1];
+ v7 = b->w[2];
+ v8 = b->w[3];
+ v9 = b->w[4];
+ BignumADC(v10, carry, v0, v5, 0);
+ BignumADC(v11, carry, v1, v6, carry);
+ BignumADC(v12, carry, v2, v7, carry);
+ BignumADC(v13, carry, v3, v8, carry);
+ v14 = v4 + v9 + carry;
+ r->w[0] = v10;
+ r->w[1] = v11;
+ r->w[2] = v12;
+ r->w[3] = v13;
+ r->w[4] = v14;
+}
+
+static void bigval_mul_mod_p(bigval *r, const bigval *a, const bigval *b)
+{
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14;
+ BignumInt v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27;
+ BignumInt v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40;
+ BignumInt v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53;
+ BignumInt v54, v55, v56, v57, v58, v60, v61, v62, v63, v64, v65, v66, v67;
+ BignumInt v68, v69, v70, v71, v72, v73, v74, v75, v76, v78, v80, v81, v82;
+ BignumInt v83, v84, v85, v86, v87, v88, v89;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = a->w[3];
+ v4 = a->w[4];
+ v5 = b->w[0];
+ v6 = b->w[1];
+ v7 = b->w[2];
+ v8 = b->w[3];
+ v9 = b->w[4];
+ BignumMUL(v11, v10, v0, v5);
+ BignumMULADD(v13, v12, v0, v6, v11);
+ BignumMULADD(v15, v14, v0, v7, v13);
+ BignumMULADD(v17, v16, v0, v8, v15);
+ BignumMULADD(v19, v18, v0, v9, v17);
+ BignumMULADD(v21, v20, v1, v5, v12);
+ BignumMULADD2(v23, v22, v1, v6, v14, v21);
+ BignumMULADD2(v25, v24, v1, v7, v16, v23);
+ BignumMULADD2(v27, v26, v1, v8, v18, v25);
+ BignumMULADD2(v29, v28, v1, v9, v19, v27);
+ BignumMULADD(v31, v30, v2, v5, v22);
+ BignumMULADD2(v33, v32, v2, v6, v24, v31);
+ BignumMULADD2(v35, v34, v2, v7, v26, v33);
+ BignumMULADD2(v37, v36, v2, v8, v28, v35);
+ BignumMULADD2(v39, v38, v2, v9, v29, v37);
+ BignumMULADD(v41, v40, v3, v5, v32);
+ BignumMULADD2(v43, v42, v3, v6, v34, v41);
+ BignumMULADD2(v45, v44, v3, v7, v36, v43);
+ BignumMULADD2(v47, v46, v3, v8, v38, v45);
+ BignumMULADD2(v49, v48, v3, v9, v39, v47);
+ BignumMULADD(v51, v50, v4, v5, v42);
+ BignumMULADD2(v53, v52, v4, v6, v44, v51);
+ BignumMULADD2(v55, v54, v4, v7, v46, v53);
+ BignumMULADD2(v57, v56, v4, v8, v48, v55);
+ v58 = v4 * v9 + v49 + v57;
+ v60 = (v50) & ((((BignumInt)1) << 2)-1);
+ v61 = ((v50) >> 2) | ((v52) << 30);
+ v62 = ((v52) >> 2) | ((v54) << 30);
+ v63 = ((v54) >> 2) | ((v56) << 30);
+ v64 = ((v56) >> 2) | ((v58) << 30);
+ v65 = (v58) >> 2;
+ v66 = (v65) & ((((BignumInt)1) << 2)-1);
+ v67 = (v58) >> 4;
+ BignumMUL(v69, v68, 5, v61);
+ BignumMULADD(v71, v70, 5, v62, v69);
+ BignumMULADD(v73, v72, 5, v63, v71);
+ BignumMULADD(v75, v74, 5, v64, v73);
+ v76 = 5 * v66 + v75;
+ v78 = 25 * v67;
+ BignumADC(v80, carry, v10, v68, 0);
+ BignumADC(v81, carry, v20, v70, carry);
+ BignumADC(v82, carry, v30, v72, carry);
+ BignumADC(v83, carry, v40, v74, carry);
+ v84 = v60 + v76 + carry;
+ BignumADC(v85, carry, v80, v78, 0);
+ BignumADC(v86, carry, v81, 0, carry);
+ BignumADC(v87, carry, v82, 0, carry);
+ BignumADC(v88, carry, v83, 0, carry);
+ v89 = v84 + 0 + carry;
+ r->w[0] = v85;
+ r->w[1] = v86;
+ r->w[2] = v87;
+ r->w[3] = v88;
+ r->w[4] = v89;
+}
+
+static void bigval_final_reduce(bigval *n)
+{
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v8, v9, v10, v11, v12, v13, v14;
+ BignumInt v16, v17, v18, v19, v20, v21;
+ BignumCarry carry;
+
+ v0 = n->w[0];
+ v1 = n->w[1];
+ v2 = n->w[2];
+ v3 = n->w[3];
+ v4 = n->w[4];
+ v5 = (v4) >> 2;
+ v6 = 5 * v5;
+ BignumADC(v8, carry, v0, v6, 0);
+ (void)v8;
+ BignumADC(v9, carry, v1, 0, carry);
+ (void)v9;
+ BignumADC(v10, carry, v2, 0, carry);
+ (void)v10;
+ BignumADC(v11, carry, v3, 0, carry);
+ (void)v11;
+ v12 = v4 + 0 + carry;
+ v13 = (v12) >> 2;
+ v14 = 5 * v13;
+ BignumADC(v16, carry, v0, v14, 0);
+ BignumADC(v17, carry, v1, 0, carry);
+ BignumADC(v18, carry, v2, 0, carry);
+ BignumADC(v19, carry, v3, 0, carry);
+ v20 = v4 + 0 + carry;
+ v21 = (v20) & ((((BignumInt)1) << 2)-1);
+ n->w[0] = v16;
+ n->w[1] = v17;
+ n->w[2] = v18;
+ n->w[3] = v19;
+ n->w[4] = v21;
+}
+
+#elif BIGNUM_INT_BITS == 64
+
+static void bigval_add(bigval *r, const bigval *a, const bigval *b)
+{
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = b->w[0];
+ v4 = b->w[1];
+ v5 = b->w[2];
+ BignumADC(v6, carry, v0, v3, 0);
+ BignumADC(v7, carry, v1, v4, carry);
+ v8 = v2 + v5 + carry;
+ r->w[0] = v6;
+ r->w[1] = v7;
+ r->w[2] = v8;
+}
+
+static void bigval_mul_mod_p(bigval *r, const bigval *a, const bigval *b)
+{
+ BignumInt v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14;
+ BignumInt v15, v16, v17, v18, v19, v20, v21, v22, v24, v25, v26, v27, v28;
+ BignumInt v29, v30, v31, v32, v33, v34, v36, v38, v39, v40, v41, v42, v43;
+ BignumCarry carry;
+
+ v0 = a->w[0];
+ v1 = a->w[1];
+ v2 = a->w[2];
+ v3 = b->w[0];
+ v4 = b->w[1];
+ v5 = b->w[2];
+ BignumMUL(v7, v6, v0, v3);
+ BignumMULADD(v9, v8, v0, v4, v7);
+ BignumMULADD(v11, v10, v0, v5, v9);
+ BignumMULADD(v13, v12, v1, v3, v8);
+ BignumMULADD2(v15, v14, v1, v4, v10, v13);
+ BignumMULADD2(v17, v16, v1, v5, v11, v15);
+ BignumMULADD(v19, v18, v2, v3, v14);
+ BignumMULADD2(v21, v20, v2, v4, v16, v19);
+ v22 = v2 * v5 + v17 + v21;
+ v24 = (v18) & ((((BignumInt)1) << 2)-1);
+ v25 = ((v18) >> 2) | ((v20) << 62);
+ v26 = ((v20) >> 2) | ((v22) << 62);
+ v27 = (v22) >> 2;
+ v28 = (v27) & ((((BignumInt)1) << 2)-1);
+ v29 = (v22) >> 4;
+ BignumMUL(v31, v30, 5, v25);
+ BignumMULADD(v33, v32, 5, v26, v31);
+ v34 = 5 * v28 + v33;
+ v36 = 25 * v29;
+ BignumADC(v38, carry, v6, v30, 0);
+ BignumADC(v39, carry, v12, v32, carry);
+ v40 = v24 + v34 + carry;
+ BignumADC(v41, carry, v38, v36, 0);
+ BignumADC(v42, carry, v39, 0, carry);
+ v43 = v40 + 0 + carry;
+ r->w[0] = v41;
+ r->w[1] = v42;
+ r->w[2] = v43;
+}
+
+static void bigval_final_reduce(bigval *n)
+{
+ BignumInt v0, v1, v2, v3, v4, v6, v7, v8, v9, v10, v12, v13, v14, v15;
+ BignumCarry carry;
+
+ v0 = n->w[0];
+ v1 = n->w[1];
+ v2 = n->w[2];
+ v3 = (v2) >> 2;
+ v4 = 5 * v3;
+ BignumADC(v6, carry, v0, v4, 0);
+ (void)v6;
+ BignumADC(v7, carry, v1, 0, carry);
+ (void)v7;
+ v8 = v2 + 0 + carry;
+ v9 = (v8) >> 2;
+ v10 = 5 * v9;
+ BignumADC(v12, carry, v0, v10, 0);
+ BignumADC(v13, carry, v1, 0, carry);
+ v14 = v2 + 0 + carry;
+ v15 = (v14) & ((((BignumInt)1) << 2)-1);
+ n->w[0] = v12;
+ n->w[1] = v13;
+ n->w[2] = v15;
+}
+
#else
-#error Run contrib/make1305.py again with a different bit count
+#error Add another bit count to contrib/make1305.py and rerun it
#endif
-}
struct poly1305 {
unsigned char nonce[16];