Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.
There's no need to use RBP as a temporary register for the TBL value,
because it always stores the same value: the address of the K256 table.
Instead just reference the address of K256 directly.
Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
SRND = CTX # SRND is same register as CTX
a = %eax
SRND = CTX # SRND is same register as CTX
a = %eax
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r12
pushq %r13
pushq %r14
mov CTX, _CTX(%rsp)
loop0:
mov CTX, _CTX(%rsp)
loop0:
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X0, XFER
+ vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
- vpaddd 2*32(TBL, SRND), X0, XFER
+ vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
- vpaddd 3*32(TBL, SRND), X0, XFER
+ vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
loop2:
## Do last 16 rounds with no scheduling
loop2:
## Do last 16 rounds with no scheduling
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X1, XFER
+
+ vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
ja done_hash
do_last_block:
ja done_hash
do_last_block:
- #### do last block
- lea K256(%rip), TBL
-
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
popq %r14
popq %r13
popq %r12
popq %r14
popq %r13
popq %r12
popq %rbx
ret
ENDPROC(sha256_transform_rorx)
popq %rbx
ret
ENDPROC(sha256_transform_rorx)