asedeno.scripts.mit.edu Git - linux.git/commitdiff
powerpc/32: Optimise __csum_partial()
author: Christophe Leroy <christophe.leroy@c-s.fr>
Thu, 24 May 2018 11:22:27 +0000 (11:22 +0000)
committer: Michael Ellerman <mpe@ellerman.id.au>
Sun, 3 Jun 2018 14:39:19 +0000 (00:39 +1000)
Improve __csum_partial by interleaving loads and adds.

On an 8xx, it brings neither improvement nor degradation.
On an 83xx, it brings a 25% improvement.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/lib/checksum_32.S

index 9a671c774b2255379b2d752efd381767a2dd30d4..422d66e938e3794858db59238e8fac84e9529eba 100644 (file)
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
        bdnz    2b
 21:    srwi.   r6,r4,4         /* # blocks of 4 words to do */
        beq     3f
+       lwz     r0,4(r3)
        mtctr   r6
-22:    lwz     r0,4(r3)
        lwz     r6,8(r3)
+       adde    r5,r5,r0
        lwz     r7,12(r3)
+       adde    r5,r5,r6
        lwzu    r8,16(r3)
+       adde    r5,r5,r7
+       bdz     23f
+22:    lwz     r0,4(r3)
+       adde    r5,r5,r8
+       lwz     r6,8(r3)
        adde    r5,r5,r0
+       lwz     r7,12(r3)
        adde    r5,r5,r6
+       lwzu    r8,16(r3)
        adde    r5,r5,r7
-       adde    r5,r5,r8
        bdnz    22b
+23:    adde    r5,r5,r8
 3:     andi.   r0,r4,2
        beq+    4f
        lhz     r0,4(r3)