]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
ARCv2: lib: introduce memcpy optimized for unaligned access
authorEugeniy Paltsev <eugeniy.paltsev@synopsys.com>
Wed, 30 Jan 2019 16:32:43 +0000 (19:32 +0300)
committerVineet Gupta <vgupta@synopsys.com>
Mon, 25 Feb 2019 16:52:16 +0000 (08:52 -0800)
Optimise code to use efficient unaligned memory access which is
available on ARCv2. This allows us to really simplify memcpy code
and speed up the code one and a half times (in case of unaligned
source or destination).

Don't wire it up yet !

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
arch/arc/lib/memcpy-archs-unaligned.S [new file with mode: 0644]

diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S
new file mode 100644 (file)
index 0000000..28993a7
--- /dev/null
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * ARCv2 memcpy implementation optimized for unaligned memory access using.
+ *
+ * Copyright (C) 2019 Synopsys
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/linkage.h>
+
+#ifdef CONFIG_ARC_HAS_LL64
+# define LOADX(DST,RX)         ldd.ab  DST, [RX, 8]
+# define STOREX(SRC,RX)                std.ab  SRC, [RX, 8]
+# define ZOLSHFT               5
+# define ZOLAND                        0x1F
+#else
+# define LOADX(DST,RX)         ld.ab   DST, [RX, 4]
+# define STOREX(SRC,RX)                st.ab   SRC, [RX, 4]
+# define ZOLSHFT               4
+# define ZOLAND                        0xF
+#endif
+
+ENTRY_CFI(memcpy)
+       mov     r3, r0          ; don;t clobber ret val
+
+       lsr.f   lp_count, r2, ZOLSHFT
+       lpnz    @.Lcopy32_64bytes
+       ;; LOOP START
+       LOADX   (r6, r1)
+       LOADX   (r8, r1)
+       LOADX   (r10, r1)
+       LOADX   (r4, r1)
+       STOREX  (r6, r3)
+       STOREX  (r8, r3)
+       STOREX  (r10, r3)
+       STOREX  (r4, r3)
+.Lcopy32_64bytes:
+
+       and.f   lp_count, r2, ZOLAND ;Last remaining 31 bytes
+       lpnz    @.Lcopyremainingbytes
+       ;; LOOP START
+       ldb.ab  r5, [r1, 1]
+       stb.ab  r5, [r3, 1]
+.Lcopyremainingbytes:
+
+       j       [blink]
+END_CFI(memcpy)