Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro). No change in machine code produced.
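
As an illustration only (a minimal sketch, not lifted from memset.S; the
register names and the `1f' label are placeholders), the two styles compare
as follows. GAS emits identical machine code for both, because the addition
does not modify t0, the register the branch tests; where such a dependency
exists, or where the preceding instruction is hidden behind a section switch
as with the EX macro, GAS cannot fill the slot and the manual scheduling is
kept.

	/* Manually scheduled delay slot. */
	.set	noreorder
	bnez	t0, 1f
	 PTR_ADDU	a2, t0		/* delay slot, filled by hand */
	.set	reorder

	/* `reorder' mode: written in program order. */
	PTR_ADDU	a2, t0
	bnez	t0, 1f			/* GAS moves the PTR_ADDU into the slot */
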
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com: Fix conflict with commit 932afdeec18b ("MIPS: Add
Kconfig variable for CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
.endif
sltiu t0, a2, STORSIZE /* very small region? */
bnez t0, .Lsmall_memset\@
andi t0, a0, STORMASK /* aligned? */
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
move t9, a1
#endif
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f
PTR_SUBU t0, STORSIZE /* alignment in bytes */
PTR_SUBU t0, AT /* alignment in bytes */
.set at
#endif
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
R10KCBARRIER(0(ra))
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
+ PTR_ADDU t0, 1; \
+ .set reorder;
PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
+ andi t0, a2, 0x40-STORSIZE
beqz t1, .Lmemset_partial\@ /* no block to fill */
- andi t0, a2, 0x40-STORSIZE
PTR_ADDU t1, a0 /* end address */
1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra))
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
bne t1, a0, 1b
.Lmemset_partial\@:
R10KCBARRIER(0(ra))
PTR_SUBU t1, AT
.set at
#endif
+ PTR_ADDU a0, t0 /* dest ptr */
- PTR_ADDU a0, t0 /* dest ptr */
- .set push
- .set noreorder
- .set nomacro
/* ... but first do longs ... */
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2: .set pop
- andi a2, STORMASK /* At most one long to go */
+2: andi a2, STORMASK /* At most one long to go */
beqz a2, 1f
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
#endif
#else
PTR_SUBU t0, $0, a2
move a2, zero /* No remaining longs */
PTR_ADDIU t0, 1
STORE_BYTE(0)
-1: jr ra
- move a2, zero
+1: move a2, zero
+ jr ra
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
bne t1, a0, 1b
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
-2: jr ra /* done */
- move a2, zero
+2: move a2, zero
+ jr ra /* done */
.if __memset == 1
END(memset)
.set __memset, 0
* a2 = a2 - t0 + 1
*/
PTR_SUBU a2, t0
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
.Lfirst_fixup\@:
/* unset_bytes already in a2 */
jr ra
andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1
andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0
.Llast_fixup\@:
/* unset_bytes already in a2 */
jr ra
.Lsmall_fixup\@:
/*
* unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1
*/
PTR_SUBU a2, t1, a0
PTR_ADDIU a2, 1
jr ra
LEAF(memset)
EXPORT_SYMBOL(memset)
+ move v0, a0 /* result */
- move v0, a0 /* result */
andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8