xtensa: clean up word alignment macros in assembly code (fbb871e2) · Commits · jan.koester / Linux

arch/xtensa/include/asm/asmmacro.h

+33 −0

Original line number	Original line	Diff line number	Diff line
	@@ -158,4 +158,37 @@
	.previous \		.previous \
	97:		97:


			/*
			 * __src_b r, w0, w1
			 *
			 * Endianness-neutral extraction of an unaligned word: the word is
			 * split across the register pair w0/w1 and the result is placed in r.
			 * SAR must already hold the bit position at which the word starts;
			 * use __ssa8 to set it up.
			 */

			.macro __src_b r, w0, w1
			#ifndef __XTENSA_EB__
			src \r, \w1, \w0	/* little-endian: source operands swapped */
			#else
			src \r, \w0, \w1	/* big-endian: source operands as given */
			#endif
			.endm

			/*
			 * __ssa8 r
			 *
			 * Set SAR from the two lowest address bits of r so that a following
			 * __src_b can extract the unaligned word that starts at address r.
			 * The two source registers given to __src_b are expected to hold the
			 * words loaded from the consecutive aligned addresses covering r.
			 * Works for either machine endianness.
			 *
			 *   r & 3     0    1    2    3
			 *   LE SAR    0    8   16   24
			 *   BE SAR   32   24   16    8
			 */

			.macro __ssa8 r
			#ifndef __XTENSA_EB__
			ssa8l \r	/* little-endian variant */
			#else
			ssa8b \r	/* big-endian variant */
			#endif
			.endm

	#endif /* _XTENSA_ASMMACRO_H */		#endif /* _XTENSA_ASMMACRO_H */

arch/xtensa/kernel/align.S

+1 −4

Original line number	Original line	Diff line number	Diff line
	@@ -19,6 +19,7 @@
	#include <linux/linkage.h>		#include <linux/linkage.h>
	#include <asm/current.h>		#include <asm/current.h>
	#include <asm/asm-offsets.h>		#include <asm/asm-offsets.h>
			#include <asm/asmmacro.h>
	#include <asm/processor.h>		#include <asm/processor.h>

	#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION		#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
	@@ -66,8 +67,6 @@
	#define INSN_T 24		#define INSN_T 24
	#define INSN_OP1 16		#define INSN_OP1 16

	.macro __src_b r, w0, w1; src \r, \w0, \w1; .endm
	.macro __ssa8 r; ssa8b \r; .endm
	.macro __ssa8r r; ssa8l \r; .endm		.macro __ssa8r r; ssa8l \r; .endm
	.macro __sh r, s; srl \r, \s; .endm		.macro __sh r, s; srl \r, \s; .endm
	.macro __sl r, s; sll \r, \s; .endm		.macro __sl r, s; sll \r, \s; .endm
	@@ -81,8 +80,6 @@
	#define INSN_T 4		#define INSN_T 4
	#define INSN_OP1 12		#define INSN_OP1 12

	.macro __src_b r, w0, w1; src \r, \w1, \w0; .endm
	.macro __ssa8 r; ssa8l \r; .endm
	.macro __ssa8r r; ssa8b \r; .endm		.macro __ssa8r r; ssa8b \r; .endm
	.macro __sh r, s; sll \r, \s; .endm		.macro __sh r, s; sll \r, \s; .endm
	.macro __sl r, s; srl \r, \s; .endm		.macro __sl r, s; srl \r, \s; .endm

arch/xtensa/lib/memcopy.S

+17 −32

Original line number	Original line	Diff line number	Diff line
	@@ -10,22 +10,7 @@
	*/		*/

	#include <variant/core.h>		#include <variant/core.h>
			#include <asm/asmmacro.h>
	.macro src_b r, w0, w1
	#ifdef __XTENSA_EB__
	src \r, \w0, \w1
	#else
	src \r, \w1, \w0
	#endif
	.endm

	.macro ssa8 r
	#ifdef __XTENSA_EB__
	ssa8b \r
	#else
	ssa8l \r
	#endif
	.endm

	/*		/*
	* void *memcpy(void *dst, const void *src, size_t len);		* void *memcpy(void *dst, const void *src, size_t len);
	@@ -209,7 +194,7 @@ memcpy:
	.Lsrcunaligned:		.Lsrcunaligned:
	_beqz a4, .Ldone # avoid loading anything for zero-length copies		_beqz a4, .Ldone # avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src		# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8 a3 # set shift amount from byte offset		__ssa8 a3 # set shift amount from byte offset

	/* set to 1 when running on ISS (simulator) with the		/* set to 1 when running on ISS (simulator) with the
	lint or ferret client, or 0 to save a few cycles */		lint or ferret client, or 0 to save a few cycles */
	@@ -229,16 +214,16 @@ memcpy:
	.Loop2:		.Loop2:
	l32i a7, a3, 4		l32i a7, a3, 4
	l32i a8, a3, 8		l32i a8, a3, 8
	src_b a6, a6, a7		__src_b a6, a6, a7
	s32i a6, a5, 0		s32i a6, a5, 0
	l32i a9, a3, 12		l32i a9, a3, 12
	src_b a7, a7, a8		__src_b a7, a7, a8
	s32i a7, a5, 4		s32i a7, a5, 4
	l32i a6, a3, 16		l32i a6, a3, 16
	src_b a8, a8, a9		__src_b a8, a8, a9
	s32i a8, a5, 8		s32i a8, a5, 8
	addi a3, a3, 16		addi a3, a3, 16
	src_b a9, a9, a6		__src_b a9, a9, a6
	s32i a9, a5, 12		s32i a9, a5, 12
	addi a5, a5, 16		addi a5, a5, 16
	#if !XCHAL_HAVE_LOOPS		#if !XCHAL_HAVE_LOOPS
	@@ -249,10 +234,10 @@ memcpy:
	# copy 8 bytes		# copy 8 bytes
	l32i a7, a3, 4		l32i a7, a3, 4
	l32i a8, a3, 8		l32i a8, a3, 8
	src_b a6, a6, a7		__src_b a6, a6, a7
	s32i a6, a5, 0		s32i a6, a5, 0
	addi a3, a3, 8		addi a3, a3, 8
	src_b a7, a7, a8		__src_b a7, a7, a8
	s32i a7, a5, 4		s32i a7, a5, 4
	addi a5, a5, 8		addi a5, a5, 8
	mov a6, a8		mov a6, a8
	@@ -261,7 +246,7 @@ memcpy:
	# copy 4 bytes		# copy 4 bytes
	l32i a7, a3, 4		l32i a7, a3, 4
	addi a3, a3, 4		addi a3, a3, 4
	src_b a6, a6, a7		__src_b a6, a6, a7
	s32i a6, a5, 0		s32i a6, a5, 0
	addi a5, a5, 4		addi a5, a5, 4
	mov a6, a7		mov a6, a7
	@@ -485,7 +470,7 @@ memmove:
	.Lbacksrcunaligned:		.Lbacksrcunaligned:
	_beqz a4, .Lbackdone # avoid loading anything for zero-length copies		_beqz a4, .Lbackdone # avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src		# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8 a3 # set shift amount from byte offset		__ssa8 a3 # set shift amount from byte offset
	#define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with		#define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with
	* the lint or ferret client, or 0		* the lint or ferret client, or 0
	* to save a few cycles */		* to save a few cycles */
	@@ -506,15 +491,15 @@ memmove:
	l32i a7, a3, 12		l32i a7, a3, 12
	l32i a8, a3, 8		l32i a8, a3, 8
	addi a5, a5, -16		addi a5, a5, -16
	src_b a6, a7, a6		__src_b a6, a7, a6
	s32i a6, a5, 12		s32i a6, a5, 12
	l32i a9, a3, 4		l32i a9, a3, 4
	src_b a7, a8, a7		__src_b a7, a8, a7
	s32i a7, a5, 8		s32i a7, a5, 8
	l32i a6, a3, 0		l32i a6, a3, 0
	src_b a8, a9, a8		__src_b a8, a9, a8
	s32i a8, a5, 4		s32i a8, a5, 4
	src_b a9, a6, a9		__src_b a9, a6, a9
	s32i a9, a5, 0		s32i a9, a5, 0
	#if !XCHAL_HAVE_LOOPS		#if !XCHAL_HAVE_LOOPS
	bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start		bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
	@@ -526,9 +511,9 @@ memmove:
	l32i a7, a3, 4		l32i a7, a3, 4
	l32i a8, a3, 0		l32i a8, a3, 0
	addi a5, a5, -8		addi a5, a5, -8
	src_b a6, a7, a6		__src_b a6, a7, a6
	s32i a6, a5, 4		s32i a6, a5, 4
	src_b a7, a8, a7		__src_b a7, a8, a7
	s32i a7, a5, 0		s32i a7, a5, 0
	mov a6, a8		mov a6, a8
	.Lback12:		.Lback12:
	@@ -537,7 +522,7 @@ memmove:
	addi a3, a3, -4		addi a3, a3, -4
	l32i a7, a3, 0		l32i a7, a3, 0
	addi a5, a5, -4		addi a5, a5, -4
	src_b a6, a7, a6		__src_b a6, a7, a6
	s32i a6, a5, 0		s32i a6, a5, 0
	mov a6, a7		mov a6, a7
	.Lback13:		.Lback13:

arch/xtensa/lib/usercopy.S

+8 −16

Original line number	Original line	Diff line number	Diff line
	@@ -56,14 +56,6 @@
	#include <variant/core.h>		#include <variant/core.h>
	#include <asm/asmmacro.h>		#include <asm/asmmacro.h>

	#ifdef __XTENSA_EB__
	#define ALIGN(R, W0, W1) src R, W0, W1
	#define SSA8(R) ssa8b R
	#else
	#define ALIGN(R, W0, W1) src R, W1, W0
	#define SSA8(R) ssa8l R
	#endif

	.text		.text
	.align 4		.align 4
	.global __xtensa_copy_user		.global __xtensa_copy_user
	@@ -81,7 +73,7 @@ __xtensa_copy_user:
	# per iteration		# per iteration
	movi a8, 3 # if source is also aligned,		movi a8, 3 # if source is also aligned,
	bnone a3, a8, .Laligned # then use word copy		bnone a3, a8, .Laligned # then use word copy
	SSA8( a3) # set shift amount from byte offset		__ssa8 a3 # set shift amount from byte offset
	bnez a4, .Lsrcunaligned		bnez a4, .Lsrcunaligned
	movi a2, 0 # return success for len==0		movi a2, 0 # return success for len==0
	retw		retw
	@@ -220,16 +212,16 @@ EX(10f) l32i a6, a3, 0 # load first word
	.Loop2:		.Loop2:
	EX(10f) l32i a7, a3, 4		EX(10f) l32i a7, a3, 4
	EX(10f) l32i a8, a3, 8		EX(10f) l32i a8, a3, 8
	ALIGN( a6, a6, a7)		__src_b a6, a6, a7
	EX(10f) s32i a6, a5, 0		EX(10f) s32i a6, a5, 0
	EX(10f) l32i a9, a3, 12		EX(10f) l32i a9, a3, 12
	ALIGN( a7, a7, a8)		__src_b a7, a7, a8
	EX(10f) s32i a7, a5, 4		EX(10f) s32i a7, a5, 4
	EX(10f) l32i a6, a3, 16		EX(10f) l32i a6, a3, 16
	ALIGN( a8, a8, a9)		__src_b a8, a8, a9
	EX(10f) s32i a8, a5, 8		EX(10f) s32i a8, a5, 8
	addi a3, a3, 16		addi a3, a3, 16
	ALIGN( a9, a9, a6)		__src_b a9, a9, a6
	EX(10f) s32i a9, a5, 12		EX(10f) s32i a9, a5, 12
	addi a5, a5, 16		addi a5, a5, 16
	#if !XCHAL_HAVE_LOOPS		#if !XCHAL_HAVE_LOOPS
	@@ -240,10 +232,10 @@ EX(10f) s32i a9, a5, 12
	# copy 8 bytes		# copy 8 bytes
	EX(10f) l32i a7, a3, 4		EX(10f) l32i a7, a3, 4
	EX(10f) l32i a8, a3, 8		EX(10f) l32i a8, a3, 8
	ALIGN( a6, a6, a7)		__src_b a6, a6, a7
	EX(10f) s32i a6, a5, 0		EX(10f) s32i a6, a5, 0
	addi a3, a3, 8		addi a3, a3, 8
	ALIGN( a7, a7, a8)		__src_b a7, a7, a8
	EX(10f) s32i a7, a5, 4		EX(10f) s32i a7, a5, 4
	addi a5, a5, 8		addi a5, a5, 8
	mov a6, a8		mov a6, a8
	@@ -252,7 +244,7 @@ EX(10f) s32i a7, a5, 4
	# copy 4 bytes		# copy 4 bytes
	EX(10f) l32i a7, a3, 4		EX(10f) l32i a7, a3, 4
	addi a3, a3, 4		addi a3, a3, 4
	ALIGN( a6, a6, a7)		__src_b a6, a6, a7
	EX(10f) s32i a6, a5, 0		EX(10f) s32i a6, a5, 0
	addi a5, a5, 4		addi a5, a5, 4
	mov a6, a7		mov a6, a7