author		Heiko Carstens <hca@linux.ibm.com>	2024-11-06 11:03:06 +0100
committer	Heiko Carstens <hca@linux.ibm.com>	2024-11-12 14:01:28 +0100
commit		c8603d692857a43e6f8b7b5eccab1d9aec48bd12 (patch)
tree		24d87c6b7096962c9e2464e984aaf5cf0172ec9e /arch/s390/include/asm
parent		01bfb451a3e9b7005d626964588d89b699749147 (diff)
s390/cmpxchg: Convert one and two byte case inline assemblies to C
Rewrite __cmpxchg() in order to get rid of the large inline
assemblies. Convert the one and two byte inline assemblies to
C functions.
The generated code of the new implementation is nearly as good (or bad) as
the old variant, but easier to read.
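For readers who want the idea in isolation before reading the diff, here is a
hedged, portable sketch of the technique: emulate a one byte cmpxchg() with a
32 bit compare-and-swap on the naturally aligned word that contains the byte.
The helper name and the GCC/Clang __atomic builtins are choices made for this
sketch only; the patch itself uses its __cs_asm() wrapper around the CS
instruction instead.

/*
 * Sketch only, not the kernel code: emulate a one byte compare-and-swap
 * with a 32 bit CAS on the naturally aligned word containing the byte.
 * Indexing the union by the byte offset inside the word addresses the
 * same byte the caller passed in, so it works for either byte order.
 */
#include <stdint.h>

static inline uint8_t cmpxchg1_sketch(uint8_t *ptr, uint8_t old, uint8_t new)
{
	uint32_t *word = (uint32_t *)((uintptr_t)ptr & ~(uintptr_t)3);
	unsigned int i = (uintptr_t)ptr & 3;	/* byte offset within the word */
	union { uint8_t b[4]; uint32_t w; } old32, new32;
	uint32_t prev = __atomic_load_n(word, __ATOMIC_RELAXED);

	do {
		old32.w = prev;
		if (old32.b[i] != old)		/* byte already differs: report it */
			return old32.b[i];
		new32.w = old32.w;
		new32.b[i] = new;		/* splice the new byte into the word */
	} while (!__atomic_compare_exchange_n(word, &prev, new32.w, 0,
					      __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
	return old;
}

The two byte case in the patch is the same loop with a two-element union and a
halved index.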
Note that the new variants are quite close to the generic cmpxchg_emu_u8()
implementation; however, a conversion to the generic variant will not follow,
since mm/vmstat.c is a heavy user of one byte cmpxchg() and a non-inlined
variant would have a negative performance impact.
Also note that the calls within __arch_cmpxchg() come with rather pointless
"& 0xff..." operations. They exist only to avoid false positive sparse
warnings like "warning: cast truncates bits from constant value ...".
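As a stand-alone illustration of that warning class (this snippet is not from
the patch, and the function name is made up for the example): truncating a
constant via a cast is what sparse complains about, while masking the value
down to the target width first leaves nothing to truncate.

/*
 * Sketch only: the kind of narrowing sparse flags, and why pre-masking
 * with "& 0xff" silences it.
 */
#include <linux/types.h>

static inline u8 sparse_mask_demo(void)
{
	u8 a = (u8)0x1234;		/* sparse: "cast truncates bits from constant value" */
	u8 b = (u8)(0x1234 & 0xff);	/* no warning: 0x34 already fits into a u8 */

	return a ^ b;
}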
Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/include/asm')
-rw-r--r--  arch/s390/include/asm/cmpxchg.h | 162
1 file changed, 73 insertions(+), 89 deletions(-)
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index aae0315374de..587529673399 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -84,106 +84,90 @@ __arch_xchg(unsigned long x, unsigned long address, int size)
 
 void __cmpxchg_called_with_bad_pointer(void);
 
-static __always_inline unsigned long __cmpxchg(unsigned long address,
-					       unsigned long old,
-					       unsigned long new, int size)
+static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
 {
-	switch (size) {
-	case 1: {
-		unsigned int prev, shift, mask;
+	asm volatile(
+		"	cs	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
 
-		shift = (3 ^ (address & 3)) << 3;
-		address ^= address & 3;
-		old = (old & 0xff) << shift;
-		new = (new & 0xff) << shift;
-		mask = ~(0xff << shift);
-		asm volatile(
-			"	l	%[prev],%[address]\n"
-			"	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"0:	lr	%[tmp],%[prev]\n"
-			"	cs	%[prev],%[new],%[address]\n"
-			"	jnl	1f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jz	0b\n"
-			"1:"
-			: [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (old),
-			  [new] "+&d" (new),
-			  [mask] "+&d" (mask)
-			:: "memory", "cc");
-		return prev >> shift;
-	}
-	case 2: {
-		unsigned int prev, shift, mask;
+static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new)
+{
+	asm volatile(
+		"	csg	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
 
-		shift = (2 ^ (address & 2)) << 3;
-		address ^= address & 2;
-		old = (old & 0xffff) << shift;
-		new = (new & 0xffff) << shift;
-		mask = ~(0xffff << shift);
-		asm volatile(
-			"	l	%[prev],%[address]\n"
-			"	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"0:	lr	%[tmp],%[prev]\n"
-			"	cs	%[prev],%[new],%[address]\n"
-			"	jnl	1f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jz	0b\n"
-			"1:"
-			: [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (old),
-			  [new] "+&d" (new),
-			  [mask] "+&d" (mask)
-			:: "memory", "cc");
-		return prev >> shift;
-	}
-	case 4: {
-		unsigned int prev = old;
+static inline u8 __arch_cmpxchg1(u64 ptr, u8 old, u8 new)
+{
+	union {
+		u8 b[4];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = ptr & 3;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
 
-		asm volatile(
-			"	cs	%[prev],%[new],%[address]\n"
-			: [prev] "+&d" (prev),
-			  [address] "+Q" (*(int *)address)
-			: [new] "d" (new)
-			: "memory", "cc");
-		return prev;
-	}
-	case 8: {
-		unsigned long prev = old;
+static inline u16 __arch_cmpxchg2(u64 ptr, u16 old, u16 new)
+{
+	union {
+		u16 b[2];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = (ptr & 3) >> 1;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
 
-		asm volatile(
-			"	csg	%[prev],%[new],%[address]\n"
-			: [prev] "+&d" (prev),
-			  [address] "+QS" (*(long *)address)
-			: [new] "d" (new)
-			: "memory", "cc");
-		return prev;
-	}
+static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
+{
+	switch (size) {
+	case 1:	 return __arch_cmpxchg1(ptr, old & 0xff, new & 0xff);
+	case 2:	 return __arch_cmpxchg2(ptr, old & 0xffff, new & 0xffff);
+	case 4:	 return __cs_asm(ptr, old & 0xffffffff, new & 0xffffffff);
+	case 8:	 return __csg_asm(ptr, old, new);
+	default: __cmpxchg_called_with_bad_pointer();
 	}
-	__cmpxchg_called_with_bad_pointer();
 	return old;
 }
 
 #define arch_cmpxchg(ptr, o, n)						\
 ({									\
-	__typeof__(*(ptr)) __ret;					\
-									\
-	__ret = (__typeof__(*(ptr)))					\
-		__cmpxchg((unsigned long)(ptr), (unsigned long)(o),	\
-			  (unsigned long)(n), sizeof(*(ptr)));		\
-	__ret;								\
+	(__typeof__(*(ptr)))__arch_cmpxchg((unsigned long)(ptr),	\
+					   (unsigned long)(o),		\
+					   (unsigned long)(n),		\
+					   sizeof(*(ptr)));		\
 })
 
 #define arch_cmpxchg64			arch_cmpxchg