author		Heiko Carstens <hca@linux.ibm.com>	2024-11-06 11:03:06 +0100
committer	Heiko Carstens <hca@linux.ibm.com>	2024-11-12 14:01:28 +0100
commit		c8603d692857a43e6f8b7b5eccab1d9aec48bd12 (patch)
tree		24d87c6b7096962c9e2464e984aaf5cf0172ec9e /arch/s390/include/asm
parent		01bfb451a3e9b7005d626964588d89b699749147 (diff)
s390/cmpxchg: Convert one and two byte case inline assemblies to C
Rewrite __cmpxchg() in order to get rid of the large inline
assemblies. Convert the one and two byte inline assemblies to
C functions.
The generated code of the new implementation is nearly as good (or bad) as
the old variant, but easier to read.
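For readers who want the idea in isolation before reading the diff, here is a
hedged, portable sketch of the technique: emulate a one byte cmpxchg() with a
32 bit compare-and-swap on the naturally aligned word that contains the byte.
The helper name and the GCC/Clang __atomic builtins are choices made for this
sketch only; the patch itself uses its __cs_asm() wrapper around the CS
instruction instead.

/*
 * Sketch only, not the kernel code: emulate a one byte compare-and-swap
 * with a 32 bit CAS on the naturally aligned word containing the byte.
 * Indexing the union by the byte offset inside the word addresses the
 * same byte the caller passed in, so it works for either byte order.
 */
#include <stdint.h>

static inline uint8_t cmpxchg1_sketch(uint8_t *ptr, uint8_t old, uint8_t new)
{
	uint32_t *word = (uint32_t *)((uintptr_t)ptr & ~(uintptr_t)3);
	unsigned int i = (uintptr_t)ptr & 3;	/* byte offset within the word */
	union { uint8_t b[4]; uint32_t w; } old32, new32;
	uint32_t prev = __atomic_load_n(word, __ATOMIC_RELAXED);

	do {
		old32.w = prev;
		if (old32.b[i] != old)		/* byte already differs: report it */
			return old32.b[i];
		new32.w = old32.w;
		new32.b[i] = new;		/* splice the new byte into the word */
	} while (!__atomic_compare_exchange_n(word, &prev, new32.w, 0,
					      __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));
	return old;
}

The two byte case in the patch is the same loop with a two-element union and a
halved index.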
Note that the new variants are quite close to the generic cmpxchg_emu_u8()
implementation; however, a conversion to the generic variant will not follow,
since mm/vmstat.c is a heavy user of one byte cmpxchg() and a non-inlined
variant would have a negative performance impact.
Also note that the calls within __arch_cmpxchg() come with rather pointless
"& 0xff..." operations. They exist only to avoid false positive sparse
warnings like "warning: cast truncates bits from constant value ...".
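As a stand-alone illustration of that warning class (this snippet is not from
the patch, and the function name is made up for the example): truncating a
constant via a cast is what sparse complains about, while masking the value
down to the target width first leaves nothing to truncate.

/*
 * Sketch only: the kind of narrowing sparse flags, and why pre-masking
 * with "& 0xff" silences it.
 */
#include <linux/types.h>

static inline u8 sparse_mask_demo(void)
{
	u8 a = (u8)0x1234;		/* sparse: "cast truncates bits from constant value" */
	u8 b = (u8)(0x1234 & 0xff);	/* no warning: 0x34 already fits into a u8 */

	return a ^ b;
}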
Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/include/asm')
-rw-r--r--  arch/s390/include/asm/cmpxchg.h | 162
1 file changed, 73 insertions(+), 89 deletions(-)
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index aae0315374de..587529673399 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -84,106 +84,90 @@ __arch_xchg(unsigned long x, unsigned long address, int size)
 
 void __cmpxchg_called_with_bad_pointer(void);
 
-static __always_inline unsigned long __cmpxchg(unsigned long address,
-					       unsigned long old,
-					       unsigned long new, int size)
+static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
 {
-	switch (size) {
-	case 1: {
-		unsigned int prev, shift, mask;
+	asm volatile(
+		"	cs	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
 
-		shift = (3 ^ (address & 3)) << 3;
-		address ^= address & 3;
-		old = (old & 0xff) << shift;
-		new = (new & 0xff) << shift;
-		mask = ~(0xff << shift);
-		asm volatile(
-			"	l	%[prev],%[address]\n"
-			"	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"0:	lr	%[tmp],%[prev]\n"
-			"	cs	%[prev],%[new],%[address]\n"
-			"	jnl	1f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jz	0b\n"
-			"1:"
-			: [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (old),
-			  [new] "+&d" (new),
-			  [mask] "+&d" (mask)
-			:: "memory", "cc");
-		return prev >> shift;
-	}
-	case 2: {
-		unsigned int prev, shift, mask;
+static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new)
+{
+	asm volatile(
+		"	csg	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
 
-		shift = (2 ^ (address & 2)) << 3;
-		address ^= address & 2;
-		old = (old & 0xffff) << shift;
-		new = (new & 0xffff) << shift;
-		mask = ~(0xffff << shift);
-		asm volatile(
-			"	l	%[prev],%[address]\n"
-			"	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"0:	lr	%[tmp],%[prev]\n"
-			"	cs	%[prev],%[new],%[address]\n"
-			"	jnl	1f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jz	0b\n"
-			"1:"
-			: [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (old),
-			  [new] "+&d" (new),
-			  [mask] "+&d" (mask)
-			:: "memory", "cc");
-		return prev >> shift;
-	}
-	case 4: {
-		unsigned int prev = old;
+static inline u8 __arch_cmpxchg1(u64 ptr, u8 old, u8 new)
+{
+	union {
+		u8 b[4];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = ptr & 3;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
 
-		asm volatile(
-			"	cs	%[prev],%[new],%[address]\n"
-			: [prev] "+&d" (prev),
-			  [address] "+Q" (*(int *)address)
-			: [new] "d" (new)
-			: "memory", "cc");
-		return prev;
-	}
-	case 8: {
-		unsigned long prev = old;
+static inline u16 __arch_cmpxchg2(u64 ptr, u16 old, u16 new)
+{
+	union {
+		u16 b[2];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = (ptr & 3) >> 1;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
 
-		asm volatile(
-			"	csg	%[prev],%[new],%[address]\n"
-			: [prev] "+&d" (prev),
-			  [address] "+QS" (*(long *)address)
-			: [new] "d" (new)
-			: "memory", "cc");
-		return prev;
-	}
+static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
+{
+	switch (size) {
+	case 1:	 return __arch_cmpxchg1(ptr, old & 0xff, new & 0xff);
+	case 2:	 return __arch_cmpxchg2(ptr, old & 0xffff, new & 0xffff);
+	case 4:	 return __cs_asm(ptr, old & 0xffffffff, new & 0xffffffff);
+	case 8:	 return __csg_asm(ptr, old, new);
+	default: __cmpxchg_called_with_bad_pointer();
 	}
-	__cmpxchg_called_with_bad_pointer();
 	return old;
 }
 
 #define arch_cmpxchg(ptr, o, n)						\
 ({									\
-	__typeof__(*(ptr)) __ret;					\
-									\
-	__ret = (__typeof__(*(ptr)))					\
-		__cmpxchg((unsigned long)(ptr), (unsigned long)(o),	\
-			  (unsigned long)(n), sizeof(*(ptr)));		\
-	__ret;								\
+	(__typeof__(*(ptr)))__arch_cmpxchg((unsigned long)(ptr),	\
+					   (unsigned long)(o),		\
+					   (unsigned long)(n),		\
+					   sizeof(*(ptr)));		\
 })
 
 #define arch_cmpxchg64			arch_cmpxchg