summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_buf.h
diff options
context:
space:
mode:
author: Darrick J. Wong <djwong@kernel.org> 2022-07-14 09:22:14 -0700
committer: Darrick J. Wong <djwong@kernel.org> 2022-07-14 09:22:14 -0700
commit: 35c5a09f5346e690df7ff2c9075853e340ee10b3 (patch)
tree: 045be2e041e91ad1e6d25bde968b1c69fbdbd173 /fs/xfs/xfs_buf.h
parent: 4613b17cc4789d6061041f9bd424180251fb6228 (diff)
parent: 298f342245066309189d8637ca7339d56840c3e1 (diff)
downloadlinux-35c5a09f5346e690df7ff2c9075853e340ee10b3.tar.gz
linux-35c5a09f5346e690df7ff2c9075853e340ee10b3.tar.bz2
linux-35c5a09f5346e690df7ff2c9075853e340ee10b3.zip
Merge tag 'xfs-buf-lockless-lookup-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs into xfs-5.20-mergeB
xfs: lockless buffer cache lookups

Current work to merge the XFS inode life cycle with the VFS inode
life cycle is finding some interesting issues. If we have a path
that hits buffer trylocks fairly hard (e.g. a non-blocking
background inode freeing function), we end up hitting massive
contention on the buffer cache hash locks:

-   92.71%     0.05%  [kernel]  [k] xfs_inodegc_worker
   - 92.67% xfs_inodegc_worker
      - 92.13% xfs_inode_unlink
         - 91.52% xfs_inactive_ifree
            - 85.63% xfs_read_agi
               - 85.61% xfs_trans_read_buf_map
                  - 85.59% xfs_buf_read_map
                     - xfs_buf_get_map
                        - 85.55% xfs_buf_find
                           - 72.87% _raw_spin_lock
                              - do_raw_spin_lock
                                   71.86% __pv_queued_spin_lock_slowpath
                           - 8.74% xfs_buf_rele
                              - 7.88% _raw_spin_lock
                                 - 7.88% do_raw_spin_lock
                                      7.63% __pv_queued_spin_lock_slowpath
                           - 1.70% xfs_buf_trylock
                              - 1.68% down_trylock
                                 - 1.41% _raw_spin_lock_irqsave
                                    - 1.39% do_raw_spin_lock
                                         __pv_queued_spin_lock_slowpath
                           - 0.76% _raw_spin_unlock
                                0.75% do_raw_spin_unlock

This is basically hammering the pag->pag_buf_lock from lots of CPUs
doing trylocks at the same time. Most of the buffer trylock
operations ultimately fail after we've done the lookup, so we're
really hammering the buf hash lock whilst making no progress.

We can also see significant spinlock traffic on the same lock just
under normal operation when lots of tasks are accessing metadata
from the same AG, so let's avoid all this by creating a lookup fast
path which leverages the rhashtable's ability to do RCU protected
lookups.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>

* tag 'xfs-buf-lockless-lookup-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: lockless buffer lookup
  xfs: remove a superflous hash lookup when inserting new buffers
  xfs: reduce the number of atomic when locking a buffer after lookup
  xfs: merge xfs_buf_find() and xfs_buf_get_map()
  xfs: break up xfs_buf_find() into individual pieces
  xfs: rework xfs_buf_incore() API
Diffstat (limited to 'fs/xfs/xfs_buf.h')
-rw-r--r-- fs/xfs/xfs_buf.h | 21
1 files changed, 17 insertions, 4 deletions
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 1ee3056ff9cf..02b3c1635ec3 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -42,9 +42,11 @@ struct xfs_buf;
#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */
/* flags used only as arguments to access routines */
+#define XBF_INCORE (1u << 29)/* lookup only, return if found in cache */
#define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */
#define XBF_UNMAPPED (1u << 31)/* do not map the buffer */
+
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
@@ -63,6 +65,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
/* The following interface flags should never be set */ \
+ { XBF_INCORE, "INCORE" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, \
{ XBF_UNMAPPED, "UNMAPPED" }
@@ -193,13 +196,10 @@ struct xfs_buf {
int b_last_error;
const struct xfs_buf_ops *b_ops;
+ struct rcu_head b_rcu;
};
/* Finding and Reading Buffers */
-struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
- xfs_daddr_t blkno, size_t numblks,
- xfs_buf_flags_t flags);
-
int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp);
int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
@@ -210,6 +210,19 @@ void xfs_buf_readahead_map(struct xfs_buftarg *target,
const struct xfs_buf_ops *ops);
static inline int
+xfs_buf_incore(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks,
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+
+ return xfs_buf_get_map(target, &map, 1, XBF_INCORE | flags, bpp);
+}
+
+static inline int
xfs_buf_get(
struct xfs_buftarg *target,
xfs_daddr_t blkno,