author     Dave Chinner <david@fromorbit.com>  2022-04-21 16:46:17 +1000
committer  Dave Chinner <david@fromorbit.com>  2022-04-21 16:46:17 +1000
commit     a44a027a8b2a20fec30e0e9c99b0eb41c03e7420 (patch)
tree       2cce770cced23924fd30b40601b52ffb609baab5 /fs/xfs/xfs_iomap.c
parent     463260d7670566c357dfa2c38bc3124c98b646bc (diff)
parent     973ac0eb3a7dfedecd385bd2b48b12e62a0492f2 (diff)
Merge tag 'large-extent-counters-v9' of https://github.com/chandanr/linux into xfs-5.19-for-next
xfs: Large extent counters
Commit 3f8a4f1d876d3e3e49e50b0396eaffcc4ba71b08 ("xfs: fix inode fork
extent count overflow") notes that it should be possible to create
10 billion data fork extents. However, the corresponding on-disk
field has a signed 32-bit type. Hence this
patchset extends the per-inode data fork extent counter to 64 bits
(out of which 48 bits are used to store the extent count).
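As a quick check on those widths (a worked example only, not code from
this patchset): a 48-bit extent count comfortably covers the 10 billion
extents mentioned above, while a signed 32-bit count cannot.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Old limit: the data fork extent count is a signed 32-bit value. */
	int64_t old_max = INT32_MAX;			/* 2^31 - 1 = 2,147,483,647 */

	/* New limit: a 64-bit field of which 48 bits hold the extent count. */
	int64_t new_max = ((int64_t)1 << 48) - 1;	/* 2^48 - 1, about 2.8e14 */

	int64_t needed = 10000000000LL;			/* ~10 billion extents */

	printf("10 billion extents fit in old counter? %s\n",
	       needed <= old_max ? "yes" : "no");	/* prints "no" */
	printf("10 billion extents fit in new counter? %s\n",
	       needed <= new_max ? "yes" : "no");	/* prints "yes" */
	return 0;
}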
Also, XFS has an attribute fork extent counter which is 16 bits
wide. A workload that:
1. creates 1 million 255-byte xattrs,
2. deletes 50% of these xattrs in an alternating manner, and
3. tries to insert 400,000 new 255-byte xattrs
causes the xattr extent counter to overflow.
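That workload is straightforward to sketch from user space. The program
below is only an illustration of the pattern being described: the target
path, xattr naming scheme, and exact counts are assumptions, not a
reproducer shipped with this series.

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

#define NR_CREATE	1000000		/* step 1: one million xattrs */
#define NR_REINSERT	400000		/* step 3: 400,000 new xattrs */

static void set_attr(const char *path, long id, const char *val)
{
	char name[64];

	snprintf(name, sizeof(name), "user.attr_%08ld", id);
	if (setxattr(path, name, val, 255, 0) < 0)
		perror(name);
}

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/mnt/xfs/testfile";
	char name[64];
	char val[255];
	long i;

	memset(val, 'x', sizeof(val));

	/* 1. Create 1 million 255-byte xattrs. */
	for (i = 0; i < NR_CREATE; i++)
		set_attr(path, i, val);

	/* 2. Delete every other xattr to fragment the attr fork. */
	for (i = 0; i < NR_CREATE; i += 2) {
		snprintf(name, sizeof(name), "user.attr_%08ld", i);
		if (removexattr(path, name) < 0)
			perror(name);
	}

	/* 3. Insert 400,000 new 255-byte xattrs into the fragmented fork. */
	for (i = NR_CREATE; i < NR_CREATE + NR_REINSERT; i++)
		set_attr(path, i, val);

	return 0;
}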
Dave tells me that there are instances where a single file has more
than 100 million hardlinks. With parent pointers being stored in
xattrs, we will overflow the signed 16-bit attribute extent
counter when a large number of hardlinks are created. Hence this
patchset extends the on-disk field to 32 bits.
The following changes are made to accomplish this (a rough sketch
follows the list):
1. A 64-bit inode field is carved out of the existing di_pad and
di_flushiter fields to hold the 64-bit data fork extent counter.
2. The existing 32-bit inode data fork extent counter will be used to
hold the attribute fork extent counter.
3. A new incompat superblock flag is added to prevent older kernels
from mounting the filesystem.
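The sketch below illustrates what changes 1-3 amount to. The field and
flag names are made up for illustration; the actual struct xfs_dinode
layout, superblock flag name, and bit value in the merged series may
differ.

#include <stdint.h>

/* Illustrative stand-in for the reworked extent counter fields, not the
 * authoritative on-disk definitions from this series. */
struct sketch_inode_extent_counters {
	/*
	 * 1. A 64-bit field carved out of the old di_pad/di_flushiter space
	 *    holds the data fork extent count; only the low 48 bits are used,
	 *    so the effective limit is 2^48 - 1 extents.
	 */
	uint64_t	data_fork_extents;

	/*
	 * 2. The old 32-bit data fork extent counter is reused to hold the
	 *    (previously 16-bit) attribute fork extent count.
	 */
	uint32_t	attr_fork_extents;
};

/*
 * 3. A new incompat superblock feature bit: kernels that do not recognise
 *    the bit refuse to mount the filesystem. Name and bit value assumed.
 */
#define SKETCH_SB_FEAT_INCOMPAT_NREXT64	(1U << 5)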
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
-rw-r--r--  fs/xfs/xfs_iomap.c | 33
1 file changed, 19 insertions, 14 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index e552ce541ec2..5a393259a3a3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -251,6 +251,8 @@ xfs_iomap_write_direct(
 		return error;
 
 	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, nr_exts);
+	if (error == -EFBIG)
+		error = xfs_iext_count_upgrade(tp, ip, nr_exts);
 	if (error)
 		goto out_trans_cancel;
 
@@ -402,7 +404,7 @@ xfs_iomap_prealloc_size(
 	 */
 	plen = prev.br_blockcount;
 	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
-		if (plen > MAXEXTLEN / 2 ||
+		if (plen > XFS_MAX_BMBT_EXTLEN / 2 ||
 		    isnullstartblock(got.br_startblock) ||
 		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
 		    got.br_startblock + got.br_blockcount != prev.br_startblock)
@@ -414,23 +416,23 @@ xfs_iomap_prealloc_size(
 	/*
 	 * If the size of the extents is greater than half the maximum extent
 	 * length, then use the current offset as the basis. This ensures that
-	 * for large files the preallocation size always extends to MAXEXTLEN
-	 * rather than falling short due to things like stripe unit/width
-	 * alignment of real extents.
+	 * for large files the preallocation size always extends to
+	 * XFS_BMBT_MAX_EXTLEN rather than falling short due to things like stripe
+	 * unit/width alignment of real extents.
 	 */
 	alloc_blocks = plen * 2;
-	if (alloc_blocks > MAXEXTLEN)
+	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
 		alloc_blocks = XFS_B_TO_FSB(mp, offset);
 	qblocks = alloc_blocks;
 
 	/*
-	 * MAXEXTLEN is not a power of two value but we round the prealloc down
-	 * to the nearest power of two value after throttling. To prevent the
-	 * round down from unconditionally reducing the maximum supported
-	 * prealloc size, we round up first, apply appropriate throttling,
-	 * round down and cap the value to MAXEXTLEN.
+	 * XFS_BMBT_MAX_EXTLEN is not a power of two value but we round the prealloc
+	 * down to the nearest power of two value after throttling. To prevent
+	 * the round down from unconditionally reducing the maximum supported
+	 * prealloc size, we round up first, apply appropriate throttling, round
+	 * down and cap the value to XFS_BMBT_MAX_EXTLEN.
 	 */
-	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
+	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(XFS_MAX_BMBT_EXTLEN),
 			       alloc_blocks);
 
 	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
@@ -478,14 +480,14 @@ xfs_iomap_prealloc_size(
 	 */
 	if (alloc_blocks)
 		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
-	if (alloc_blocks > MAXEXTLEN)
-		alloc_blocks = MAXEXTLEN;
+	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
+		alloc_blocks = XFS_MAX_BMBT_EXTLEN;
 
 	/*
 	 * If we are still trying to allocate more space than is
 	 * available, squash the prealloc hard. This can happen if we
 	 * have a large file on a small filesystem and the above
-	 * lowspace thresholds are smaller than MAXEXTLEN.
+	 * lowspace thresholds are smaller than XFS_BMBT_MAX_EXTLEN.
 	 */
 	while (alloc_blocks && alloc_blocks >= freesp)
 		alloc_blocks >>= 4;
@@ -555,6 +557,9 @@ xfs_iomap_write_unwritten(
 
 		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
 				XFS_IEXT_WRITE_UNWRITTEN_CNT);
+		if (error == -EFBIG)
+			error = xfs_iext_count_upgrade(tp, ip,
+					XFS_IEXT_WRITE_UNWRITTEN_CNT);
 		if (error)
 			goto error_on_bmapi_transaction;
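One note on the MAXEXTLEN -> XFS_MAX_BMBT_EXTLEN rename visible above:
the limit is (2^21 - 1) blocks, which is not a power of two, hence the
round-up/throttle/round-down/cap ordering described in the updated
comments. The small user-space program below (the power-of-two helpers
are reimplemented locally and are not the kernel's) shows why rounding
down directly would halve the maximum preallocation.

#include <stdio.h>

#define MAX_BMBT_EXTLEN	((1UL << 21) - 1)	/* 2097151 blocks, not a power of two */

static unsigned long rounddown_p2(unsigned long n)
{
	unsigned long p = 1;

	while (p * 2 <= n)
		p *= 2;
	return p;
}

static unsigned long roundup_p2(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p *= 2;
	return p;
}

int main(void)
{
	unsigned long limit = MAX_BMBT_EXTLEN;
	unsigned long alloc_blocks;

	/* Rounding the limit down first silently halves the maximum prealloc. */
	printf("rounddown(%lu) = %lu\n", limit, rounddown_p2(limit));	/* 1048576 */

	/* Round up first, (throttle here), round down, then cap to the limit. */
	alloc_blocks = roundup_p2(limit);		/* 2097152 */
	alloc_blocks = rounddown_p2(alloc_blocks);	/* still 2097152 */
	if (alloc_blocks > limit)
		alloc_blocks = limit;			/* capped to 2097151 */

	printf("round up, round down, cap = %lu\n", alloc_blocks);
	return 0;
}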