From 68a9f5e7007c1afa2cf6830b690a90d0187c0684 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 Jun 2016 09:53:44 +1000
Subject: xfs: implement iomap based buffered write path

Convert XFS to use the new iomap based multipage write path. This involves
implementing the ->iomap_begin and ->iomap_end methods, and switching the
buffered file write, page_mkwrite and xfs_iozero paths to the new iomap
helpers.

With this change __xfs_get_blocks will never be used for buffered writes,
and the code handling them can be removed.

Based on earlier code from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c | 71 +++++++++++++++++++++++--------------------------------
 1 file changed, 30 insertions(+), 41 deletions(-)

(limited to 'fs/xfs/xfs_file.c')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..7316d3841c53 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -79,57 +80,27 @@ xfs_rw_ilock_demote(
 		inode_unlock(VFS_I(ip));
 }
 
-/*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
- */
-int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+static int
+xfs_dax_zero_range(
+	struct inode		*inode,
+	loff_t			pos,
+	size_t			count)
 {
-	struct page		*page;
-	struct address_space	*mapping;
 	int			status = 0;
 
-
-	mapping = VFS_I(ip)->i_mapping;
 	do {
 		unsigned offset, bytes;
-		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE -1)); /* Within page */
 		bytes = PAGE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
 
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
+		status = dax_zero_page_range(inode, pos, bytes,
+					     xfs_get_blocks_direct);
+		if (status)
+			break;
 
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
 		pos += bytes;
 		count -= bytes;
 	} while (count);
@@ -137,6 +108,24 @@ xfs_iozero(
 	return status;
 }
 
+/*
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
+ */
+int
+xfs_iozero(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	size_t			count)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	if (IS_DAX(VFS_I(ip)))
+		return xfs_dax_zero_range(inode, pos, count);
+	else
+		return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);
+}
+
 int
 xfs_update_prealloc_flags(
 	struct xfs_inode	*ip,
@@ -841,7 +830,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1542,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
-- 
cgit v1.2.3


From 459f0fbc2a827da37bbfaf3cae8da4ad8884da12 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 Jun 2016 09:55:18 +1000
Subject: xfs: use iomap infrastructure for DAX zeroing

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

(limited to 'fs/xfs/xfs_file.c')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7316d3841c53..090a90f0d02c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -80,34 +80,6 @@ xfs_rw_ilock_demote(
 		inode_unlock(VFS_I(ip));
 }
 
-static int
-xfs_dax_zero_range(
-	struct inode		*inode,
-	loff_t			pos,
-	size_t			count)
-{
-	int			status = 0;
-
-	do {
-		unsigned offset, bytes;
-
-		offset = (pos & (PAGE_SIZE -1)); /* Within page */
-		bytes = PAGE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		status = dax_zero_page_range(inode, pos, bytes,
-					     xfs_get_blocks_direct);
-		if (status)
-			break;
-
-		pos += bytes;
-		count -= bytes;
-	} while (count);
-
-	return status;
-}
-
 /*
  * Clear the specified ranges to zero through either the pagecache or DAX.
  * Holes and unwritten extents will be left as-is as they already are zeroed.
@@ -118,12 +90,7 @@ xfs_iozero(
 	loff_t			pos,
 	size_t			count)
 {
-	struct inode		*inode = VFS_I(ip);
-
-	if (IS_DAX(VFS_I(ip)))
-		return xfs_dax_zero_range(inode, pos, count);
-	else
-		return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);
+	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
 }
 
 int
-- 
cgit v1.2.3


From 7bb41db3ea160ea55cc46af07e45f7cb1e2968ba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 Jun 2016 09:56:26 +1000
Subject: xfs: handle 64-bit length in xfs_iozero

We'll want to use this code for large offsets now that we're
skipping holes and unwritten extents efficiently.  Also rename it to
xfs_zero_range to be a bit more descriptive, and tell the caller if
we actually did any zeroing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs/xfs/xfs_file.c')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 090a90f0d02c..294e5f423028 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -85,10 +85,11 @@ xfs_rw_ilock_demote(
  * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
 int
-xfs_iozero(
+xfs_zero_range(
 	struct xfs_inode	*ip,
-	loff_t			pos,
-	size_t			count)
+	xfs_off_t		pos,
+	xfs_off_t		count,
+	bool			*did_zero)
 {
-	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+	return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
 }
@@ -419,7 +420,7 @@ xfs_zero_last_block(
 	if (isize + zero_len > offset)
 		zero_len = offset - isize;
 	*did_zeroing = true;
-	return xfs_iozero(ip, isize, zero_len);
+	return xfs_zero_range(ip, isize, zero_len, NULL);
 }
 
 /*
@@ -518,7 +519,7 @@ xfs_zero_eof(
 		if ((zero_off + zero_len) > offset)
 			zero_len = offset - zero_off;
 
-		error = xfs_iozero(ip, zero_off, zero_len);
+		error = xfs_zero_range(ip, zero_off, zero_len, NULL);
 		if (error)
 			return error;
 
-- 
cgit v1.2.3


From 570b6211b85692f408cbe47664ab2378eb9519ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 Jun 2016 09:57:26 +1000
Subject: xfs: use xfs_zero_range in xfs_zero_eof

xfs_zero_range now skips holes and unwritten extents itself, so there is no
need for the caller to do so as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c | 128 +-----------------------------------------------------
 1 file changed, 1 insertion(+), 127 deletions(-)

(limited to 'fs/xfs/xfs_file.c')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 294e5f423028..713991c22781 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -380,49 +380,6 @@ out:
 	return ret;
 }
 
-/*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF.  We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		offset,
-	xfs_fsize_t		isize,
-	bool			*did_zeroing)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int			zero_len;
-	int			nimaps = 1;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	ASSERT(nimaps > 0);
-
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK)
-		return 0;
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	*did_zeroing = true;
-	return xfs_zero_range(ip, isize, zero_len, NULL);
-}
-
 /*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
@@ -441,94 +398,11 @@ xfs_zero_eof(
 	xfs_fsize_t		isize,		/* current inode size */
 	bool			*did_zeroing)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t		zero_off;
-	xfs_fsize_t		zero_len;
-	int			nimaps;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	trace_xfs_zero_eof(ip, isize, offset - isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 *
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
-		if (error)
-			return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old where blocks
-	 * needing to be zeroed may exist.
-	 *
-	 * To get the block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back to a block
-	 * boundary.  We subtract 1 in case the size is exactly on a block
-	 * boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
-					  &imap, &nimaps, 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			return error;
-
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 */
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_zero_range(ip, zero_off, zero_len, NULL);
-		if (error)
-			return error;
-
-		*did_zeroing = true;
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-	}
-
-	return 0;
+	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
-- 
cgit v1.2.3