diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/cell.c | 9 | ||||
-rw-r--r-- | fs/afs/internal.h | 2 | ||||
-rw-r--r-- | fs/btrfs/block-group.c | 44 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 2 | ||||
-rw-r--r-- | fs/btrfs/file.c | 46 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 39 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 2 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 5 | ||||
-rw-r--r-- | fs/cifs/file.c | 11 | ||||
-rw-r--r-- | fs/cifs/inode.c | 9 | ||||
-rw-r--r-- | fs/cifs/misc.c | 16 | ||||
-rw-r--r-- | fs/cifs/smb2ops.c | 12 | ||||
-rw-r--r-- | fs/efivarfs/file.c | 7 | ||||
-rw-r--r-- | fs/erofs/zdata.h | 20 | ||||
-rw-r--r-- | fs/file_table.c | 2 | ||||
-rw-r--r-- | fs/io_uring.c | 22 | ||||
-rw-r--r-- | fs/nfs/direct.c | 13 | ||||
-rw-r--r-- | fs/nfs/file.c | 1 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 11 | ||||
-rw-r--r-- | fs/nfs/nfs4namespace.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 17 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.c | 9 |
24 files changed, 217 insertions, 88 deletions
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 005921e3b38d..5b79cdceefa0 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -154,10 +154,17 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, return ERR_PTR(-ENOMEM); } + cell->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!cell->name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + cell->net = net; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); + cell->name[i] = 0; atomic_set(&cell->usage, 2); INIT_WORK(&cell->manager, afs_manage_cell); @@ -207,6 +214,7 @@ parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: + kfree(cell->name); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -489,6 +497,7 @@ static void afs_cell_destroy(struct rcu_head *rcu) afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); afs_put_cell(cell->net, cell->alias_of); key_put(cell->anonymous_key); + kfree(cell->name); kfree(cell); _leave(" [destroyed]"); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d520535ddb62..792ac711985e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -388,7 +388,7 @@ struct afs_cell { struct afs_vlserver_list __rcu *vl_servers; u8 name_len; /* Length of name */ - char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ + char *name; /* Cell name, case-flattened and NUL-padded */ }; /* diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 176e8a292fd1..c037ef514b64 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -940,7 +940,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; - goto out_put_group; + goto out; } /* @@ -978,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); - goto out_put_group; + goto out; } clear_nlink(inode); /* One for the block groups ref */ @@ -1001,13 +1001,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); if (ret < 0) - goto out_put_group; + goto out; if (ret > 0) btrfs_release_path(path); if (ret == 0) { ret = btrfs_del_item(trans, tree_root, path); if (ret) - goto out_put_group; + goto out; btrfs_release_path(path); } @@ -1016,6 +1016,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->block_group_cache_tree); RB_CLEAR_NODE(&block_group->cache_node); + /* Once for the block groups rbtree */ + btrfs_put_block_group(block_group); + if (fs_info->first_logical_byte == block_group->start) fs_info->first_logical_byte = (u64)-1; spin_unlock(&fs_info->block_group_cache_lock); @@ -1089,6 +1092,25 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->space_info->lock); + /* + * Remove the free space for the block group from the free space tree + * and the block group's item from the extent tree before marking the + * block group as removed. This is to prevent races with tasks that + * freeze and unfreeze a block group, this task and another task + * allocating a new block group - the unfreeze task ends up removing + * the block group's extent map before the task calling this function + * deletes the block group item from the extent tree, allowing for + * another task to attempt to create another block group with the same + * item key (and failing with -EEXIST and a transaction abort). + */ + ret = remove_block_group_free_space(trans, block_group); + if (ret) + goto out; + + ret = remove_block_group_item(trans, path, block_group); + if (ret < 0) + goto out; + mutex_lock(&fs_info->chunk_mutex); spin_lock(&block_group->lock); block_group->removed = 1; @@ -1123,17 +1145,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, mutex_unlock(&fs_info->chunk_mutex); - ret = remove_block_group_free_space(trans, block_group); - if (ret) - goto out_put_group; - - /* Once for the block groups rbtree */ - btrfs_put_block_group(block_group); - - ret = remove_block_group_item(trans, path, block_group); - if (ret < 0) - goto out; - if (remove_em) { struct extent_map_tree *em_tree; @@ -1145,10 +1156,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, free_extent_map(em); } -out_put_group: +out: /* Once for the lookup reference */ btrfs_put_block_group(block_group); -out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); btrfs_free_path(path); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 30ce7039bc27..d404cce8ae40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1009,6 +1009,8 @@ enum { BTRFS_ROOT_DEAD_RELOC_TREE, /* Mark dead root stored on device whose cleanup needs to be resumed */ BTRFS_ROOT_DEAD_TREE, + /* The root has a log tree. Used only for subvolume roots. */ + BTRFS_ROOT_HAS_LOG_TREE, }; /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2c14312b05e8..2520605afc25 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes) + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; + num_bytes = lockend - lockstart + 1; - btrfs_lock_and_flush_ordered_range(inode, lockstart, - lockend, NULL); + if (nowait) { + struct btrfs_ordered_extent *ordered; + + if (!try_lock_extent(&inode->io_tree, lockstart, lockend)) + return -EAGAIN; + + ordered = btrfs_lookup_ordered_range(inode, lockstart, + num_bytes); + if (ordered) { + btrfs_put_ordered_extent(ordered); + ret = -EAGAIN; + goto out_unlock; + } + } else { + btrfs_lock_and_flush_ordered_range(inode, lockstart, + lockend, NULL); + } - num_bytes = lockend - lockstart + 1; ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; - btrfs_drew_write_unlock(&root->snapshot_lock); + if (!nowait) + btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); } - +out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend); return ret; @@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && check_can_nocow(BTRFS_I(inode), pos, - &write_bytes) > 0) { + &write_bytes, false) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1904,13 +1920,25 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, pos = iocb->ki_pos; count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { + size_t nocow_bytes = count; + /* * We will allocate space in case nodatacow is not set, * so bail */ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { + check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, + true) <= 0) { + inode_unlock(inode); + return -EAGAIN; + } + /* + * There are holes in the range or parts of the range that must + * be COWed (shared extents, RO block groups, etc), so just bail + * out. + */ + if (nocow_bytes < count) { inode_unlock(inode); return -EAGAIN; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d04c82c88418..18d384f4af54 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode, u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode, btrfs_drop_extent_cache(BTRFS_I(inode), start, start + num_bytes - 1, 0); + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; + while (num_bytes > 0) { cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; @@ -1361,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode)); + const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 range_start = start; @@ -1391,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page, * data space info, which we incremented in the step above. * * If we need to fallback to cow and the inode corresponds to a free - * space cache inode, we must also increment bytes_may_use of the data - * space_info for the same reason. Space caches always get a prealloc + * space cache inode or an inode of the data relocation tree, we must + * also increment bytes_may_use of the data space_info for the same + * reason. Space caches and relocated data extents always get a prealloc * extent for them, however scrub or balance may have set the block - * group that contains that extent to RO mode. + * group that contains that extent to RO mode and therefore force COW + * when starting writeback. */ count = count_range_bits(io_tree, &range_start, end, range_bytes, EXTENT_NORESERVE, 0); - if (count > 0 || is_space_ino) { - const u64 bytes = is_space_ino ? range_bytes : count; + if (count > 0 || is_space_ino || is_reloc_ino) { + u64 bytes = count; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_space_info *sinfo = fs_info->data_sinfo; + if (is_space_ino || is_reloc_ino) + bytes = range_bytes; + spin_lock(&sinfo->lock); btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); spin_unlock(&sinfo->lock); @@ -7865,9 +7889,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } ret = btrfs_delalloc_reserve_space(inode, &data_reserved, offset, count); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 168deb8ef68a..e8f7c5f00894 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2692,7 +2692,7 @@ out: btrfs_put_root(root); out_free: btrfs_free_path(path); - kzfree(subvol_info); + kfree(subvol_info); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 920cee312f4e..cd5348f352dd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -169,6 +169,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, if (ret) goto out; + set_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state); root->log_start_pid = current->pid; } @@ -195,6 +196,9 @@ static int join_running_log_trans(struct btrfs_root *root) { int ret = -ENOENT; + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state)) + return ret; + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; @@ -3303,6 +3307,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) if (root->log_root) { free_log_tree(trans, root->log_root); root->log_root = NULL; + clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state); } return 0; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4fe757cfc360..9b0f8f33f832 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4336,7 +4336,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, break; __SetPageLocked(page); - if (add_to_page_cache_locked(page, mapping, page->index, gfp)) { + rc = add_to_page_cache_locked(page, mapping, page->index, gfp); + if (rc) { __ClearPageLocked(page); break; } @@ -4352,6 +4353,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, struct list_head *page_list, unsigned num_pages) { int rc; + int err = 0; struct list_head tmplist; struct cifsFileInfo *open_file = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); @@ -4396,7 +4398,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the order of declining indexes. When we put the pages in * the rdata->pages, then we want them in increasing order. */ - while (!list_empty(page_list)) { + while (!list_empty(page_list) && !err) { unsigned int i, nr_pages, bytes, rsize; loff_t offset; struct page *page, *tpage; @@ -4429,9 +4431,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, return 0; } - rc = readpages_get_pages(mapping, page_list, rsize, &tmplist, + nr_pages = 0; + err = readpages_get_pages(mapping, page_list, rsize, &tmplist, &nr_pages, &offset, &bytes); - if (rc) { + if (!nr_pages) { add_credits_and_wake_if(server, credits, 0); break; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 583f5e4008c2..ce95801e9b66 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2535,6 +2535,15 @@ set_size_out: if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); + + /* + * The man page of truncate says if the size changed, + * then the st_ctime and st_mtime fields for the file + * are updated. + */ + attrs->ia_ctime = attrs->ia_mtime = current_time(inode); + attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME; + cifs_truncate_page(inode->i_mapping, inode->i_size); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 56791a692c8b..e44d049142d0 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -844,28 +844,26 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) struct bio_vec *bv = NULL; if (iov_iter_is_kvec(iter)) { - memcpy(&ctx->iter, iter, sizeof(struct iov_iter)); + memcpy(&ctx->iter, iter, sizeof(*iter)); ctx->len = count; iov_iter_advance(iter, count); return 0; } - if (max_pages * sizeof(struct bio_vec) <= CIFS_AIO_KMALLOC_LIMIT) - bv = kmalloc_array(max_pages, sizeof(struct bio_vec), - GFP_KERNEL); + if (array_size(max_pages, sizeof(*bv)) <= CIFS_AIO_KMALLOC_LIMIT) + bv = kmalloc_array(max_pages, sizeof(*bv), GFP_KERNEL); if (!bv) { - bv = vmalloc(array_size(max_pages, sizeof(struct bio_vec))); + bv = vmalloc(array_size(max_pages, sizeof(*bv))); if (!bv) return -ENOMEM; } - if (max_pages * sizeof(struct page *) <= CIFS_AIO_KMALLOC_LIMIT) - pages = kmalloc_array(max_pages, sizeof(struct page *), - GFP_KERNEL); + if (array_size(max_pages, sizeof(*pages)) <= CIFS_AIO_KMALLOC_LIMIT) + pages = kmalloc_array(max_pages, sizeof(*pages), GFP_KERNEL); if (!pages) { - pages = vmalloc(array_size(max_pages, sizeof(struct page *))); + pages = vmalloc(array_size(max_pages, sizeof(*pages))); if (!pages) { kvfree(bv); return -ENOMEM; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 736d86b8a910..d9fdafa5eb60 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -763,6 +763,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, /* close extra handle outside of crit sec */ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } + rc = 0; goto oshr_free; } @@ -3187,6 +3188,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + /* + * We zero the range through ioctl, so we need remove the page caches + * first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ if (!CIFS_CACHE_READ(cifsi)) @@ -3253,6 +3259,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } + /* + * We implement the punch hole through ioctl, so we need remove the page + * caches first, otherwise the data may be inconsistent with the server. + */ + truncate_pagecache_range(inode, offset, offset + len - 1); + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); fsctl_buf.FileOffset = cpu_to_le64(offset); diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index e9e27a271af0..feaa5e182b7b 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -51,6 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file, } else { inode_lock(inode); i_size_write(inode, datasize + sizeof(attributes)); + inode->i_mtime = current_time(inode); inode_unlock(inode); } @@ -72,10 +73,8 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf, ssize_t size = 0; int err; - while (!__ratelimit(&file->f_cred->user->ratelimit)) { - if (!msleep_interruptible(50)) - return -EINTR; - } + while (!__ratelimit(&file->f_cred->user->ratelimit)) + msleep(50); err = efivar_entry_size(var, &datasize); diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index 7824f5563a55..9b66c28b3ae9 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -144,22 +144,22 @@ static inline void z_erofs_onlinepage_init(struct page *page) static inline void z_erofs_onlinepage_fixup(struct page *page, uintptr_t index, bool down) { - unsigned long *p, o, v, id; -repeat: - p = &page_private(page); - o = READ_ONCE(*p); + union z_erofs_onlinepage_converter u = { .v = &page_private(page) }; + int orig, orig_index, val; - id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; - if (id) { +repeat: + orig = atomic_read(u.o); + orig_index = orig >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; + if (orig_index) { if (!index) return; - DBG_BUGON(id != index); + DBG_BUGON(orig_index != index); } - v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | - ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); - if (cmpxchg(p, o, v) != o) + val = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | + ((orig & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down); + if (atomic_cmpxchg(u.o, orig, val) != orig) goto repeat; } diff --git a/fs/file_table.c b/fs/file_table.c index 656647f9575a..65603502fed6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -230,7 +230,7 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, d_set_d_op(path.dentry, &anon_ops); path.mnt = mntget(mnt); d_instantiate(path.dentry, inode); - file = alloc_file(&path, flags, fops); + file = alloc_file(&path, flags | FMODE_NONOTIFY, fops); if (IS_ERR(file)) { ihold(inode); path_put(&path); diff --git a/fs/io_uring.c b/fs/io_uring.c index a78201b96179..e507737f044e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -890,6 +890,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, unsigned nr_args); static int io_grab_files(struct io_kiocb *req); +static void io_complete_rw_common(struct kiocb *kiocb, long res); static void io_cleanup_req(struct io_kiocb *req); static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, int fd, struct file **out_file, bool fixed); @@ -1749,6 +1750,14 @@ static void io_iopoll_queue(struct list_head *again) do { req = list_first_entry(again, struct io_kiocb, list); list_del(&req->list); + + /* shouldn't happen unless io_uring is dying, cancel reqs */ + if (unlikely(!current->mm)) { + io_complete_rw_common(&req->rw.kiocb, -EAGAIN); + io_put_req(req); + continue; + } + refcount_inc(&req->refs); io_queue_async_work(req); } while (!list_empty(again)); @@ -1994,10 +2003,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) WRITE_ONCE(req->result, res); /* order with io_poll_complete() checking ->result */ - if (res != -EAGAIN) { - smp_wmb(); - WRITE_ONCE(req->iopoll_completed, 1); - } + smp_wmb(); + WRITE_ONCE(req->iopoll_completed, 1); } /* @@ -5353,9 +5360,6 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) { const bool in_async = io_wq_current_is_worker(); - if (req->result == -EAGAIN) - return -EAGAIN; - /* workqueue context doesn't hold uring_lock, grab it now */ if (in_async) mutex_lock(&ctx->uring_lock); @@ -6011,7 +6015,7 @@ static int io_sq_thread(void *data) * If submit got -EBUSY, flag us as needing the application * to enter the kernel to reap and flush events. */ - if (!to_submit || ret == -EBUSY) { + if (!to_submit || ret == -EBUSY || need_resched()) { /* * Drop cur_mm before scheduling, we can't hold it for * long periods (or over schedule()). Do this before @@ -6027,7 +6031,7 @@ static int io_sq_thread(void *data) * more IO, we should wait for the application to * reap events and wake us up. */ - if (!list_empty(&ctx->poll_list) || + if (!list_empty(&ctx->poll_list) || need_resched() || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { if (current->task_works) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1b79dd5cf661..3d113cf8908a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -267,8 +267,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode; - inode_dio_end(inode); - if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -280,7 +278,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) complete(&dreq->completion); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); } static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -410,8 +411,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; } @@ -864,8 +867,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f96367a2463e..ccd6c1637b27 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); + inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d399f72ebbb..de03e440b7ee 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -907,9 +907,8 @@ retry: goto out_mds; /* Use a direct mapping of ds_idx to pgio mirror_idx */ - if (WARN_ON_ONCE(pgio->pg_mirror_count != - FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) - goto out_mds; + if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)) + goto out_eagain; for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); @@ -931,7 +930,10 @@ retry: (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; return; - +out_eagain: + pnfs_generic_pg_cleanup(pgio); + pgio->pg_error = -EAGAIN; + return; out_mds: trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode, 0, NFS4_MAX_UINT64, IOMODE_RW, @@ -941,6 +943,7 @@ out_mds: pgio->pg_lseg = NULL; pgio->pg_maxretrans = 0; nfs_pageio_reset_write_mds(pgio); + pgio->pg_error = -EAGAIN; } static unsigned int diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a3ab6e219061..873342308dc0 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -308,6 +308,7 @@ static int try_location(struct fs_context *fc, if (IS_ERR(export_path)) return PTR_ERR(export_path); + kfree(ctx->nfs_server.export_path); ctx->nfs_server.export_path = export_path; source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1, diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 152a0fc4e905..751bc4dc7466 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -689,6 +689,12 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb) +{ + ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + init_rwsem(&osb->nfs_sync_rwlock); +} + void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) { struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; @@ -2855,6 +2861,11 @@ int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) if (ocfs2_is_hard_readonly(osb)) return -EROFS; + if (ex) + down_write(&osb->nfs_sync_rwlock); + else + down_read(&osb->nfs_sync_rwlock); + if (ocfs2_mount_local(osb)) return 0; @@ -2873,6 +2884,10 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) if (!ocfs2_mount_local(osb)) ocfs2_cluster_unlock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE); + if (ex) + up_write(&osb->nfs_sync_rwlock); + else + up_read(&osb->nfs_sync_rwlock); } int ocfs2_trim_fs_lock(struct ocfs2_super *osb, @@ -3340,7 +3355,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) local: ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); - ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); + ocfs2_nfs_sync_lock_init(osb); ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); osb->cconn = conn; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index ee5d98516212..2dd71d626196 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -395,6 +395,7 @@ struct ocfs2_super struct ocfs2_lock_res osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; + struct rw_semaphore nfs_sync_rwlock; struct ocfs2_lock_res osb_trim_fs_lockres; struct mutex obs_trim_fs_mutex; struct ocfs2_dlm_debug *osb_dlm_debug; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 0dd8c41bafd4..19137c6d087b 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -290,7 +290,7 @@ #define OCFS2_MAX_SLOTS 255 /* Slot map indicator for an empty slot */ -#define OCFS2_INVALID_SLOT -1 +#define OCFS2_INVALID_SLOT ((u16)-1) #define OCFS2_VOL_UUID_LEN 16 #define OCFS2_MAX_VOL_LABEL_LEN 64 @@ -326,8 +326,8 @@ struct ocfs2_system_inode_info { enum { BAD_BLOCK_SYSTEM_INODE = 0, GLOBAL_INODE_ALLOC_SYSTEM_INODE, +#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE, -#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE HEARTBEAT_SYSTEM_INODE, GLOBAL_BITMAP_SYSTEM_INODE, USER_QUOTA_SYSTEM_INODE, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 4836becb7578..45745cc3408a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2825,9 +2825,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) goto bail; } - inode_alloc_inode = - ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, - suballoc_slot); + if (suballoc_slot == (u16)OCFS2_INVALID_SLOT) + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot); + else + inode_alloc_inode = ocfs2_get_system_file_inode(osb, + INODE_ALLOC_SYSTEM_INODE, suballoc_slot); if (!inode_alloc_inode) { /* the error code could be inaccurate, but we are not able to * get the correct one. */ |