Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c | 6
-rw-r--r--  fs/9p/vfs_inode.c | 4
-rw-r--r--  fs/Kconfig | 40
-rw-r--r--  fs/Makefile | 1
-rw-r--r--  fs/adfs/inode.c | 1
-rw-r--r--  fs/adfs/super.c | 6
-rw-r--r--  fs/affs/affs.h | 1
-rw-r--r--  fs/affs/super.c | 7
-rw-r--r--  fs/afs/inode.c | 1
-rw-r--r--  fs/afs/proc.c | 2
-rw-r--r--  fs/afs/vlocation.c | 3
-rw-r--r--  fs/afs/volume.c | 3
-rw-r--r--  fs/autofs/autofs_i.h | 2
-rw-r--r--  fs/autofs/inode.c | 7
-rw-r--r--  fs/autofs/symlink.c | 2
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/expire.c | 6
-rw-r--r--  fs/autofs4/inode.c | 2
-rw-r--r--  fs/autofs4/root.c | 46
-rw-r--r--  fs/befs/linuxvfs.c | 5
-rw-r--r--  fs/bfs/dir.c | 2
-rw-r--r--  fs/bfs/inode.c | 10
-rw-r--r--  fs/binfmt_aout.c | 14
-rw-r--r--  fs/binfmt_elf.c | 26
-rw-r--r--  fs/binfmt_elf_fdpic.c | 7
-rw-r--r--  fs/binfmt_misc.c | 15
-rw-r--r--  fs/block_dev.c | 36
-rw-r--r--  fs/buffer.c | 2
-rw-r--r--  fs/char_dev.c | 107
-rw-r--r--  fs/cifs/cifsfs.c | 21
-rw-r--r--  fs/cifs/readdir.c | 5
-rw-r--r--  fs/coda/coda_linux.c | 2
-rw-r--r--  fs/coda/dir.c | 2
-rw-r--r--  fs/coda/inode.c | 3
-rw-r--r--  fs/compat.c | 5
-rw-r--r--  fs/configfs/file.c | 3
-rw-r--r--  fs/configfs/inode.c | 4
-rw-r--r--  fs/cramfs/inode.c | 15
-rw-r--r--  fs/cramfs/uncompress.c | 3
-rw-r--r--  fs/dcache.c | 164
-rw-r--r--  fs/debugfs/file.c | 60
-rw-r--r--  fs/debugfs/inode.c | 20
-rw-r--r--  fs/devpts/inode.c | 6
-rw-r--r--  fs/dquot.c | 5
-rw-r--r--  fs/efs/super.c | 6
-rw-r--r--  fs/eventpoll.c | 1
-rw-r--r--  fs/exec.c | 56
-rw-r--r--  fs/ext2/acl.c | 4
-rw-r--r--  fs/ext2/ialloc.c | 1
-rw-r--r--  fs/ext2/inode.c | 1
-rw-r--r--  fs/ext2/super.c | 38
-rw-r--r--  fs/ext2/xattr.c | 3
-rw-r--r--  fs/ext3/acl.c | 6
-rw-r--r--  fs/ext3/balloc.c | 350
-rw-r--r--  fs/ext3/bitmap.c | 2
-rw-r--r--  fs/ext3/dir.c | 19
-rw-r--r--  fs/ext3/file.c | 2
-rw-r--r--  fs/ext3/fsync.c | 6
-rw-r--r--  fs/ext3/hash.c | 8
-rw-r--r--  fs/ext3/ialloc.c | 55
-rw-r--r--  fs/ext3/inode.c | 77
-rw-r--r--  fs/ext3/namei.c | 50
-rw-r--r--  fs/ext3/resize.c | 42
-rw-r--r--  fs/ext3/super.c | 110
-rw-r--r--  fs/ext3/xattr.c | 16
-rw-r--r--  fs/fat/cache.c | 3
-rw-r--r--  fs/fat/file.c | 13
-rw-r--r--  fs/fat/inode.c | 97
-rw-r--r--  fs/file.c | 84
-rw-r--r--  fs/file_table.c | 2
-rw-r--r--  fs/filesystems.c | 2
-rw-r--r--  fs/freevxfs/vxfs.h | 2
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 5
-rw-r--r--  fs/freevxfs/vxfs_super.c | 11
-rw-r--r--  fs/fuse/control.c | 6
-rw-r--r--  fs/fuse/dev.c | 2
-rw-r--r--  fs/fuse/dir.c | 2
-rw-r--r--  fs/fuse/inode.c | 2
-rw-r--r--  fs/generic_acl.c | 197
-rw-r--r--  fs/hfs/bnode.c | 3
-rw-r--r--  fs/hfs/btree.c | 3
-rw-r--r--  fs/hfs/inode.c | 2
-rw-r--r--  fs/hfs/super.c | 6
-rw-r--r--  fs/hfsplus/bnode.c | 3
-rw-r--r--  fs/hfsplus/btree.c | 3
-rw-r--r--  fs/hfsplus/inode.c | 2
-rw-r--r--  fs/hfsplus/super.c | 3
-rw-r--r--  fs/hostfs/hostfs_kern.c | 1
-rw-r--r--  fs/hpfs/buffer.c | 2
-rw-r--r--  fs/hpfs/hpfs_fn.h | 1
-rw-r--r--  fs/hpfs/inode.c | 1
-rw-r--r--  fs/hpfs/super.c | 7
-rw-r--r--  fs/hppfs/hppfs_kern.c | 1
-rw-r--r--  fs/hugetlbfs/inode.c | 3
-rw-r--r--  fs/inode.c | 7
-rw-r--r--  fs/isofs/inode.c | 58
-rw-r--r--  fs/jbd/checkpoint.c | 33
-rw-r--r--  fs/jbd/commit.c | 182
-rw-r--r--  fs/jbd/journal.c | 95
-rw-r--r--  fs/jbd/recovery.c | 58
-rw-r--r--  fs/jbd/revoke.c | 70
-rw-r--r--  fs/jbd/transaction.c | 134
-rw-r--r--  fs/jffs/inode-v23.c | 44
-rw-r--r--  fs/jffs/intrep.c | 11
-rw-r--r--  fs/jffs/jffs_fm.c | 6
-rw-r--r--  fs/jffs2/fs.c | 2
-rw-r--r--  fs/jffs2/super.c | 3
-rw-r--r--  fs/jfs/jfs_extent.c | 2
-rw-r--r--  fs/jfs/jfs_imap.c | 1
-rw-r--r--  fs/jfs/jfs_inode.c | 1
-rw-r--r--  fs/jfs/jfs_metapage.c | 2
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 4
-rw-r--r--  fs/libfs.c | 14
-rw-r--r--  fs/lockd/clntlock.c | 2
-rw-r--r--  fs/lockd/clntproc.c | 12
-rw-r--r--  fs/lockd/host.c | 55
-rw-r--r--  fs/lockd/mon.c | 41
-rw-r--r--  fs/lockd/svcsubs.c | 3
-rw-r--r--  fs/mbcache.c | 1
-rw-r--r--  fs/minix/bitmap.c | 2
-rw-r--r--  fs/minix/inode.c | 13
-rw-r--r--  fs/msdos/namei.c | 11
-rw-r--r--  fs/namei.c | 142
-rw-r--r--  fs/namespace.c | 22
-rw-r--r--  fs/ncpfs/inode.c | 7
-rw-r--r--  fs/ncpfs/symlink.c | 4
-rw-r--r--  fs/nfs/Makefile | 6
-rw-r--r--  fs/nfs/callback.c | 31
-rw-r--r--  fs/nfs/callback.h | 7
-rw-r--r--  fs/nfs/callback_proc.c | 13
-rw-r--r--  fs/nfs/client.c | 1448
-rw-r--r--  fs/nfs/delegation.c | 42
-rw-r--r--  fs/nfs/delegation.h | 10
-rw-r--r--  fs/nfs/dir.c | 341
-rw-r--r--  fs/nfs/direct.c | 3
-rw-r--r--  fs/nfs/file.c | 4
-rw-r--r--  fs/nfs/getroot.c | 311
-rw-r--r--  fs/nfs/idmap.c | 45
-rw-r--r--  fs/nfs/inode.c | 55
-rw-r--r--  fs/nfs/internal.h | 105
-rw-r--r--  fs/nfs/mount_clnt.c | 30
-rw-r--r--  fs/nfs/namespace.c | 46
-rw-r--r--  fs/nfs/nfs2xdr.c | 21
-rw-r--r--  fs/nfs/nfs3proc.c | 44
-rw-r--r--  fs/nfs/nfs3xdr.c | 7
-rw-r--r--  fs/nfs/nfs4_fs.h | 78
-rw-r--r--  fs/nfs/nfs4namespace.c | 118
-rw-r--r--  fs/nfs/nfs4proc.c | 218
-rw-r--r--  fs/nfs/nfs4renewd.c | 20
-rw-r--r--  fs/nfs/nfs4state.c | 174
-rw-r--r--  fs/nfs/nfs4xdr.c | 50
-rw-r--r--  fs/nfs/pagelist.c | 3
-rw-r--r--  fs/nfs/proc.c | 43
-rw-r--r--  fs/nfs/read.c | 24
-rw-r--r--  fs/nfs/super.c | 1421
-rw-r--r--  fs/nfs/write.c | 14
-rw-r--r--  fs/nfsd/nfs4callback.c | 66
-rw-r--r--  fs/nfsd/nfs4idmap.c | 3
-rw-r--r--  fs/nfsd/nfs4state.c | 8
-rw-r--r--  fs/ntfs/dir.c | 5
-rw-r--r--  fs/ntfs/inode.c | 6
-rw-r--r--  fs/ntfs/mft.c | 9
-rw-r--r--  fs/ntfs/super.c | 28
-rw-r--r--  fs/ntfs/unistr.c | 4
-rw-r--r--  fs/ocfs2/cluster/tcp_internal.h | 8
-rw-r--r--  fs/ocfs2/dcache.c | 359
-rw-r--r--  fs/ocfs2/dcache.h | 27
-rw-r--r--  fs/ocfs2/dlm/dlmapi.h | 1
-rw-r--r--  fs/ocfs2/dlm/dlmast.c | 6
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h | 1
-rw-r--r--  fs/ocfs2/dlm/dlmfs.c | 6
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c | 10
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 4
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c | 3
-rw-r--r--  fs/ocfs2/dlm/userdlm.c | 81
-rw-r--r--  fs/ocfs2/dlm/userdlm.h | 1
-rw-r--r--  fs/ocfs2/dlmglue.c | 1096
-rw-r--r--  fs/ocfs2/dlmglue.h | 21
-rw-r--r--  fs/ocfs2/export.c | 8
-rw-r--r--  fs/ocfs2/inode.c | 160
-rw-r--r--  fs/ocfs2/inode.h | 8
-rw-r--r--  fs/ocfs2/journal.c | 3
-rw-r--r--  fs/ocfs2/namei.c | 116
-rw-r--r--  fs/ocfs2/ocfs2_lockid.h | 25
-rw-r--r--  fs/ocfs2/super.c | 6
-rw-r--r--  fs/ocfs2/sysfile.c | 6
-rw-r--r--  fs/ocfs2/vote.c | 180
-rw-r--r--  fs/ocfs2/vote.h | 5
-rw-r--r--  fs/open.c | 15
-rw-r--r--  fs/openpromfs/inode.c | 2
-rw-r--r--  fs/partitions/efi.c | 9
-rw-r--r--  fs/partitions/msdos.c | 31
-rw-r--r--  fs/pipe.c | 1
-rw-r--r--  fs/proc/array.c | 3
-rw-r--r--  fs/proc/base.c | 3
-rw-r--r--  fs/proc/internal.h | 1
-rw-r--r--  fs/proc/kcore.c | 10
-rw-r--r--  fs/proc/nommu.c | 20
-rw-r--r--  fs/proc/proc_misc.c | 11
-rw-r--r--  fs/proc/task_mmu.c | 5
-rw-r--r--  fs/proc/task_nommu.c | 74
-rw-r--r--  fs/qnx4/inode.c | 8
-rw-r--r--  fs/ramfs/inode.c | 1
-rw-r--r--  fs/reiserfs/Makefile | 2
-rw-r--r--  fs/reiserfs/file.c | 2
-rw-r--r--  fs/reiserfs/inode.c | 18
-rw-r--r--  fs/reiserfs/journal.c | 54
-rw-r--r--  fs/reiserfs/super.c | 31
-rw-r--r--  fs/romfs/inode.c | 3
-rw-r--r--  fs/select.c | 8
-rw-r--r--  fs/smbfs/inode.c | 5
-rw-r--r--  fs/smbfs/proc.c | 1
-rw-r--r--  fs/smbfs/request.c | 3
-rw-r--r--  fs/stat.c | 3
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/sysfs/bin.c | 13
-rw-r--r--  fs/sysfs/dir.c | 2
-rw-r--r--  fs/sysfs/inode.c | 12
-rw-r--r--  fs/sysfs/symlink.c | 14
-rw-r--r--  fs/sysfs/sysfs.h | 2
-rw-r--r--  fs/sysv/ialloc.c | 2
-rw-r--r--  fs/sysv/inode.c | 2
-rw-r--r--  fs/sysv/super.c | 6
-rw-r--r--  fs/udf/ialloc.c | 7
-rw-r--r--  fs/udf/inode.c | 2
-rw-r--r--  fs/udf/super.c | 7
-rw-r--r--  fs/ufs/ialloc.c | 1
-rw-r--r--  fs/ufs/inode.c | 1
-rw-r--r--  fs/ufs/super.c | 6
-rw-r--r--  fs/xfs/Makefile-linux-2.6 | 1
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 29
-rw-r--r--  fs/xfs/linux-2.6/kmem.h | 10
-rw-r--r--  fs/xfs/linux-2.6/sema.h | 2
-rw-r--r--  fs/xfs/linux-2.6/sv.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 9
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 51
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 7
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_globals.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 19
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 29
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 14
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 10
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_vfs.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h | 2
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 26
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 14
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 6
-rw-r--r--  fs/xfs/quota/xfs_quota_priv.h | 2
-rw-r--r--  fs/xfs/support/ktrace.c | 2
-rw-r--r--  fs/xfs/xfs_ag.h | 2
-rw-r--r--  fs/xfs/xfs_alloc.c | 10
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 132
-rw-r--r--  fs/xfs/xfs_attr.c | 181
-rw-r--r--  fs/xfs/xfs_attr.h | 8
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 351
-rw-r--r--  fs/xfs/xfs_attr_leaf.h | 41
-rw-r--r--  fs/xfs/xfs_behavior.c | 20
-rw-r--r--  fs/xfs/xfs_behavior.h | 2
-rw-r--r--  fs/xfs/xfs_bmap.c | 90
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 113
-rw-r--r--  fs/xfs/xfs_bmap_btree.h | 11
-rw-r--r--  fs/xfs/xfs_btree.c | 8
-rw-r--r--  fs/xfs/xfs_btree.h | 5
-rw-r--r--  fs/xfs/xfs_buf_item.c | 22
-rw-r--r--  fs/xfs/xfs_da_btree.c | 33
-rw-r--r--  fs/xfs/xfs_error.h | 9
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 69
-rw-r--r--  fs/xfs/xfs_extfree_item.h | 50
-rw-r--r--  fs/xfs/xfs_fs.h | 8
-rw-r--r--  fs/xfs/xfs_ialloc.c | 11
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 62
-rw-r--r--  fs/xfs/xfs_ialloc_btree.h | 19
-rw-r--r--  fs/xfs/xfs_iget.c | 44
-rw-r--r--  fs/xfs/xfs_inode.c | 30
-rw-r--r--  fs/xfs/xfs_inode.h | 12
-rw-r--r--  fs/xfs/xfs_inode_item.c | 16
-rw-r--r--  fs/xfs/xfs_inode_item.h | 66
-rw-r--r--  fs/xfs/xfs_iomap.c | 89
-rw-r--r--  fs/xfs/xfs_itable.c | 184
-rw-r--r--  fs/xfs/xfs_itable.h | 16
-rw-r--r--  fs/xfs/xfs_log.c | 19
-rw-r--r--  fs/xfs/xfs_log.h | 8
-rw-r--r--  fs/xfs/xfs_log_priv.h | 10
-rw-r--r--  fs/xfs/xfs_mount.h | 5
-rw-r--r--  fs/xfs/xfs_quota.h | 2
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 38
-rw-r--r--  fs/xfs/xfs_sb.h | 22
-rw-r--r--  fs/xfs/xfs_trans.h | 2
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 4
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 12
-rw-r--r--  fs/xfs/xfs_vfsops.c | 2
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 26
295 files changed, 7621 insertions, 4941 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 22f7ccd58d38..0f628041e3f7 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -460,8 +460,10 @@ static int __init init_v9fs(void)
ret = v9fs_mux_global_init();
-	if (!ret)
-		ret = register_filesystem(&v9fs_fs_type);
-
+	if (ret)
+		return ret;
+	ret = register_filesystem(&v9fs_fs_type);
+	if (ret)
+		v9fs_mux_global_exit();
return ret;
}
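The corrected init_v9fs() above follows the standard module-init unwinding pattern: return as soon as a step fails, and tear down earlier steps when a later one fails. A minimal sketch of that pattern (the same shape also appears in the cramfs init change further down); my_cache_init(), my_cache_exit() and my_fs_type are hypothetical stand-ins, not code from the patch:

static int __init my_module_init(void)
{
	int ret;

	ret = my_cache_init();			/* step 1: private setup */
	if (ret)
		return ret;			/* nothing to unwind yet */

	ret = register_filesystem(&my_fs_type);	/* step 2: go live */
	if (ret)
		my_cache_exit();		/* unwind step 1 on failure */

	return ret;
}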
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index eae50c9d6dc4..7a7ec2d1d2f4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -204,7 +204,6 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = sb->s_blocksize;
inode->i_blocks = 0;
inode->i_rdev = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -950,9 +949,8 @@ v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
inode->i_size = stat->length;
- inode->i_blksize = sb->s_blocksize;
inode->i_blocks =
- (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+ (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
}
/**
diff --git a/fs/Kconfig b/fs/Kconfig
index 530581628311..4fd9efac29ab 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -826,6 +826,25 @@ config PROC_VMCORE
help
Exports the dump image of crashed kernel in ELF format.
+config PROC_SYSCTL
+ bool "Sysctl support (/proc/sys)" if EMBEDDED
+ depends on PROC_FS
+ select SYSCTL
+ default y
+ ---help---
+ The sysctl interface provides a means of dynamically changing
+ certain kernel parameters and variables on the fly without requiring
+ a recompile of the kernel or reboot of the system. The primary
+ interface is through /proc/sys. If you say Y here a tree of
+ modifiable sysctl entries will be generated beneath the
+ /proc/sys directory. They are explained in the files
+ in <file:Documentation/sysctl/>. Note that enabling this
+ option will enlarge the kernel by at least 8 KB.
+
+ As it is generally a good thing, you should say Y here unless
+ building a kernel for install/rescue disks or your system is very
+ limited in memory.
+
config SYSFS
bool "sysfs file system support" if EMBEDDED
default y
@@ -862,6 +881,19 @@ config TMPFS
See <file:Documentation/filesystems/tmpfs.txt> for details.
+config TMPFS_POSIX_ACL
+ bool "Tmpfs POSIX Access Control Lists"
+ depends on TMPFS
+ select GENERIC_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
+
config HUGETLBFS
bool "HugeTLB file system support"
depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
@@ -1471,8 +1503,8 @@ config NFS_V4
If unsure, say N.
config NFS_DIRECTIO
- bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
- depends on NFS_FS && EXPERIMENTAL
+ bool "Allow direct I/O on NFS files"
+ depends on NFS_FS
help
This option enables applications to perform uncached I/O on files
in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
@@ -1921,6 +1953,10 @@ config 9P_FS
If unsure, say N.
+config GENERIC_ACL
+ bool
+ select FS_POSIX_ACL
+
endmenu
menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 89135428a539..46b8cfe497b2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
+obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 534f3eecc985..7e7a04be1278 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -269,7 +269,6 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
inode->i_ino = obj->file_id;
inode->i_size = obj->size;
inode->i_nlink = 2;
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
sb->s_blocksize_bits;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 82011019494c..9ade139086fc 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -251,8 +251,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(adfs_inode_cachep))
- printk(KERN_INFO "adfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(adfs_inode_cachep);
}
static struct super_operations adfs_sops = {
@@ -339,11 +338,10 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_NODIRATIME;
- asb = kmalloc(sizeof(*asb), GFP_KERNEL);
+ asb = kzalloc(sizeof(*asb), GFP_KERNEL);
if (!asb)
return -ENOMEM;
sb->s_fs_info = asb;
- memset(asb, 0, sizeof(*asb));
/* set default options */
asb->s_uid = 0;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0ddd4cc0d1a0..1dc8438ef389 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,7 +1,6 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
-#include <linux/affs_fs.h>
#include <linux/amigaffs.h>
/* AmigaOS allows file names with up to 30 characters length.
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5200f4938df0..5ea72c3a16c3 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/statfs.h>
#include <linux/parser.h>
+#include <linux/magic.h>
#include "affs.h"
extern struct timezone sys_tz;
@@ -108,8 +109,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(affs_inode_cachep))
- printk(KERN_INFO "affs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(affs_inode_cachep);
}
static struct super_operations affs_sops = {
@@ -279,11 +279,10 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &affs_sops;
sb->s_flags |= MS_NODIRATIME;
- sbi = kmalloc(sizeof(struct affs_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(*sbi));
init_MUTEX(&sbi->s_bmlock);
if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4ebb30a50ed5..6f37754906c2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -72,7 +72,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
inode->i_ctime.tv_sec = vnode->status.mtime_server;
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_version = vnode->fid.unique;
inode->i_mapping->a_ops = &afs_fs_aops;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 101d21b6c037..86463ec9ccb4 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -775,6 +775,7 @@ static int afs_proc_cell_servers_release(struct inode *inode,
* first item
*/
static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
+ __acquires(m->private->sv_lock)
{
struct list_head *_p;
struct afs_cell *cell = m->private;
@@ -823,6 +824,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
* clean up after reading from the cells list
*/
static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
+ __releases(p->private->sv_lock)
{
struct afs_cell *cell = p->private;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 331f730a1fb3..782ee7c600ca 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -281,11 +281,10 @@ int afs_vlocation_lookup(struct afs_cell *cell,
spin_unlock(&cell->vl_gylock);
/* not in the cell's in-memory lists - create a new record */
- vlocation = kmalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
+ vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
if (!vlocation)
return -ENOMEM;
- memset(vlocation, 0, sizeof(struct afs_vlocation));
atomic_set(&vlocation->usage, 1);
INIT_LIST_HEAD(&vlocation->link);
rwlock_init(&vlocation->lock);
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 0ff4b86476e3..768c6dbd323a 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -186,11 +186,10 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
_debug("creating new volume record");
ret = -ENOMEM;
- volume = kmalloc(sizeof(struct afs_volume), GFP_KERNEL);
+ volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
if (!volume)
goto error_up;
- memset(volume, 0, sizeof(struct afs_volume));
atomic_set(&volume->usage, 1);
volume->type = type;
volume->type_force = force;
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index a62327f1bdff..c7700d9b3f96 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -37,8 +37,6 @@
#define DPRINTK(D) ((void)0)
#endif
-#define AUTOFS_SUPER_MAGIC 0x0187
-
/*
* If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
* kernel will keep the negative response cached for up to the time given
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 65e5ed42190e..2c9759baad61 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -16,6 +16,7 @@
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/bitops.h>
+#include <linux/magic.h>
#include "autofs_i.h"
#include <linux/module.h>
@@ -128,10 +129,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
struct autofs_sb_info *sbi;
int minproto, maxproto;
- sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if ( !sbi )
goto fail_unlock;
- memset(sbi, 0, sizeof(*sbi));
DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
s->s_fs_info = sbi;
@@ -216,7 +216,6 @@ static void autofs_read_inode(struct inode *inode)
inode->i_nlink = 2;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_blocks = 0;
- inode->i_blksize = 1024;
if ( ino == AUTOFS_ROOT_INO ) {
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
@@ -241,7 +240,7 @@ static void autofs_read_inode(struct inode *inode)
inode->i_op = &autofs_symlink_inode_operations;
sl = &sbi->symlink[n];
- inode->u.generic_ip = sl;
+ inode->i_private = sl;
inode->i_mode = S_IFLNK | S_IRWXUGO;
inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index 52e8772b066e..c74f2eb65775 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -15,7 +15,7 @@
/* Nothing to release.. */
static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
+ char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
nd_set_link(nd, s);
return NULL;
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d6603d02304c..480ab178cba5 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -40,8 +40,6 @@
#define DPRINTK(fmt,args...) do {} while(0)
#endif
-#define AUTOFS_SUPER_MAGIC 0x0187
-
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8dbd44f10e9d..d96e5c14a9ca 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -32,7 +32,7 @@ static inline int autofs4_can_expire(struct dentry *dentry,
if (!do_now) {
/* Too young to die */
- if (time_after(ino->last_used + timeout, now))
+ if (!timeout || time_after(ino->last_used + timeout, now))
return 0;
/* update last_used here :-
@@ -253,7 +253,7 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
struct dentry *root = dget(sb->s_root);
int do_now = how & AUTOFS_EXP_IMMEDIATE;
- if (!sbi->exp_timeout || !root)
+ if (!root)
return NULL;
now = jiffies;
@@ -293,7 +293,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
int do_now = how & AUTOFS_EXP_IMMEDIATE;
int exp_leaves = how & AUTOFS_EXP_LEAVES;
- if ( !sbi->exp_timeout || !root )
+ if (!root)
return NULL;
now = jiffies;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index fde78b110ddd..800ce876caec 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
#include <linux/parser.h>
#include <linux/bitops.h>
#include <linux/smp_lock.h>
+#include <linux/magic.h>
#include "autofs_i.h"
#include <linux/module.h>
@@ -446,7 +447,6 @@ struct inode *autofs4_get_inode(struct super_block *sb,
inode->i_uid = 0;
inode->i_gid = 0;
}
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 5100f984783f..563ef9d7da9f 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -137,7 +137,9 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
nd.flags = LOOKUP_DIRECTORY;
ret = (dentry->d_op->d_revalidate)(dentry, &nd);
- if (!ret) {
+ if (ret <= 0) {
+ if (ret < 0)
+ status = ret;
dcache_dir_close(inode, file);
goto out;
}
@@ -279,9 +281,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
DPRINTK("mount done status=%d", status);
- if (status && dentry->d_inode)
- return status; /* Try to get the kernel to invalidate this dentry */
-
/* Turn this into a real negative dentry? */
if (status == -ENOENT) {
spin_lock(&dentry->d_lock);
@@ -357,7 +356,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
* don't try to mount it again.
*/
spin_lock(&dcache_lock);
- if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+ if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
spin_unlock(&dcache_lock);
status = try_to_fill_dentry(dentry, 0);
@@ -400,13 +399,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
int oz_mode = autofs4_oz_mode(sbi);
int flags = nd ? nd->flags : 0;
- int status = 0;
+ int status = 1;
/* Pending dentry */
if (autofs4_ispending(dentry)) {
- if (!oz_mode)
- status = try_to_fill_dentry(dentry, flags);
- return !status;
+ /* The daemon never causes a mount to trigger */
+ if (oz_mode)
+ return 1;
+
+ /*
+ * A zero status is success otherwise we have a
+ * negative error code.
+ */
+ status = try_to_fill_dentry(dentry, flags);
+ if (status == 0)
+ return 1;
+
+ return status;
}
/* Negative dentry.. invalidate if "old" */
@@ -421,9 +430,19 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
DPRINTK("dentry=%p %.*s, emptydir",
dentry, dentry->d_name.len, dentry->d_name.name);
spin_unlock(&dcache_lock);
- if (!oz_mode)
- status = try_to_fill_dentry(dentry, flags);
- return !status;
+ /* The daemon never causes a mount to trigger */
+ if (oz_mode)
+ return 1;
+
+ /*
+ * A zero status is success otherwise we have a
+ * negative error code.
+ */
+ status = try_to_fill_dentry(dentry, flags);
+ if (status == 0)
+ return 1;
+
+ return status;
}
spin_unlock(&dcache_lock);
@@ -518,6 +537,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
return ERR_PTR(-ERESTARTNOINTR);
}
}
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
+ spin_unlock(&dentry->d_lock);
}
/*
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 50cfca5c7efd..57020c7a7e65 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -365,7 +365,6 @@ befs_read_inode(struct inode *inode)
inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */
inode->i_ctime = inode->i_mtime;
inode->i_atime = inode->i_mtime;
- inode->i_blksize = befs_sb->block_size;
befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);
befs_ino->i_parent = fsrun_to_cpu(sb, raw_inode->parent);
@@ -446,9 +445,7 @@ befs_init_inodecache(void)
static void
befs_destroy_inodecache(void)
{
- if (kmem_cache_destroy(befs_inode_cachep))
- printk(KERN_ERR "befs_destroy_inodecache: "
- "not all structures were freed\n");
+ kmem_cache_destroy(befs_inode_cachep);
}
/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 26fad9621738..dcf04cb13283 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -102,7 +102,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
inode->i_uid = current->fsuid;
inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
inode->i_op = &bfs_file_inops;
inode->i_fop = &bfs_file_operations;
inode->i_mapping->a_ops = &bfs_aops;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cf74f3d4d966..ed27ffb3459e 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -76,7 +76,6 @@ static void bfs_read_inode(struct inode * inode)
inode->i_size = BFS_FILESIZE(di);
inode->i_blocks = BFS_FILEBLOCKS(di);
if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
- inode->i_blksize = PAGE_SIZE;
inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
@@ -268,8 +267,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(bfs_inode_cachep))
- printk(KERN_INFO "bfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(bfs_inode_cachep);
}
static struct super_operations bfs_sops = {
@@ -311,11 +309,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
unsigned i, imap_len;
struct bfs_sb_info * info;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
s->s_fs_info = info;
- memset(info, 0, sizeof(*info));
sb_set_blocksize(s, BFS_BSIZE);
@@ -338,10 +335,9 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
+ BFS_ROOT_INO - 1;
imap_len = info->si_lasti/8 + 1;
- info->si_imap = kmalloc(imap_len, GFP_KERNEL);
+ info->si_imap = kzalloc(imap_len, GFP_KERNEL);
if (!info->si_imap)
goto out;
- memset(info->si_imap, 0, imap_len);
for (i=0; i<BFS_ROOT_INO; i++)
set_bit(i, info->si_imap);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index f312103434d4..517e111bb7ef 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -278,6 +278,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
return -ENOEXEC;
}
+ /*
+ * Requires a mmap handler. This prevents people from using a.out
+ * as part of an exploit attack against /proc-related vulnerabilities.
+ */
+ if (!bprm->file->f_op || !bprm->file->f_op->mmap)
+ return -ENOEXEC;
+
fd_offset = N_TXTOFF(ex);
/* Check initial limits. This avoids letting people circumvent
@@ -476,6 +483,13 @@ static int load_aout_library(struct file *file)
goto out;
}
+ /*
+ * Requires a mmap handler. This prevents people from using a.out
+ * as part of an exploit attack against /proc-related vulnerabilities.
+ */
+ if (!file->f_op || !file->f_op->mmap)
+ goto out;
+
if (N_FLAGS(ex))
goto out;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 672a3b90bc55..6eb48e1446ec 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -515,7 +515,8 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
{
unsigned int random_variable = 0;
- if (current->flags & PF_RANDOMIZE) {
+ if ((current->flags & PF_RANDOMIZE) &&
+ !(current->personality & ADDR_NO_RANDOMIZE)) {
random_variable = get_random_int() & STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
@@ -1037,10 +1038,8 @@ out_free_interp:
out_free_file:
sys_close(elf_exec_fileno);
out_free_fh:
- if (files) {
- put_files_struct(current->files);
- current->files = files;
- }
+ if (files)
+ reset_files_struct(current, files);
out_free_ph:
kfree(elf_phdata);
goto out;
@@ -1262,7 +1261,7 @@ static void fill_elf_header(struct elfhdr *elf, int segs)
return;
}
-static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
+static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
phdr->p_type = PT_NOTE;
phdr->p_offset = offset;
@@ -1428,7 +1427,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
int i;
struct vm_area_struct *vma;
struct elfhdr *elf = NULL;
- off_t offset = 0, dataoff;
+ loff_t offset = 0, dataoff;
unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
int numnote;
struct memelfnote *notes = NULL;
@@ -1480,20 +1479,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
if (signr) {
struct elf_thread_status *tmp;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
do_each_thread(g,p)
if (current->mm == p->mm && current != p) {
tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
if (!tmp) {
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
goto cleanup;
}
- INIT_LIST_HEAD(&tmp->list);
tmp->thread = p;
list_add(&tmp->list, &thread_list);
}
while_each_thread(g,p);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
list_for_each(t, &thread_list) {
struct elf_thread_status *tmp;
int sz;
@@ -1661,11 +1659,11 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
ELF_CORE_WRITE_EXTRA_DATA;
#endif
- if ((off_t)file->f_pos != offset) {
+ if (file->f_pos != offset) {
/* Sanity check */
printk(KERN_WARNING
- "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
- (off_t)file->f_pos, offset);
+ "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n",
+ file->f_pos, offset);
}
end_coredump:
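The randomize_stack_top() hunk above makes the ELF loader honour the ADDR_NO_RANDOMIZE personality flag. For reference, a userspace wrapper can request that behaviour with the personality() syscall before exec'ing a target. This is only an illustrative sketch; depending on the C library, the flag may need to come from <linux/personality.h> rather than <sys/personality.h>.

#include <stdio.h>
#include <unistd.h>
#include <sys/personality.h>

int main(int argc, char *argv[])
{
	unsigned long pers;

	if (argc < 2) {
		fprintf(stderr, "usage: %s prog [args...]\n", argv[0]);
		return 1;
	}

	pers = personality(0xffffffff);		/* query current personality */
	if (personality(pers | ADDR_NO_RANDOMIZE) == -1)
		perror("personality");		/* non-fatal for this sketch */

	execvp(argv[1], &argv[1]);
	perror("execvp");
	return 1;
}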
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2f3365829229..f86d5c9ce5eb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1597,20 +1597,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
if (signr) {
struct elf_thread_status *tmp;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
do_each_thread(g,p)
if (current->mm == p->mm && current != p) {
tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
if (!tmp) {
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
goto cleanup;
}
- INIT_LIST_HEAD(&tmp->list);
tmp->thread = p;
list_add(&tmp->list, &thread_list);
}
while_each_thread(g,p);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
list_for_each(t, &thread_list) {
struct elf_thread_status *tmp;
int sz;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 34ebbc191e46..1713c48fef54 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -215,10 +215,8 @@ _error:
bprm->interp_flags = 0;
bprm->interp_data = 0;
_unshare:
- if (files) {
- put_files_struct(current->files);
- current->files = files;
- }
+ if (files)
+ reset_files_struct(current, files);
goto _ret;
}
@@ -507,7 +505,6 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime =
current_fs_time(inode->i_sb);
@@ -517,7 +514,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
static void bm_clear_inode(struct inode *inode)
{
- kfree(inode->u.generic_ip);
+ kfree(inode->i_private);
}
static void kill_node(Node *e)
@@ -545,7 +542,7 @@ static void kill_node(Node *e)
static ssize_t
bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
{
- Node *e = file->f_dentry->d_inode->u.generic_ip;
+ Node *e = file->f_dentry->d_inode->i_private;
loff_t pos = *ppos;
ssize_t res;
char *page;
@@ -579,7 +576,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
struct dentry *root;
- Node *e = file->f_dentry->d_inode->u.generic_ip;
+ Node *e = file->f_dentry->d_inode->i_private;
int res = parse_command(buffer, count);
switch (res) {
@@ -646,7 +643,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
}
e->dentry = dget(dentry);
- inode->u.generic_ip = e;
+ inode->i_private = e;
inode->i_fop = &bm_entry_operations;
d_instantiate(dentry, inode);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 045f98854f14..4346468139e8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -543,11 +543,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev)
return kobject_get(bdev->bd_disk->holder_dir);
}
-static void add_symlink(struct kobject *from, struct kobject *to)
+static int add_symlink(struct kobject *from, struct kobject *to)
{
if (!from || !to)
- return;
- sysfs_create_link(from, to, kobject_name(to));
+ return 0;
+ return sysfs_create_link(from, to, kobject_name(to));
}
static void del_symlink(struct kobject *from, struct kobject *to)
@@ -648,30 +648,38 @@ static void free_bd_holder(struct bd_holder *bo)
* If there is no matching entry with @bo in @bdev->bd_holder_list,
* add @bo to the list, create symlinks.
*
- * Returns 1 if @bo was added to the list.
- * Returns 0 if @bo wasn't used by any reason and should be freed.
+ * Returns 0 if symlinks are created or already there.
+ * Returns -ve if something fails and @bo can be freed.
*/
static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
{
struct bd_holder *tmp;
+ int ret;
if (!bo)
- return 0;
+ return -EINVAL;
list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
if (tmp->sdir == bo->sdir) {
tmp->count++;
+ /* We've already done what we need to do here. */
+ free_bd_holder(bo);
return 0;
}
}
if (!bd_holder_grab_dirs(bdev, bo))
- return 0;
+ return -EBUSY;
- add_symlink(bo->sdir, bo->sdev);
- add_symlink(bo->hdir, bo->hdev);
- list_add_tail(&bo->list, &bdev->bd_holder_list);
- return 1;
+ ret = add_symlink(bo->sdir, bo->sdev);
+ if (ret == 0) {
+ ret = add_symlink(bo->hdir, bo->hdev);
+ if (ret)
+ del_symlink(bo->sdir, bo->sdev);
+ }
+ if (ret == 0)
+ list_add_tail(&bo->list, &bdev->bd_holder_list);
+ return ret;
}
/**
@@ -741,7 +749,9 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
res = bd_claim(bdev, holder);
- if (res || !add_bd_holder(bdev, bo))
+ if (res == 0)
+ res = add_bd_holder(bdev, bo);
+ if (res)
free_bd_holder(bo);
mutex_unlock(&bdev->bd_mutex);
@@ -1021,7 +1031,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
rescan_partitions(bdev->bd_disk, bdev);
} else {
mutex_lock_nested(&bdev->bd_contains->bd_mutex,
- BD_MUTEX_PARTITION);
+ BD_MUTEX_WHOLE);
bdev->bd_contains->bd_part_count++;
mutex_unlock(&bdev->bd_contains->bd_mutex);
}
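The add_symlink()/add_bd_holder() changes above replace fire-and-forget sysfs link creation with a check-and-roll-back sequence. Reduced to its essentials, the pattern looks like the sketch below; create_link_pair() and the kobjects a/b are hypothetical, and only sysfs_create_link(), sysfs_remove_link() and kobject_name() are the stock helpers.

#include <linux/kobject.h>
#include <linux/sysfs.h>

/* Cross-link two kobjects, undoing the first link if the second fails. */
static int create_link_pair(struct kobject *a, struct kobject *b)
{
	int ret;

	ret = sysfs_create_link(a, b, kobject_name(b));
	if (ret)
		return ret;

	ret = sysfs_create_link(b, a, kobject_name(a));
	if (ret)
		sysfs_remove_link(a, kobject_name(b));	/* roll back link 1 */

	return ret;
}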
diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
spin_lock(&mapping->private_lock);
ret = drop_buffers(page, &buffers_to_free);
+ spin_unlock(&mapping->private_lock);
if (ret) {
/*
* If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
*/
clear_page_dirty(page);
}
- spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3483d3cf8087..1f3285affa39 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -19,11 +19,30 @@
#include <linux/kobj_map.h>
#include <linux/cdev.h>
#include <linux/mutex.h>
+#include <linux/backing-dev.h>
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif
+/*
+ * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
+ * devices
+ * - permits shared-mmap for read, write and/or exec
+ * - does not permit private mmap in NOMMU mode (can't do COW)
+ * - no readahead or I/O queue unplugging required
+ */
+struct backing_dev_info directly_mappable_cdev_bdi = {
+ .capabilities = (
+#ifdef CONFIG_MMU
+ /* permit private copies of the data to be taken */
+ BDI_CAP_MAP_COPY |
+#endif
+ /* permit direct mmap, for read, write or exec */
+ BDI_CAP_MAP_DIRECT |
+ BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+};
+
static struct kobj_map *cdev_map;
static DEFINE_MUTEX(chrdevs_lock);
@@ -109,13 +128,31 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
if ((*cp)->major > major ||
- ((*cp)->major == major && (*cp)->baseminor >= baseminor))
+ ((*cp)->major == major &&
+ (((*cp)->baseminor >= baseminor) ||
+ ((*cp)->baseminor + (*cp)->minorct > baseminor))))
break;
- if (*cp && (*cp)->major == major &&
- (*cp)->baseminor < baseminor + minorct) {
- ret = -EBUSY;
- goto out;
+
+ /* Check for overlapping minor ranges. */
+ if (*cp && (*cp)->major == major) {
+ int old_min = (*cp)->baseminor;
+ int old_max = (*cp)->baseminor + (*cp)->minorct - 1;
+ int new_min = baseminor;
+ int new_max = baseminor + minorct - 1;
+
+ /* New driver overlaps from the left. */
+ if (new_max >= old_min && new_max <= old_max) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* New driver overlaps from the right. */
+ if (new_min <= old_max && new_min >= old_min) {
+ ret = -EBUSY;
+ goto out;
+ }
}
+
cd->next = *cp;
*cp = cd;
mutex_unlock(&chrdevs_lock);
@@ -146,6 +183,15 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
return cd;
}
+/**
+ * register_chrdev_region() - register a range of device numbers
+ * @from: the first in the desired range of device numbers; must include
+ * the major number.
+ * @count: the number of consecutive device numbers required
+ * @name: the name of the device or driver.
+ *
+ * Return value is zero on success, a negative error code on failure.
+ */
int register_chrdev_region(dev_t from, unsigned count, const char *name)
{
struct char_device_struct *cd;
@@ -171,6 +217,17 @@ fail:
return PTR_ERR(cd);
}
+/**
+ * alloc_chrdev_region() - register a range of char device numbers
+ * @dev: output parameter for first assigned number
+ * @baseminor: first of the requested range of minor numbers
+ * @count: the number of minor numbers required
+ * @name: the name of the associated device or driver
+ *
+ * Allocates a range of char device numbers. The major number will be
+ * chosen dynamically, and returned (along with the first minor number)
+ * in @dev. Returns zero or a negative error code.
+ */
int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
const char *name)
{
@@ -240,6 +297,15 @@ out2:
return err;
}
+/**
+ * unregister_chrdev_region() - return a range of device numbers
+ * @from: the first in the range of numbers to unregister
+ * @count: the number of device numbers to unregister
+ *
+ * This function will unregister a range of @count device numbers,
+ * starting with @from. The caller should normally be the one who
+ * allocated those numbers in the first place...
+ */
void unregister_chrdev_region(dev_t from, unsigned count)
{
dev_t to = from + count;
@@ -377,6 +443,16 @@ static int exact_lock(dev_t dev, void *data)
return cdev_get(p) ? 0 : -1;
}
+/**
+ * cdev_add() - add a char device to the system
+ * @p: the cdev structure for the device
+ * @dev: the first device number for which this device is responsible
+ * @count: the number of consecutive minor numbers corresponding to this
+ * device
+ *
+ * cdev_add() adds the device represented by @p to the system, making it
+ * live immediately. A negative error code is returned on failure.
+ */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
p->dev = dev;
@@ -389,6 +465,13 @@ static void cdev_unmap(dev_t dev, unsigned count)
kobj_unmap(cdev_map, dev, count);
}
+/**
+ * cdev_del() - remove a cdev from the system
+ * @p: the cdev structure to be removed
+ *
+ * cdev_del() removes @p from the system, possibly freeing the structure
+ * itself.
+ */
void cdev_del(struct cdev *p)
{
cdev_unmap(p->dev, p->count);
@@ -417,6 +500,11 @@ static struct kobj_type ktype_cdev_dynamic = {
.release = cdev_dynamic_release,
};
+/**
+ * cdev_alloc() - allocate a cdev structure
+ *
+ * Allocates and returns a cdev structure, or NULL on failure.
+ */
struct cdev *cdev_alloc(void)
{
struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
@@ -428,6 +516,14 @@ struct cdev *cdev_alloc(void)
return p;
}
+/**
+ * cdev_init() - initialize a cdev structure
+ * @cdev: the structure to initialize
+ * @fops: the file_operations for this device
+ *
+ * Initializes @cdev, remembering @fops, making it ready to add to the
+ * system with cdev_add().
+ */
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
memset(cdev, 0, sizeof *cdev);
@@ -461,3 +557,4 @@ EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
EXPORT_SYMBOL(register_chrdev);
EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(directly_mappable_cdev_bdi);
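The kernel-doc added above covers the whole chrdev/cdev life cycle. A minimal skeleton driver that exercises alloc_chrdev_region(), cdev_init(), cdev_add() and the matching teardown might look roughly like this; everything named mydev is a hypothetical example, not code from the patch.

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>

static dev_t mydev_first;		/* first device number we were given */
static struct cdev mydev_cdev;

static const struct file_operations mydev_fops = {
	.owner	= THIS_MODULE,
	/* .open, .read, .write, ... would be filled in here */
};

static int __init mydev_init(void)
{
	int ret;

	/* one dynamically chosen major, four minors, listed as "mydev" */
	ret = alloc_chrdev_region(&mydev_first, 0, 4, "mydev");
	if (ret)
		return ret;

	cdev_init(&mydev_cdev, &mydev_fops);
	mydev_cdev.owner = THIS_MODULE;

	ret = cdev_add(&mydev_cdev, mydev_first, 4);
	if (ret)
		unregister_chrdev_region(mydev_first, 4);	/* undo on failure */
	return ret;
}

static void __exit mydev_exit(void)
{
	cdev_del(&mydev_cdev);
	unregister_chrdev_region(mydev_first, 4);
}

module_init(mydev_init);
module_exit(mydev_exit);
MODULE_LICENSE("GPL");

Registering the number range before cdev_add() matters because, as the new kernel-doc notes, the device becomes live the moment cdev_add() succeeds.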
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c3ef1c0d0e68..22bcf4d7e7ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,7 +253,6 @@ cifs_alloc_inode(struct super_block *sb)
file data or metadata */
cifs_inode->clientCanCacheRead = FALSE;
cifs_inode->clientCanCacheAll = FALSE;
- cifs_inode->vfs_inode.i_blksize = CIFS_MAX_MSGSIZE;
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;
INIT_LIST_HEAD(&cifs_inode->openFileList);
@@ -699,8 +698,7 @@ cifs_init_inodecache(void)
static void
cifs_destroy_inodecache(void)
{
- if (kmem_cache_destroy(cifs_inode_cachep))
- printk(KERN_WARNING "cifs_inode_cache: error freeing\n");
+ kmem_cache_destroy(cifs_inode_cachep);
}
static int
@@ -778,13 +776,9 @@ static void
cifs_destroy_request_bufs(void)
{
mempool_destroy(cifs_req_poolp);
- if (kmem_cache_destroy(cifs_req_cachep))
- printk(KERN_WARNING
- "cifs_destroy_request_cache: error not all structures were freed\n");
+ kmem_cache_destroy(cifs_req_cachep);
mempool_destroy(cifs_sm_req_poolp);
- if (kmem_cache_destroy(cifs_sm_req_cachep))
- printk(KERN_WARNING
- "cifs_destroy_request_cache: cifs_small_rq free error\n");
+ kmem_cache_destroy(cifs_sm_req_cachep);
}
static int
@@ -819,13 +813,8 @@ static void
cifs_destroy_mids(void)
{
mempool_destroy(cifs_mid_poolp);
- if (kmem_cache_destroy(cifs_mid_cachep))
- printk(KERN_WARNING
- "cifs_destroy_mids: error not all structures were freed\n");
-
- if (kmem_cache_destroy(cifs_oplock_cachep))
- printk(KERN_WARNING
- "error not all oplock structures were freed\n");
+ kmem_cache_destroy(cifs_mid_cachep);
+ kmem_cache_destroy(cifs_oplock_cachep);
}
static int cifs_oplock_thread(void * dummyarg)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9aeb58a7d369..b27b34537bf2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -216,10 +216,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
if (allocation_size < end_of_file)
cFYI(1, ("May be sparse file, allocation less than file size"));
- cFYI(1, ("File Size %ld and blocks %llu and blocksize %ld",
+ cFYI(1, ("File Size %ld and blocks %llu",
(unsigned long)tmp_inode->i_size,
- (unsigned long long)tmp_inode->i_blocks,
- tmp_inode->i_blksize));
+ (unsigned long long)tmp_inode->i_blocks));
if (S_ISREG(tmp_inode->i_mode)) {
cFYI(1, ("File inode"));
tmp_inode->i_op = &cifs_file_inode_ops;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 5597080cb811..95a54253c047 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -110,8 +110,6 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
inode->i_nlink = attr->va_nlink;
if (attr->va_size != -1)
inode->i_size = attr->va_size;
- if (attr->va_blocksize != -1)
- inode->i_blksize = attr->va_blocksize;
if (attr->va_size != -1)
inode->i_blocks = (attr->va_size + 511) >> 9;
if (attr->va_atime.tv_sec != -1)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 71f2ea632e53..8651ea6a23b7 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -513,7 +513,7 @@ static int coda_venus_readdir(struct file *filp, filldir_t filldir,
ino_t ino;
int ret, i;
- vdir = (struct venus_dirent *)kmalloc(sizeof(*vdir), GFP_KERNEL);
+ vdir = kmalloc(sizeof(*vdir), GFP_KERNEL);
if (!vdir) return -ENOMEM;
i = filp->f_pos;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 87f1dc8aa24b..88d123321164 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -80,8 +80,7 @@ int coda_init_inodecache(void)
void coda_destroy_inodecache(void)
{
- if (kmem_cache_destroy(coda_inode_cachep))
- printk(KERN_INFO "coda_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(coda_inode_cachep);
}
static int coda_remount(struct super_block *sb, int *flags, char *data)
diff --git a/fs/compat.c b/fs/compat.c
index e31e9cf96647..ce982f6e8c80 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1855,7 +1855,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
- if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
+ if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
struct compat_timespec rts;
rts.tv_sec = timeout / HZ;
@@ -1866,7 +1866,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
}
if (compat_timespec_compare(&rts, &ts) >= 0)
rts = ts;
- copy_to_user(tsp, &rts, sizeof(rts));
+ if (copy_to_user(tsp, &rts, sizeof(rts)))
+ ret = -EFAULT;
}
if (ret == -ERESTARTNOHAND) {
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index f499803743e0..85105e50f7db 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -274,9 +274,8 @@ static int check_perm(struct inode * inode, struct file * file)
/* No error? Great, allocate a buffer for the file, and store it
* it in file->private_data for easy access.
*/
- buffer = kmalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
+ buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
if (buffer) {
- memset(buffer,0,sizeof(struct configfs_buffer));
init_MUTEX(&buffer->sem);
buffer->needs_read_fill = 1;
buffer->ops = ops;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index e14488ca6411..fb18917954a9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -76,11 +76,10 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
- sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
+ sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
if (!sd_iattr)
return -ENOMEM;
/* assign default attributes */
- memset(sd_iattr, 0, sizeof(struct iattr));
sd_iattr->ia_mode = sd->s_mode;
sd_iattr->ia_uid = 0;
sd_iattr->ia_gid = 0;
@@ -136,7 +135,6 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
{
struct inode * inode = new_inode(configfs_sb);
if (inode) {
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->a_ops = &configfs_aops;
inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 223c0431042d..a624c3ec8189 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -73,7 +73,6 @@ static int cramfs_iget5_set(struct inode *inode, void *opaque)
inode->i_uid = cramfs_inode->uid;
inode->i_size = cramfs_inode->size;
inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_gid = cramfs_inode->gid;
/* Struct copy intentional */
inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
@@ -242,11 +241,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY;
- sbi = kmalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(struct cramfs_sb_info));
/* Invalidate the read buffers on mount: think disk change.. */
mutex_lock(&read_mutex);
@@ -545,8 +543,15 @@ static struct file_system_type cramfs_fs_type = {
static int __init init_cramfs_fs(void)
{
- cramfs_uncompress_init();
- return register_filesystem(&cramfs_fs_type);
+ int rv;
+
+ rv = cramfs_uncompress_init();
+ if (rv < 0)
+ return rv;
+ rv = register_filesystem(&cramfs_fs_type);
+ if (rv < 0)
+ cramfs_uncompress_exit();
+ return rv;
}
static void __exit exit_cramfs_fs(void)
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 8def89f2c438..fc3ccb74626f 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -68,11 +68,10 @@ int cramfs_uncompress_init(void)
return 0;
}
-int cramfs_uncompress_exit(void)
+void cramfs_uncompress_exit(void)
{
if (!--initialized) {
zlib_inflateEnd(&stream);
vfree(stream.workspace);
}
- return 0;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a34ec57..17b392a2049e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
* (or otherwise set) by the caller to indicate that it is now
* in use by the dcache.
*/
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+ struct inode *inode)
{
struct dentry *alias;
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
- BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
- if (!inode)
- goto do_negative;
+ if (!inode) {
+ entry->d_inode = NULL;
+ return NULL;
+ }
+
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
if (memcmp(qstr->name, name, len))
continue;
dget_locked(alias);
- spin_unlock(&dcache_lock);
- BUG_ON(!d_unhashed(alias));
- iput(inode);
return alias;
}
+
list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
entry->d_inode = inode;
fsnotify_d_instantiate(entry, inode);
- spin_unlock(&dcache_lock);
- security_d_instantiate(entry, inode);
return NULL;
}
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+ struct dentry *result;
+
+ BUG_ON(!list_empty(&entry->d_alias));
+
+ spin_lock(&dcache_lock);
+ result = __d_instantiate_unique(entry, inode);
+ spin_unlock(&dcache_lock);
+
+ if (!result) {
+ security_d_instantiate(entry, inode);
+ return NULL;
+ }
+
+ BUG_ON(!d_unhashed(result));
+ iput(inode);
+ return result;
+}
+
EXPORT_SYMBOL(d_instantiate_unique);
/**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
hlist_add_head_rcu(&entry->d_hash, list);
}
+static void _d_rehash(struct dentry * entry)
+{
+ __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
/**
* d_rehash - add an entry back to the hash
* @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
void d_rehash(struct dentry * entry)
{
- struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
- __d_rehash(entry, list);
+ _d_rehash(entry);
spin_unlock(&entry->d_lock);
spin_unlock(&dcache_lock);
}
@@ -1386,6 +1407,120 @@ already_unhashed:
spin_unlock(&dcache_lock);
}
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+ struct dentry *dparent, *aparent;
+
+ switch_names(dentry, anon);
+ do_switch(dentry->d_name.len, anon->d_name.len);
+ do_switch(dentry->d_name.hash, anon->d_name.hash);
+
+ dparent = dentry->d_parent;
+ aparent = anon->d_parent;
+
+ dentry->d_parent = (aparent == anon) ? dentry : aparent;
+ list_del(&dentry->d_u.d_child);
+ if (!IS_ROOT(dentry))
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+ anon->d_parent = (dparent == dentry) ? anon : dparent;
+ list_del(&anon->d_u.d_child);
+ if (!IS_ROOT(anon))
+ list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&anon->d_u.d_child);
+
+ anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces a dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+ struct dentry *alias, *actual;
+
+ BUG_ON(!d_unhashed(dentry));
+
+ spin_lock(&dcache_lock);
+
+ if (!inode) {
+ actual = dentry;
+ dentry->d_inode = NULL;
+ goto found_lock;
+ }
+
+ /* See if a disconnected directory already exists as an anonymous root
+ * that we should splice into the tree instead */
+ if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
+ spin_lock(&alias->d_lock);
+
+ /* Is this a mountpoint that we could splice into our tree? */
+ if (IS_ROOT(alias))
+ goto connect_mountpoint;
+
+ if (alias->d_name.len == dentry->d_name.len &&
+ alias->d_parent == dentry->d_parent &&
+ memcmp(alias->d_name.name,
+ dentry->d_name.name,
+ dentry->d_name.len) == 0)
+ goto replace_with_alias;
+
+ spin_unlock(&alias->d_lock);
+
+ /* Doh! Seem to be aliasing directories for some reason... */
+ dput(alias);
+ }
+
+ /* Add a unique reference */
+ actual = __d_instantiate_unique(dentry, inode);
+ if (!actual)
+ actual = dentry;
+ else if (unlikely(!d_unhashed(actual)))
+ goto shouldnt_be_hashed;
+
+found_lock:
+ spin_lock(&actual->d_lock);
+found:
+ _d_rehash(actual);
+ spin_unlock(&actual->d_lock);
+ spin_unlock(&dcache_lock);
+
+ if (actual == dentry) {
+ security_d_instantiate(dentry, inode);
+ return NULL;
+ }
+
+ iput(inode);
+ return actual;
+
+ /* Convert the anonymous/root alias into an ordinary dentry */
+connect_mountpoint:
+ __d_materialise_dentry(dentry, alias);
+
+ /* Replace the candidate dentry with the alias in the tree */
+replace_with_alias:
+ __d_drop(alias);
+ actual = alias;
+ goto found;
+
+shouldnt_be_hashed:
+ spin_unlock(&dcache_lock);
+ BUG();
+ goto shouldnt_be_hashed;
+}
+
/**
* d_path - return the path of a dentry
* @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
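d_materialise_unique() is exported for network filesystems whose lookups may hit a disconnected alias that should be spliced back into the tree. A hedged sketch of how a filesystem's ->lookup() method could use it is below; myfs_lookup() and myfs_iget() are hypothetical, and only d_materialise_unique() itself comes from the patch.

static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	/* myfs_iget() is an imaginary helper returning the inode, or NULL
	 * to create a negative dentry for a name that does not exist. */
	struct inode *inode = myfs_iget(dir, &dentry->d_name);

	/*
	 * NULL means @dentry itself was bound and hashed; a non-NULL
	 * return is an existing alias to use instead (the inode
	 * reference has already been dropped for us).  Either value is
	 * a valid ->lookup() result.
	 */
	return d_materialise_unique(dentry, inode);
}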
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 39640fd03458..bf3901ab1744 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -32,8 +32,8 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
static int default_open(struct inode *inode, struct file *file)
{
- if (inode->u.generic_ip)
- file->private_data = inode->u.generic_ip;
+ if (inode->i_private)
+ file->private_data = inode->i_private;
return 0;
}
@@ -55,12 +55,11 @@ static u64 debugfs_u8_get(void *data)
DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
/**
- * debugfs_create_u8 - create a file in the debugfs filesystem that is used to read and write an unsigned 8 bit value.
- *
+ * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
- * directory dentry if set. If this paramater is NULL, then the
+ * directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
* from.
@@ -72,11 +71,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_u8(const char *name, mode_t mode,
@@ -97,12 +96,11 @@ static u64 debugfs_u16_get(void *data)
DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
/**
- * debugfs_create_u16 - create a file in the debugfs filesystem that is used to read and write an unsigned 16 bit value.
- *
+ * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
- * directory dentry if set. If this paramater is NULL, then the
+ * directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
* from.
@@ -114,11 +112,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_u16(const char *name, mode_t mode,
@@ -139,12 +137,11 @@ static u64 debugfs_u32_get(void *data)
DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
/**
- * debugfs_create_u32 - create a file in the debugfs filesystem that is used to read and write an unsigned 32 bit value.
- *
+ * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
- * directory dentry if set. If this paramater is NULL, then the
+ * directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
* from.
@@ -156,11 +153,11 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_u32(const char *name, mode_t mode,
@@ -219,12 +216,11 @@ static const struct file_operations fops_bool = {
};
/**
- * debugfs_create_bool - create a file in the debugfs filesystem that is used to read and write a boolean value.
- *
+ * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
- * directory dentry if set. If this paramater is NULL, then the
+ * directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
* from.
@@ -236,11 +232,11 @@ static const struct file_operations fops_bool = {
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_bool(const char *name, mode_t mode,
@@ -264,13 +260,11 @@ static struct file_operations fops_blob = {
};
/**
- * debugfs_create_blob - create a file in the debugfs filesystem that is
- * used to read and write a binary blob.
- *
+ * debugfs_create_blob - create a debugfs file that is used to read and write a binary blob
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
- * directory dentry if set. If this paramater is NULL, then the
+ * directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer
* to the blob data and the size of the data.
@@ -282,11 +276,11 @@ static struct file_operations fops_blob = {
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_blob(const char *name, mode_t mode,
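The calling convention spelled out in the kernel-doc above (check the returned dentry for NULL, and remove what you created explicitly later) looks like this in practice; the module, directory and variable names below are invented for illustration:

	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/debugfs.h>

	static u8 threshold = 16;
	static struct dentry *example_dir, *threshold_file;

	static int __init example_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);
		if (!example_dir)
			return -ENODEV;

		/* per the kernel-doc above: test for NULL, don't compare with -ENODEV */
		threshold_file = debugfs_create_u8("threshold", 0644,
						   example_dir, &threshold);
		if (!threshold_file) {
			debugfs_remove(example_dir);
			return -ENODEV;
		}
		return 0;
	}

	static void __exit example_exit(void)
	{
		/* no automatic cleanup on unload: remove what we created */
		debugfs_remove(threshold_file);
		debugfs_remove(example_dir);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");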
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e8ae3042b806..269e649e6dc6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,7 +40,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
@@ -162,14 +161,13 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
/**
* debugfs_create_file - create a file in the debugfs filesystem
- *
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
* directory dentry if set. If this paramater is NULL, then the
* file will be created in the root of the debugfs filesystem.
* @data: a pointer to something that the caller will want to get to later
- * on. The inode.u.generic_ip pointer will point to this value on
+ * on. The inode.i_private pointer will point to this value on
* the open() call.
* @fops: a pointer to a struct file_operations that should be used for
* this file.
@@ -182,11 +180,11 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_file(const char *name, mode_t mode,
@@ -210,7 +208,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
if (dentry->d_inode) {
if (data)
- dentry->d_inode->u.generic_ip = data;
+ dentry->d_inode->i_private = data;
if (fops)
dentry->d_inode->i_fop = fops;
}
@@ -221,7 +219,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
/**
* debugfs_create_dir - create a directory in the debugfs filesystem
- *
* @name: a pointer to a string containing the name of the directory to
* create.
* @parent: a pointer to the parent dentry for this file. This should be a
@@ -233,11 +230,11 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, NULL will be returned.
+ * you are responsible here.) If an error occurs, %NULL will be returned.
*
- * If debugfs is not enabled in the kernel, the value -ENODEV will be
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
* returned. It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead, so as to eliminate the need for #ifdef in the calling
* code.
*/
struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
@@ -250,7 +247,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
/**
* debugfs_remove - removes a file or directory from the debugfs filesystem
- *
* @dentry: a pointer to a the dentry of the file or directory to be
* removed.
*
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f7aef5bb584a..5f7b5a6025bf 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -113,7 +113,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
inode->i_ino = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_blocks = 0;
- inode->i_blksize = 1024;
inode->i_uid = inode->i_gid = 0;
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
inode->i_op = &simple_dir_inode_operations;
@@ -172,12 +171,11 @@ int devpts_pty_new(struct tty_struct *tty)
return -ENOMEM;
inode->i_ino = number+2;
- inode->i_blksize = 1024;
inode->i_uid = config.setuid ? config.uid : current->fsuid;
inode->i_gid = config.setgid ? config.gid : current->fsgid;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
init_special_inode(inode, S_IFCHR|config.mode, device);
- inode->u.generic_ip = tty;
+ inode->i_private = tty;
dentry = get_node(number);
if (!IS_ERR(dentry) && !dentry->d_inode)
@@ -196,7 +194,7 @@ struct tty_struct *devpts_get_tty(int number)
tty = NULL;
if (!IS_ERR(dentry)) {
if (dentry->d_inode)
- tty = dentry->d_inode->u.generic_ip;
+ tty = dentry->d_inode->i_private;
dput(dentry);
}
diff --git a/fs/dquot.c b/fs/dquot.c
index 0122a279106a..9af789567e51 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -834,6 +834,9 @@ static void print_warning(struct dquot *dquot, const char warntype)
if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
return;
+ mutex_lock(&tty_mutex);
+ if (!current->signal->tty)
+ goto out_lock;
tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
if (warntype == ISOFTWARN || warntype == BSOFTWARN)
tty_write_message(current->signal->tty, ": warning, ");
@@ -861,6 +864,8 @@ static void print_warning(struct dquot *dquot, const char warntype)
break;
}
tty_write_message(current->signal->tty, msg);
+out_lock:
+ mutex_unlock(&tty_mutex);
}
static inline void flush_warnings(struct dquot **dquots, char *warntype)
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 8ac2462ae5dd..b3f50651eb6b 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -90,8 +90,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(efs_inode_cachep))
- printk(KERN_INFO "efs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(efs_inode_cachep);
}
static void efs_put_super(struct super_block *s)
@@ -248,11 +247,10 @@ static int efs_fill_super(struct super_block *s, void *d, int silent)
struct buffer_head *bh;
struct inode *root;
- sb = kmalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
+ sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL);
if (!sb)
return -ENOMEM;
s->s_fs_info = sb;
- memset(sb, 0, sizeof(struct efs_sb_info));
s->s_magic = EFS_SUPER_MAGIC;
if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) {
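The same conversion recurs throughout this series (efs here, ext2 and ext3 below): a kmalloc() followed by memset(..., 0, ...) collapses into a single kzalloc(). The idiom in isolation:

	/* before */
	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	memset(sbi, 0, sizeof(*sbi));

	/* after: a single allocation that is already zero-filled */
	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;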
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3a3567433b92..8d544334bcd2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1590,7 +1590,6 @@ static struct inode *ep_eventpoll_inode(void)
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
return inode;
eexit_1:
diff --git a/fs/exec.c b/fs/exec.c
index 54135df2a966..a8efe35176b0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -595,7 +595,7 @@ static int de_thread(struct task_struct *tsk)
if (!newsighand)
return -ENOMEM;
- if (thread_group_empty(current))
+ if (thread_group_empty(tsk))
goto no_thread_group;
/*
@@ -620,17 +620,17 @@ static int de_thread(struct task_struct *tsk)
* Reparenting needs write_lock on tasklist_lock,
* so it is safe to do it under read_lock.
*/
- if (unlikely(current->group_leader == child_reaper))
- child_reaper = current;
+ if (unlikely(tsk->group_leader == child_reaper))
+ child_reaper = tsk;
- zap_other_threads(current);
+ zap_other_threads(tsk);
read_unlock(&tasklist_lock);
/*
* Account for the thread group leader hanging around:
*/
count = 1;
- if (!thread_group_leader(current)) {
+ if (!thread_group_leader(tsk)) {
count = 2;
/*
* The SIGALRM timer survives the exec, but needs to point
@@ -639,14 +639,14 @@ static int de_thread(struct task_struct *tsk)
* synchronize with any firing (by calling del_timer_sync)
* before we can safely let the old group leader die.
*/
- sig->tsk = current;
+ sig->tsk = tsk;
spin_unlock_irq(lock);
if (hrtimer_cancel(&sig->real_timer))
hrtimer_restart(&sig->real_timer);
spin_lock_irq(lock);
}
while (atomic_read(&sig->count) > count) {
- sig->group_exit_task = current;
+ sig->group_exit_task = tsk;
sig->notify_count = count;
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(lock);
@@ -662,13 +662,13 @@ static int de_thread(struct task_struct *tsk)
* do is to wait for the thread group leader to become inactive,
* and to assume its PID:
*/
- if (!thread_group_leader(current)) {
+ if (!thread_group_leader(tsk)) {
/*
* Wait for the thread group leader to be a zombie.
* It should already be zombie at this point, most
* of the time.
*/
- leader = current->group_leader;
+ leader = tsk->group_leader;
while (leader->exit_state != EXIT_ZOMBIE)
yield();
@@ -682,12 +682,12 @@ static int de_thread(struct task_struct *tsk)
* When we take on its identity by switching to its PID, we
* also take its birthdate (always earlier than our own).
*/
- current->start_time = leader->start_time;
+ tsk->start_time = leader->start_time;
write_lock_irq(&tasklist_lock);
- BUG_ON(leader->tgid != current->tgid);
- BUG_ON(current->pid == current->tgid);
+ BUG_ON(leader->tgid != tsk->tgid);
+ BUG_ON(tsk->pid == tsk->tgid);
/*
* An exec() starts a new thread group with the
* TGID of the previous thread group. Rehash the
@@ -696,24 +696,21 @@ static int de_thread(struct task_struct *tsk)
*/
/* Become a process group leader with the old leader's pid.
- * Note: The old leader also uses thispid until release_task
+ * The old leader becomes a thread of this thread group.
+ * Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
- detach_pid(current, PIDTYPE_PID);
- current->pid = leader->pid;
- attach_pid(current, PIDTYPE_PID, current->pid);
- attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
- attach_pid(current, PIDTYPE_SID, current->signal->session);
- list_replace_rcu(&leader->tasks, &current->tasks);
+ detach_pid(tsk, PIDTYPE_PID);
+ tsk->pid = leader->pid;
+ attach_pid(tsk, PIDTYPE_PID, tsk->pid);
+ transfer_pid(leader, tsk, PIDTYPE_PGID);
+ transfer_pid(leader, tsk, PIDTYPE_SID);
+ list_replace_rcu(&leader->tasks, &tsk->tasks);
- current->group_leader = current;
- leader->group_leader = current;
+ tsk->group_leader = tsk;
+ leader->group_leader = tsk;
- /* Reduce leader to a thread */
- detach_pid(leader, PIDTYPE_PGID);
- detach_pid(leader, PIDTYPE_SID);
-
- current->exit_signal = SIGCHLD;
+ tsk->exit_signal = SIGCHLD;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
@@ -753,7 +750,7 @@ no_thread_group:
spin_lock(&oldsighand->siglock);
spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
- rcu_assign_pointer(current->sighand, newsighand);
+ rcu_assign_pointer(tsk->sighand, newsighand);
recalc_sigpending();
spin_unlock(&newsighand->siglock);
@@ -764,7 +761,7 @@ no_thread_group:
kmem_cache_free(sighand_cachep, oldsighand);
}
- BUG_ON(!thread_group_leader(current));
+ BUG_ON(!thread_group_leader(tsk));
return 0;
}
@@ -901,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm)
return 0;
mmap_failed:
- put_files_struct(current->files);
- current->files = files;
+ reset_files_struct(current, files);
out:
return retval;
}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index da52b4a5db64..7c420b800c34 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -89,8 +89,8 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
size_t n;
*size = ext2_acl_size(acl->a_count);
- ext_acl = (ext2_acl_header *)kmalloc(sizeof(ext2_acl_header) +
- acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL);
+ ext_acl = kmalloc(sizeof(ext2_acl_header) + acl->a_count *
+ sizeof(ext2_acl_entry), GFP_KERNEL);
if (!ext_acl)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION);
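Aside from re-wrapping the line, the only change in this hunk (and in its ext3 twin below) is dropping the cast on kmalloc()'s return value; kmalloc() returns void *, which converts implicitly to any object pointer type, so the cast added nothing:

	/* void * converts implicitly, so no cast is needed on the result */
	ext_acl = kmalloc(sizeof(ext2_acl_header) +
			  acl->a_count * sizeof(ext2_acl_entry), GFP_KERNEL);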
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 695f69ccf908..2cb545bf0f3c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -574,7 +574,6 @@ got:
inode->i_mode = mode;
inode->i_ino = ino;
- inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
memset(ei->i_data, 0, sizeof(ei->i_data));
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fb4d3220eb8d..dd4e14c221e0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1094,7 +1094,6 @@ void ext2_read_inode (struct inode * inode)
brelse (bh);
goto bad_inode;
}
- inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 4286ff6330b6..513cd421ac0b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -184,8 +184,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(ext2_inode_cachep))
- printk(KERN_INFO "ext2_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(ext2_inode_cachep);
}
static void ext2_clear_inode(struct inode *inode)
@@ -544,17 +543,24 @@ static int ext2_check_descriptors (struct super_block * sb)
int i;
int desc_block = 0;
struct ext2_sb_info *sbi = EXT2_SB(sb);
- unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
+ unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
+ unsigned long last_block;
struct ext2_group_desc * gdp = NULL;
ext2_debug ("Checking group descriptors");
for (i = 0; i < sbi->s_groups_count; i++)
{
+ if (i == sbi->s_groups_count - 1)
+ last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
+ else
+ last_block = first_block +
+ (EXT2_BLOCKS_PER_GROUP(sb) - 1);
+
if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data;
- if (le32_to_cpu(gdp->bg_block_bitmap) < block ||
- le32_to_cpu(gdp->bg_block_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
+ le32_to_cpu(gdp->bg_block_bitmap) > last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Block bitmap for group %d"
@@ -562,8 +568,8 @@ static int ext2_check_descriptors (struct super_block * sb)
i, (unsigned long) le32_to_cpu(gdp->bg_block_bitmap));
return 0;
}
- if (le32_to_cpu(gdp->bg_inode_bitmap) < block ||
- le32_to_cpu(gdp->bg_inode_bitmap) >= block + EXT2_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
+ le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Inode bitmap for group %d"
@@ -571,9 +577,9 @@ static int ext2_check_descriptors (struct super_block * sb)
i, (unsigned long) le32_to_cpu(gdp->bg_inode_bitmap));
return 0;
}
- if (le32_to_cpu(gdp->bg_inode_table) < block ||
- le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >=
- block + EXT2_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
+ le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
+ last_block)
{
ext2_error (sb, "ext2_check_descriptors",
"Inode table for group %d"
@@ -581,7 +587,7 @@ static int ext2_check_descriptors (struct super_block * sb)
i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
return 0;
}
- block += EXT2_BLOCKS_PER_GROUP(sb);
+ first_block += EXT2_BLOCKS_PER_GROUP(sb);
gdp++;
}
return 1;
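Why the per-group last_block matters: the final block group is usually shorter than EXT2_BLOCKS_PER_GROUP, so bounding the check by first_block + EXT2_BLOCKS_PER_GROUP can accept on-disk metadata that actually lies past the end of the filesystem. A worked example with invented numbers:

	/*
	 * Illustrative values (not taken from the patch):
	 *   s_first_data_block = 1, EXT2_BLOCKS_PER_GROUP = 32768,
	 *   s_blocks_count = 100001  ->  s_groups_count = 4.
	 *
	 * Group 3 starts at 1 + 3 * 32768 = 98305 and the filesystem ends at
	 * block 100000, so the last group holds only 1696 blocks.  The old
	 * bound (first_block + EXT2_BLOCKS_PER_GROUP = 131073) would have
	 * accepted a bitmap or inode table placed beyond block 100000; the
	 * new per-group last_block (100000) rejects it.
	 */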
@@ -648,11 +654,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
int i, j;
__le32 features;
- sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(*sbi));
/*
* See what the current blocksize for the device is, and
@@ -861,10 +866,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (EXT2_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext2;
- sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_first_data_block) +
- EXT2_BLOCKS_PER_GROUP(sb) - 1) /
- EXT2_BLOCKS_PER_GROUP(sb);
+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) - 1)
+ / EXT2_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
EXT2_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
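Both the old and the new s_groups_count expressions compute ceil((s_blocks_count - s_first_data_block) / EXT2_BLOCKS_PER_GROUP) for any non-empty filesystem; the rewritten form never forms the intermediate sum, which (presumably the motivation here) could wrap in 32-bit arithmetic when the block count approaches 2^32. A sketch of the equivalence:

	/* n = s_blocks_count - s_first_data_block (n >= 1), bpg = blocks per group */
	static unsigned long groups_old(unsigned long n, unsigned long bpg)
	{
		return (n + bpg - 1) / bpg;	/* intermediate sum may wrap */
	}

	static unsigned long groups_new(unsigned long n, unsigned long bpg)
	{
		return ((n - 1) / bpg) + 1;	/* same ceiling, no intermediate sum */
	}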
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 86ae8e93adb9..af52a7f8b291 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -521,11 +521,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
}
} else {
/* Allocate a buffer where we construct the new block. */
- header = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ header = kzalloc(sb->s_blocksize, GFP_KERNEL);
error = -ENOMEM;
if (header == NULL)
goto cleanup;
- memset(header, 0, sb->s_blocksize);
end = (char *)header + sb->s_blocksize;
header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
header->h_blocks = header->h_refcount = cpu_to_le32(1);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 0d21d558b87a..1e5038d9a01b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -90,8 +90,8 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
size_t n;
*size = ext3_acl_size(acl->a_count);
- ext_acl = (ext3_acl_header *)kmalloc(sizeof(ext3_acl_header) +
- acl->a_count * sizeof(ext3_acl_entry), GFP_KERNEL);
+ ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count *
+ sizeof(ext3_acl_entry), GFP_KERNEL);
if (!ext_acl)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
@@ -258,7 +258,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
default:
return -EINVAL;
}
- if (acl) {
+ if (acl) {
value = ext3_acl_to_disk(acl, &size);
if (IS_ERR(value))
return (int)PTR_ERR(value);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 063d994bda0b..b41a7d7e20f0 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -38,6 +38,13 @@
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/**
+ * ext3_get_group_desc() -- load group descriptor from disk
+ * @sb: super block
+ * @block_group: given block group
+ * @bh: pointer to the buffer head to store the block
+ * group descriptor
+ */
struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
unsigned int block_group,
struct buffer_head ** bh)
@@ -73,8 +80,12 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
return desc + offset;
}
-/*
- * Read the bitmap for a given block_group, reading into the specified
+/**
+ * read_block_bitmap()
+ * @sb: super block
+ * @block_group: given block group
+ *
+ * Read the bitmap for a given block_group, reading into the specified
* slot in the superblock's bitmap cache.
*
* Return buffer_head on success or NULL in case of failure.
@@ -103,15 +114,22 @@ error_out:
* Operations include:
* dump, find, add, remove, is_empty, find_next_reservable_window, etc.
*
- * We use sorted double linked list for the per-filesystem reservation
- * window list. (like in vm_region).
+ * We use a red-black tree to represent per-filesystem reservation
+ * windows.
+ *
+ */
+
+/**
+ * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
+ * @root: root of the per-filesystem reservation rb tree
+ * @verbose: verbose mode
+ * @fn: function which wishes to dump the reservation map
*
- * Initially, we keep those small operations in the abstract functions,
- * so later if we need a better searching tree than double linked-list,
- * we could easily switch to that without changing too much
- * code.
+ * If verbose is turned on, it will print the whole block reservation
+ * windows (start, end). Otherwise, it will only print out the "bad" windows,
+ * those windows that overlap with their immediate neighbors.
*/
-#if 0
+#if 1
static void __rsv_window_dump(struct rb_root *root, int verbose,
const char *fn)
{
@@ -129,7 +147,7 @@ restart:
rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
if (verbose)
printk("reservation window 0x%p "
- "start: %d, end: %d\n",
+ "start: %lu, end: %lu\n",
rsv, rsv->rsv_start, rsv->rsv_end);
if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
printk("Bad reservation %p (start >= end)\n",
@@ -161,6 +179,22 @@ restart:
#define rsv_window_dump(root, verbose) do {} while (0)
#endif
+/**
+ * goal_in_my_reservation()
+ * @rsv: inode's reservation window
+ * @grp_goal: given goal block relative to the allocation block group
+ * @group: the current allocation block group
+ * @sb: filesystem super block
+ *
+ * Test if the given goal block (group relative) is within the file's
+ * own block reservation window range.
+ *
+ * If the reservation window is outside the goal allocation group, return 0;
+ * grp_goal (the given goal block) could be -1, which means there is no
+ * specific goal block. In this case, always return 1.
+ * If the goal block is within the reservation window, return 1;
+ * otherwise, return 0.
+ */
static int
goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
unsigned int group, struct super_block * sb)
@@ -168,7 +202,7 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
ext3_fsblk_t group_first_block, group_last_block;
group_first_block = ext3_group_first_block_no(sb, group);
- group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
+ group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
if ((rsv->_rsv_start > group_last_block) ||
(rsv->_rsv_end < group_first_block))
@@ -179,7 +213,11 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
return 1;
}
-/*
+/**
+ * search_reserve_window()
+ * @root: root of the reservation tree
+ * @goal: target allocation block
+ *
* Find the reserved window which includes the goal, or the previous one
* if the goal is not in any window.
* Returns NULL if there are no windows or if all windows start after the goal.
@@ -216,6 +254,13 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
return rsv;
}
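For readers unfamiliar with the rbtree API the new comments refer to, here is a simplified sketch of how a lookup like search_reserve_window() walks the tree (it assumes the ext3_reserve_window_node layout from fs/ext3). Unlike the real helper documented above, which falls back to the previous window when the goal is not covered, this version simply returns NULL in that case:

	static struct ext3_reserve_window_node *
	rsv_lookup_sketch(struct rb_root *root, ext3_fsblk_t goal)
	{
		struct rb_node *n = root->rb_node;
		struct ext3_reserve_window_node *rsv;

		while (n) {
			rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
			if (goal < rsv->rsv_start)
				n = n->rb_left;
			else if (goal > rsv->rsv_end)
				n = n->rb_right;
			else
				return rsv;	/* goal lies inside this window */
		}
		return NULL;			/* no window covers the goal */
	}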
+/**
+ * ext3_rsv_window_add() -- Insert a window into the block reservation rb tree.
+ * @sb: super block
+ * @rsv: reservation window to add
+ *
+ * Must be called with rsv_lock held.
+ */
void ext3_rsv_window_add(struct super_block *sb,
struct ext3_reserve_window_node *rsv)
{
@@ -236,14 +281,25 @@ void ext3_rsv_window_add(struct super_block *sb,
p = &(*p)->rb_left;
else if (start > this->rsv_end)
p = &(*p)->rb_right;
- else
+ else {
+ rsv_window_dump(root, 1);
BUG();
+ }
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
}
+/**
+ * rsv_window_remove() -- unlink a window from the reservation rb tree
+ * @sb: super block
+ * @rsv: reservation window to remove
+ *
+ * Mark the block reservation window as not allocated, and unlink it
+ * from the filesystem reservation window rb tree. Must be called with
+ * rsv_lock held.
+ */
static void rsv_window_remove(struct super_block *sb,
struct ext3_reserve_window_node *rsv)
{
@@ -253,11 +309,39 @@ static void rsv_window_remove(struct super_block *sb,
rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
}
+/*
+ * rsv_is_empty() -- Check whether the reservation window is unallocated.
+ * @rsv: given reservation window to check
+ *
+ * Returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
+ */
static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
{
/* a valid reservation end block could not be 0 */
- return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED);
+ return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
}
+
+/**
+ * ext3_init_block_alloc_info()
+ * @inode: file inode structure
+ *
+ * Allocate and initialize the reservation window structure, and
+ * then link the window to the ext3 inode structure.
+ *
+ * The reservation window structure is only dynamically allocated
+ * and linked to the ext3 inode the first time an open file
+ * needs a new block. So, before every ext3_new_block(s) call, for
+ * regular files, we should check whether the reservation window
+ * structure exists or not. If it does not, this function is called.
+ * Failing to do so will result in block reservation being turned off
+ * for that open file.
+ *
+ * This function is called from ext3_get_blocks_handle(), and also
+ * when setting the reservation window size through ioctl, before the
+ * file is opened for write (which needs block allocation).
+ *
+ * Needs truncate_mutex protection prior to calling this function.
+ */
void ext3_init_block_alloc_info(struct inode *inode)
{
struct ext3_inode_info *ei = EXT3_I(inode);
@@ -271,7 +355,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
- /*
+ /*
* if filesystem is mounted with NORESERVATION, the goal
* reservation window size is set to zero to indicate
* block reservation is off
@@ -287,6 +371,19 @@ void ext3_init_block_alloc_info(struct inode *inode)
ei->i_block_alloc_info = block_i;
}
+/**
+ * ext3_discard_reservation()
+ * @inode: inode
+ *
+ * Discard (free) the block reservation window on last file close,
+ * on truncate, or at the last iput().
+ *
+ * It is called in three cases:
+ * ext3_release_file(): when the last writer closes the file
+ * ext3_clear_inode(): at the last iput(), when nobody links to this file.
+ * ext3_truncate(): when the block indirect map is about to change.
+ *
+ */
void ext3_discard_reservation(struct inode *inode)
{
struct ext3_inode_info *ei = EXT3_I(inode);
@@ -306,7 +403,14 @@ void ext3_discard_reservation(struct inode *inode)
}
}
-/* Free given blocks, update quota and i_blocks field */
+/**
+ * ext3_free_blocks_sb() -- Free given blocks and update quota
+ * @handle: handle to this transaction
+ * @sb: super block
+ * @block: start physical block to free
+ * @count: number of blocks to free
+ * @pdquot_freed_blocks: pointer used to return the number of blocks freed
+ */
void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
ext3_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks)
@@ -419,8 +523,8 @@ do_more:
}
/* @@@ This prevents newly-allocated data from being
* freed and then reallocated within the same
- * transaction.
- *
+ * transaction.
+ *
* Ideally we would want to allow that to happen, but to
* do so requires making journal_forget() capable of
* revoking the queued write of a data block, which
@@ -433,7 +537,7 @@ do_more:
* safe not to set the allocation bit in the committed
* bitmap, because we know that there is no outstanding
* activity on the buffer any more and so it is safe to
- * reallocate it.
+ * reallocate it.
*/
BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
J_ASSERT_BH(bitmap_bh,
@@ -490,7 +594,13 @@ error_return:
return;
}
-/* Free given blocks, update quota and i_blocks field */
+/**
+ * ext3_free_blocks() -- Free given blocks and update quota
+ * @handle: handle for this transaction
+ * @inode: inode
+ * @block: start physical block to free
+ * @count: number of blocks to free
+ */
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
@@ -508,7 +618,11 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
return;
}
-/*
+/**
+ * ext3_test_allocatable()
+ * @nr: block number (group relative) to test
+ * @bh: bufferhead that contains the bitmap of the given block group
+ *
* For ext3 allocations, we must not reuse any blocks which are
* allocated in the bitmap buffer's "last committed data" copy. This
* prevents deletes from freeing up the page for reuse until we have
@@ -518,7 +632,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
* data would allow the old block to be overwritten before the
* transaction committed (because we force data to disk before commit).
* This would lead to corruption if we crashed between overwriting the
- * data and committing the delete.
+ * data and committing the delete.
*
* @@@ We may want to make this allocation behaviour conditional on
* data-writes at some point, and disable it for metadata allocations or
@@ -541,6 +655,16 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
return ret;
}
+/**
+ * bitmap_search_next_usable_block()
+ * @start: the starting block (group relative) of the search
+ * @bh: bufferhead that contains the block group bitmap
+ * @maxblocks: the ending block (group relative) of the reservation
+ *
+ * The bitmap search --- search forward alternately through the actual
+ * bitmap on disk and the last-committed copy in journal, until we find a
+ * bit free in both bitmaps.
+ */
static ext3_grpblk_t
bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
ext3_grpblk_t maxblocks)
@@ -548,11 +672,6 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
ext3_grpblk_t next;
struct journal_head *jh = bh2jh(bh);
- /*
- * The bitmap search --- search forward alternately through the actual
- * bitmap and the last-committed copy until we find a bit free in
- * both
- */
while (start < maxblocks) {
next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
if (next >= maxblocks)
@@ -562,14 +681,20 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
jbd_lock_bh_state(bh);
if (jh->b_committed_data)
start = ext3_find_next_zero_bit(jh->b_committed_data,
- maxblocks, next);
+ maxblocks, next);
jbd_unlock_bh_state(bh);
}
return -1;
}
-/*
- * Find an allocatable block in a bitmap. We honour both the bitmap and
+/**
+ * find_next_usable_block()
+ * @start: the starting block (group relative) to find next
+ * allocatable block in bitmap.
+ * @bh: bufferhead that contains the block group bitmap
+ * @maxblocks: the ending block (group relative) for the search
+ *
+ * Find an allocatable block in a bitmap. We honor both the bitmap and
* its last-committed copy (if that exists), and perform the "most
* appropriate allocation" algorithm of looking for a free block near
* the initial goal; then for a free byte somewhere in the bitmap; then
@@ -584,7 +709,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
if (start > 0) {
/*
- * The goal was occupied; search forward for a free
+ * The goal was occupied; search forward for a free
* block within the next XX blocks.
*
* end_goal is more or less random, but it has to be
@@ -620,7 +745,11 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
return here;
}
-/*
+/**
+ * claim_block()
+ * @block: the free block (group relative) to allocate
+ * @bh: the bufferhead that contains the block group bitmap
+ *
* We think we can allocate this block in this bitmap. Try to set the bit.
* If that succeeds then check that nobody has allocated and then freed the
* block since we saw that is was not marked in b_committed_data. If it _was_
@@ -646,7 +775,26 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
return ret;
}
-/*
+/**
+ * ext3_try_to_allocate()
+ * @sb: superblock
+ * @handle: handle to this transaction
+ * @group: given allocation block group
+ * @bitmap_bh: bufferhead that holds the block bitmap
+ * @grp_goal: given target block within the group
+ * @count: target number of blocks to allocate
+ * @my_rsv: reservation window
+ *
+ * Attempt to allocate blocks within a given range. Set the range of allocation
+ * first, then find the first free bit(s) in the bitmap (within the range),
+ * and finally allocate the blocks by claiming the found free bit(s) as allocated.
+ *
+ * To set the range of this allocation:
+ * if there is a reservation window, only try to allocate block(s) from the
+ * file's own reservation window;
+ * otherwise, the allocation range starts at the given goal block and ends at
+ * the block group's last block.
+ *
* If we failed to allocate the desired block then we may end up crossing to a
* new bitmap. In that case we must release write access to the old one via
* ext3_journal_release_buffer(), else we'll run out of credits.
@@ -703,7 +851,8 @@ repeat:
}
start = grp_goal;
- if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) {
+ if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+ grp_goal, bitmap_bh)) {
/*
* The block was allocated by another thread, or it was
* allocated and then freed by another thread
@@ -718,7 +867,8 @@ repeat:
grp_goal++;
while (num < *count && grp_goal < end
&& ext3_test_allocatable(grp_goal, bitmap_bh)
- && claim_block(sb_bgl_lock(EXT3_SB(sb), group), grp_goal, bitmap_bh)) {
+ && claim_block(sb_bgl_lock(EXT3_SB(sb), group),
+ grp_goal, bitmap_bh)) {
num++;
grp_goal++;
}
@@ -730,12 +880,12 @@ fail_access:
}
/**
- * find_next_reservable_window():
+ * find_next_reservable_window():
* find a reservable space within the given range.
* It does not allocate the reservation window for now:
* alloc_new_reservation() will do the work later.
*
- * @search_head: the head of the searching list;
+ * @search_head: the head of the searching list;
* This is not necessarily the list head of the whole filesystem
*
* We have both head and start_block to assist the search
@@ -743,12 +893,12 @@ fail_access:
* but we will shift to the place where start_block is,
* then start from there, when looking for a reservable space.
*
- * @size: the target new reservation window size
+ * @size: the target new reservation window size
*
- * @group_first_block: the first block we consider to start
+ * @group_first_block: the first block we consider to start
* the real search from
*
- * @last_block:
+ * @last_block:
* the maximum block number that our goal reservable space
* could start from. This is normally the last block in this
* group. The search will end when we found the start of next
@@ -756,10 +906,10 @@ fail_access:
* This could handle the cross boundary reservation window
* request.
*
- * basically we search from the given range, rather than the whole
- * reservation double linked list, (start_block, last_block)
- * to find a free region that is of my size and has not
- * been reserved.
+ * Basically we search the given range (start_block, last_block),
+ * rather than the whole reservation rb tree, to find a free
+ * region that is of the requested size and has not
+ * been reserved.
*
*/
static int find_next_reservable_window(
@@ -812,7 +962,7 @@ static int find_next_reservable_window(
/*
* Found a reserveable space big enough. We could
* have a reservation across the group boundary here
- */
+ */
break;
}
}
@@ -848,7 +998,7 @@ static int find_next_reservable_window(
}
/**
- * alloc_new_reservation()--allocate a new reservation window
+ * alloc_new_reservation()--allocate a new reservation window
*
* To make a new reservation, we search part of the filesystem
* reservation list (the list that inside the group). We try to
@@ -897,7 +1047,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
group_first_block = ext3_group_first_block_no(sb, group);
- group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
+ group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
if (grp_goal < 0)
start_block = group_first_block;
@@ -929,9 +1079,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
if ((my_rsv->rsv_alloc_hit >
(my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
/*
- * if we previously allocation hit ration is greater than half
- * we double the size of reservation window next time
- * otherwise keep the same
+ * if the previous allocation hit ratio is
+ * greater than 1/2, then we double the size of
+ * the reservation window the next time;
+ * otherwise we keep the same size window.
*/
size = size * 2;
if (size > EXT3_MAX_RESERVE_BLOCKS)
@@ -1010,6 +1161,23 @@ retry:
goto retry;
}
+/**
+ * try_to_extend_reservation()
+ * @my_rsv: given reservation window
+ * @sb: super block
+ * @size: the delta to extend
+ *
+ * Attempt to expand the reservation window so that it is large enough
+ * to hold the required number of free blocks.
+ *
+ * Since ext3_try_to_allocate() will always allocate blocks within
+ * the reservation window range, if the window size is too small,
+ * a multi-block allocation has to stop at the end of the reservation
+ * window. To make this more efficient, given the total number of
+ * blocks needed and the current size of the window, we try to
+ * expand the reservation window size if necessary on a best-effort
+ * basis before ext3_new_blocks() tries to allocate blocks.
+ */
static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
struct super_block *sb, int size)
{
@@ -1035,7 +1203,17 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
spin_unlock(rsv_lock);
}
-/*
+/**
+ * ext3_try_to_allocate_with_rsv()
+ * @sb: superblock
+ * @handle: handle to this transaction
+ * @group: given allocation block group
+ * @bitmap_bh: bufferhead that holds the block bitmap
+ * @grp_goal: given target block within the group
+ * @count: target number of blocks to allocate
+ * @my_rsv: reservation window
+ * @errp: pointer to store the error code
+ *
* This is the main function used to allocate a new block and its reservation
* window.
*
@@ -1051,9 +1229,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
* reservation), and there are lots of free blocks, but they are all
* being reserved.
*
- * We use a sorted double linked list for the per-filesystem reservation list.
- * The insert, remove and find a free space(non-reserved) operations for the
- * sorted double linked list should be fast.
+ * We use a red-black tree for the per-filesystem reservation list.
*
*/
static ext3_grpblk_t
@@ -1063,7 +1239,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
struct ext3_reserve_window_node * my_rsv,
unsigned long *count, int *errp)
{
- ext3_fsblk_t group_first_block;
+ ext3_fsblk_t group_first_block, group_last_block;
ext3_grpblk_t ret = 0;
int fatal;
unsigned long num = *count;
@@ -1100,6 +1276,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
* first block is the block number of the first block in this group
*/
group_first_block = ext3_group_first_block_no(sb, group);
+ group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
/*
* Basically we will allocate a new block from inode's reservation
@@ -1118,7 +1295,8 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
*/
while (1) {
if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
- !goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb)) {
+ !goal_in_my_reservation(&my_rsv->rsv_window,
+ grp_goal, group, sb)) {
if (my_rsv->rsv_goal_size < *count)
my_rsv->rsv_goal_size = *count;
ret = alloc_new_reservation(my_rsv, grp_goal, sb,
@@ -1126,17 +1304,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
if (ret < 0)
break; /* failed */
- if (!goal_in_my_reservation(&my_rsv->rsv_window, grp_goal, group, sb))
+ if (!goal_in_my_reservation(&my_rsv->rsv_window,
+ grp_goal, group, sb))
grp_goal = -1;
- } else if (grp_goal > 0 && (my_rsv->rsv_end-grp_goal+1) < *count)
+ } else if (grp_goal > 0 &&
+ (my_rsv->rsv_end-grp_goal+1) < *count)
try_to_extend_reservation(my_rsv, sb,
*count-my_rsv->rsv_end + grp_goal - 1);
- if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
- || (my_rsv->rsv_end < group_first_block))
+ if ((my_rsv->rsv_start > group_last_block) ||
+ (my_rsv->rsv_end < group_first_block)) {
+ rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
BUG();
- ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, grp_goal,
- &num, &my_rsv->rsv_window);
+ }
+ ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+ grp_goal, &num, &my_rsv->rsv_window);
if (ret >= 0) {
my_rsv->rsv_alloc_hit += num;
*count = num;
@@ -1161,6 +1343,12 @@ out:
return ret;
}
+/**
+ * ext3_has_free_blocks()
+ * @sbi: in-core super block structure.
+ *
+ * Check if filesystem has at least 1 free block available for allocation.
+ */
static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
{
ext3_fsblk_t free_blocks, root_blocks;
@@ -1175,11 +1363,17 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
return 1;
}
-/*
+/**
+ * ext3_should_retry_alloc()
+ * @sb: super block
+ * @retries: number of attempts that have been made
+ *
* ext3_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
* for the current or commiting transaction to complete, and then
* return TRUE.
+ *
+ * If the total number of retries exceeds three, return FALSE.
*/
int ext3_should_retry_alloc(struct super_block *sb, int *retries)
{
@@ -1191,13 +1385,19 @@ int ext3_should_retry_alloc(struct super_block *sb, int *retries)
return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
}
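The retry contract documented above is used by ext3's allocating paths in a loop of roughly this shape; ext3_do_some_allocation() below is a hypothetical stand-in for whatever operation returned -ENOSPC:

	static int example_alloc_with_retry(struct inode *inode)
	{
		int retries = 0;
		int err;

	retry:
		err = ext3_do_some_allocation(inode);	/* hypothetical callee */
		if (err == -ENOSPC &&
		    ext3_should_retry_alloc(inode->i_sb, &retries))
			goto retry;
		return err;
	}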
-/*
- * ext3_new_block uses a goal block to assist allocation. If the goal is
- * free, or there is a free block within 32 blocks of the goal, that block
- * is allocated. Otherwise a forward search is made for a free block; within
- * each block group the search first looks for an entire free byte in the block
- * bitmap, and then for any free bit if that fails.
- * This function also updates quota and i_blocks field.
+/**
+ * ext3_new_blocks() -- core block(s) allocation function
+ * @handle: handle to this transaction
+ * @inode: file inode
+ * @goal: given target block (filesystem wide)
+ * @count: target number of blocks to allocate
+ * @errp: error code
+ *
+ * ext3_new_blocks uses a goal block to assist allocation. It tries to
+ * allocate block(s) from the block group that contains the goal block first. If that
+ * fails, it will try to allocate block(s) from other block groups without
+ * any specific goal block.
+ *
*/
ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, unsigned long *count, int *errp)
@@ -1303,7 +1503,7 @@ retry_alloc:
smp_rmb();
/*
- * Now search the rest of the groups. We assume that
+ * Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
for (bgi = 0; bgi < ngroups; bgi++) {
@@ -1428,7 +1628,7 @@ allocated:
spin_lock(sb_bgl_lock(sbi, group_no));
gdp->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - num);
+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_mod(&sbi->s_freeblocks_counter, -num);
@@ -1471,6 +1671,12 @@ ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
return ext3_new_blocks(handle, inode, goal, &count, errp);
}
+/**
+ * ext3_count_free_blocks() -- count filesystem free blocks
+ * @sb: superblock
+ *
+ * Adds up the number of free blocks from each block group.
+ */
ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
{
ext3_fsblk_t desc_count;
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index ce4f82b9e528..b9176eed98d1 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -20,7 +20,7 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
unsigned int i;
unsigned long sum = 0;
- if (!map)
+ if (!map)
return (0);
for (i = 0; i < numchars; i++)
sum += nibblemap[map->b_data[i] & 0xf] +
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index fbb0d4ed07d4..429acbb4e064 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -59,7 +59,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext3_filetype_table[filetype]);
}
-
+
int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de,
@@ -67,7 +67,7 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
unsigned long offset)
{
const char * error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
+ const int rlen = le16_to_cpu(de->rec_len);
if (rlen < EXT3_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal";
@@ -162,7 +162,7 @@ revalidate:
* to make sure. */
if (filp->f_version != inode->i_version) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
- de = (struct ext3_dir_entry_2 *)
+ de = (struct ext3_dir_entry_2 *)
(bh->b_data + i);
/* It's too expensive to do a full
* dirent test each time round this
@@ -181,7 +181,7 @@ revalidate:
filp->f_version = inode->i_version;
}
- while (!error && filp->f_pos < inode->i_size
+ while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
@@ -229,7 +229,7 @@ out:
/*
* These functions convert from the major/minor hash to an f_pos
* value.
- *
+ *
* Currently we only use major hash numer. This is unfortunate, but
* on 32-bit machines, the same VFS interface is used for lseek and
* llseek, so if we use the 64 bit offset, then the 32-bit versions of
@@ -250,7 +250,7 @@ out:
struct fname {
__u32 hash;
__u32 minor_hash;
- struct rb_node rb_hash;
+ struct rb_node rb_hash;
struct fname *next;
__u32 inode;
__u8 name_len;
@@ -343,10 +343,9 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
/* Create and allocate the fname structure */
len = sizeof(struct fname) + dirent->name_len + 1;
- new_fn = kmalloc(len, GFP_KERNEL);
+ new_fn = kzalloc(len, GFP_KERNEL);
if (!new_fn)
return -ENOMEM;
- memset(new_fn, 0, len);
new_fn->hash = hash;
new_fn->minor_hash = minor_hash;
new_fn->inode = le32_to_cpu(dirent->inode);
@@ -410,7 +409,7 @@ static int call_filldir(struct file * filp, void * dirent,
curr_pos = hash2pos(fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
- fname->name_len, curr_pos,
+ fname->name_len, curr_pos,
fname->inode,
get_dtype(sb, fname->file_type));
if (error) {
@@ -465,7 +464,7 @@ static int ext3_dx_readdir(struct file * filp,
/*
* Fill the rbtree if we have no more entries,
* or the inode has changed since we last read in the
- * cached entries.
+ * cached entries.
*/
if ((!info->curr_node) ||
(filp->f_version != inode->i_version)) {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1efefb630ea9..994efd189f4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -100,7 +100,7 @@ ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
force_commit:
err = ext3_force_commit(inode->i_sb);
- if (err)
+ if (err)
return err;
return ret;
}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 49382a208e05..dd1fd3c0fc05 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -8,14 +8,14 @@
* Universite Pierre et Marie Curie (Paris VI)
* from
* linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
- *
+ *
* ext3fs fsync primitive
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
- *
+ *
* Removed unnecessary code duplication for little endian machines
- * and excessive __inline__s.
+ * and excessive __inline__s.
* Andi Kleen, 1997
*
* Major simplications and cleanup - we only need to do the metadata, because
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 5a2d1235ead0..deeb27b5ba83 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -4,7 +4,7 @@
* Copyright (C) 2002 by Theodore Ts'o
*
* This file is released under the GPL v2.
- *
+ *
* This file may be redistributed under the terms of the GNU Public
* License.
*/
@@ -80,11 +80,11 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
* Returns the hash of a filename. If len is 0 and name is NULL, then
* this function can be used to test whether or not a hash version is
* supported.
- *
+ *
* The seed is an 4 longword (32 bits) "secret" which can be used to
* uniquify a hash. If the seed is all zero's, then some default seed
* may be used.
- *
+ *
* A particular hash version specifies whether or not the seed is
* represented, and whether or not the returned hash is 32 bits or 64
* bits. 32 bit hashes will return 0 for the minor hash.
@@ -95,7 +95,7 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
__u32 minor_hash = 0;
const char *p;
int i;
- __u32 in[8], buf[4];
+ __u32 in[8], buf[4];
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 36546ed36a14..e45dbd651736 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -202,7 +202,7 @@ error_return:
static int find_group_dir(struct super_block *sb, struct inode *parent)
{
int ngroups = EXT3_SB(sb)->s_groups_count;
- int freei, avefreei;
+ unsigned int freei, avefreei;
struct ext3_group_desc *desc, *best_desc = NULL;
struct buffer_head *bh;
int group, best_group = -1;
@@ -216,7 +216,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
continue;
- if (!best_desc ||
+ if (!best_desc ||
(le16_to_cpu(desc->bg_free_blocks_count) >
le16_to_cpu(best_desc->bg_free_blocks_count))) {
best_group = group;
@@ -226,30 +226,30 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
return best_group;
}
-/*
- * Orlov's allocator for directories.
- *
+/*
+ * Orlov's allocator for directories.
+ *
* We always try to spread first-level directories.
*
- * If there are blockgroups with both free inodes and free blocks counts
- * not worse than average we return one with smallest directory count.
- * Otherwise we simply return a random group.
- *
- * For the rest rules look so:
- *
- * It's OK to put directory into a group unless
- * it has too many directories already (max_dirs) or
- * it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
- * it's already running too large debt (max_debt).
- * Parent's group is prefered, if it doesn't satisfy these
- * conditions we search cyclically through the rest. If none
- * of the groups look good we just look for a group with more
- * free inodes than average (starting at parent's group).
- *
- * Debt is incremented each time we allocate a directory and decremented
- * when we allocate an inode, within 0--255.
- */
+ * If there are blockgroups with both free inodes and free blocks counts
+ * not worse than average we return one with smallest directory count.
+ * Otherwise we simply return a random group.
+ *
+ * For the rest rules look so:
+ *
+ * It's OK to put directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it's already running too large debt (max_debt).
+ * Parent's group is prefered, if it doesn't satisfy these
+ * conditions we search cyclically through the rest. If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
+ */
#define INODE_COST 64
#define BLOCK_COST 256
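The Orlov comment block above reduces to a simple per-group acceptance test. The following sketch is illustrative only; the names mirror the thresholds named in the comment, and it is not the in-tree find_group_orlov() body:

    static int group_is_acceptable(struct ext3_group_desc *desc,
                                   unsigned int max_dirs, unsigned int min_inodes,
                                   ext3_grpblk_t min_blocks, int debt, int max_debt)
    {
            if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
                    return 0;       /* too many directories already */
            if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
                    return 0;       /* too few free inodes left */
            if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
                    return 0;       /* too few free blocks left */
            if (debt >= max_debt)
                    return 0;       /* group already carries too much debt */
            return 1;
    }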
@@ -261,10 +261,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext3_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
- int freei, avefreei;
+ unsigned int freei, avefreei;
ext3_fsblk_t freeb, avefreeb;
ext3_fsblk_t blocks_per_dir;
- int ndirs;
+ unsigned int ndirs;
int max_debt, max_dirs, min_inodes;
ext3_grpblk_t min_blocks;
int group = -1, i;
@@ -454,7 +454,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
group = find_group_dir(sb, dir);
else
group = find_group_orlov(sb, dir);
- } else
+ } else
group = find_group_other(sb, dir);
err = -ENOSPC;
@@ -559,7 +559,6 @@ got:
inode->i_ino = ino;
/* This is the optimal IO size (for stat), not the fs block size */
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 84be02e93652..dcf4f1dd108b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -13,11 +13,11 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Goal-directed block allocation by Stephen Tweedie
- * (sct@redhat.com), 1993, 1998
+ * (sct@redhat.com), 1993, 1998
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
* 64-bit file support on 64-bit platforms by Jakub Jelinek
- * (jj@sunsite.ms.mff.cuni.cz)
+ * (jj@sunsite.ms.mff.cuni.cz)
*
* Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
*/
@@ -55,7 +55,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
/*
* The ext3 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
- * revoked in all cases.
+ * revoked in all cases.
*
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
@@ -105,7 +105,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
* Work out how many blocks we need to proceed with the next chunk of a
* truncate transaction.
*/
-static unsigned long blocks_for_truncate(struct inode *inode)
+static unsigned long blocks_for_truncate(struct inode *inode)
{
unsigned long needed;
@@ -122,13 +122,13 @@ static unsigned long blocks_for_truncate(struct inode *inode)
/* But we need to bound the transaction so we don't overflow the
* journal. */
- if (needed > EXT3_MAX_TRANS_DATA)
+ if (needed > EXT3_MAX_TRANS_DATA)
needed = EXT3_MAX_TRANS_DATA;
return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
}
-/*
+/*
* Truncate transactions can be complex and absolutely huge. So we need to
* be able to restart the transaction at a conventient checkpoint to make
* sure we don't overflow the journal.
@@ -136,9 +136,9 @@ static unsigned long blocks_for_truncate(struct inode *inode)
* start_transaction gets us a new handle for a truncate transaction,
* and extend_transaction tries to extend the existing one a bit. If
* extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
+ * transaction in the top-level truncate loop. --sct
*/
-static handle_t *start_transaction(struct inode *inode)
+static handle_t *start_transaction(struct inode *inode)
{
handle_t *result;
@@ -215,12 +215,12 @@ void ext3_delete_inode (struct inode * inode)
ext3_orphan_del(handle, inode);
EXT3_I(inode)->i_dtime = get_seconds();
- /*
+ /*
* One subtle ordering requirement: if anything has gone wrong
* (transaction abort, IO errors, whatever), then we can still
* do these next steps (the fs will already have been marked as
* having errors), but we can't free the inode if the mark_dirty
- * fails.
+ * fails.
*/
if (ext3_mark_inode_dirty(handle, inode))
/* If that failed, just do the required in-core inode clear. */
@@ -398,7 +398,7 @@ no_block:
* + if there is a block to the left of our position - allocate near it.
* + if pointer will live in indirect block - allocate near that block.
* + if pointer will live in inode - allocate in the same
- * cylinder group.
+ * cylinder group.
*
* In the latter case we colour the starting block by the callers PID to
* prevent it from clashing with concurrent allocations for a different inode
@@ -470,7 +470,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
* ext3_blks_to_allocate: Look up the block map and count the number
* of direct blocks need to be allocated for the given branch.
*
- * @branch: chain of indirect blocks
+ * @branch: chain of indirect blocks
* @k: number of blocks need for indirect blocks
* @blks: number of data blocks to be mapped.
* @blocks_to_boundary: the offset in the indirect block
@@ -744,7 +744,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
jbd_debug(5, "splicing indirect only\n");
BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, where->bh);
- if (err)
+ if (err)
goto err_out;
} else {
/*
@@ -1098,7 +1098,7 @@ static int walk_page_buffers( handle_t *handle,
for ( bh = head, block_start = 0;
ret == 0 && (bh != head || !block_start);
- block_start = block_end, bh = next)
+ block_start = block_end, bh = next)
{
next = bh->b_this_page;
block_end = block_start + blocksize;
@@ -1137,7 +1137,7 @@ static int walk_page_buffers( handle_t *handle,
* So what we do is to rely on the fact that journal_stop/journal_start
* will _not_ run commit under these circumstances because handle->h_ref
* is elevated. We'll still have enough credits for the tiny quotafile
- * write.
+ * write.
*/
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
@@ -1282,7 +1282,7 @@ static int ext3_journalled_commit_write(struct file *file,
if (inode->i_size > EXT3_I(inode)->i_disksize) {
EXT3_I(inode)->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
- if (!ret)
+ if (!ret)
ret = ret2;
}
ret2 = ext3_journal_stop(handle);
@@ -1291,7 +1291,7 @@ static int ext3_journalled_commit_write(struct file *file,
return ret;
}
-/*
+/*
* bmap() is special. It gets used by applications such as lilo and by
* the swapper to find the on-disk block of a specific piece of data.
*
@@ -1300,10 +1300,10 @@ static int ext3_journalled_commit_write(struct file *file,
* filesystem and enables swap, then they may get a nasty shock when the
* data getting swapped to that swapfile suddenly gets overwritten by
* the original zero's written out previously to the journal and
- * awaiting writeback in the kernel's buffer cache.
+ * awaiting writeback in the kernel's buffer cache.
*
* So, if we see any bmap calls here on a modified, data-journaled file,
- * take extra steps to flush any blocks which might be in the cache.
+ * take extra steps to flush any blocks which might be in the cache.
*/
static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
{
@@ -1312,16 +1312,16 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
int err;
if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
- /*
+ /*
* This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare:
* only if we run lilo or swapon on a freshly made file
- * do we expect this to happen.
+ * do we expect this to happen.
*
* (bmap requires CAP_SYS_RAWIO so this does not
* represent an unprivileged user DOS attack --- we'd be
* in trouble if mortal users could trigger this path at
- * will.)
+ * will.)
*
* NB. EXT3_STATE_JDATA is not set on files other than
* regular files. If somebody wants to bmap a directory
@@ -1457,7 +1457,7 @@ static int ext3_ordered_writepage(struct page *page,
*/
/*
- * And attach them to the current transaction. But only if
+ * And attach them to the current transaction. But only if
* block_write_full_page() succeeded. Otherwise they are unmapped,
* and generally junk.
*/
@@ -1644,7 +1644,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
}
}
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext3_get_block, NULL);
@@ -2025,7 +2025,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
__le32 *first, __le32 *last)
{
ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */
- unsigned long count = 0; /* Number of blocks in the run */
+ unsigned long count = 0; /* Number of blocks in the run */
__le32 *block_to_free_p = NULL; /* Pointer into inode/ind
corresponding to
block_to_free */
@@ -2054,7 +2054,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
} else if (nr == block_to_free + count) {
count++;
} else {
- ext3_clear_blocks(handle, inode, this_bh,
+ ext3_clear_blocks(handle, inode, this_bh,
block_to_free,
count, block_to_free_p, p);
block_to_free = nr;
@@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
*/
if (!bh) {
ext3_error(inode->i_sb, "ext3_free_branches",
- "Read failure, inode=%ld, block="E3FSBLK,
+ "Read failure, inode=%lu, block="E3FSBLK,
inode->i_ino, nr);
continue;
}
@@ -2184,7 +2184,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
*p = 0;
BUFFER_TRACE(parent_bh,
"call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle,
+ ext3_journal_dirty_metadata(handle,
parent_bh);
}
}
@@ -2632,9 +2632,6 @@ void ext3_read_inode(struct inode * inode)
* recovery code: that's fine, we're about to complete
* the process of deleting those. */
}
- inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
- * (for stat), not the fs block
- * size */
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
#ifdef EXT3_FRAGMENTS
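The dropped i_blksize assignment here (and in the fat, fuse and freevxfs hunks later in this diff) is part of removing the field from struct inode entirely; the optimal-I/O hint reported by stat(2) is instead derived from the block-size exponent the filesystem already configures. A minimal sketch, assuming the generic stat path of this era:

    /* i_blkbits is initialised by the VFS from sb->s_blocksize_bits;
     * generic_fillattr() then reports approximately:
     */
    stat->blksize = 1 << inode->i_blkbits;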
@@ -2704,7 +2701,7 @@ void ext3_read_inode(struct inode * inode)
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
- else
+ else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
@@ -2724,8 +2721,8 @@ bad_inode:
*
* The caller must have write access to iloc->bh.
*/
-static int ext3_do_update_inode(handle_t *handle,
- struct inode *inode,
+static int ext3_do_update_inode(handle_t *handle,
+ struct inode *inode,
struct ext3_iloc *iloc)
{
struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
@@ -2900,7 +2897,7 @@ int ext3_write_inode(struct inode *inode, int wait)
* commit will leave the blocks being flushed in an unused state on
* disk. (On recovery, the inode will get truncated and the blocks will
* be freed, so we have a strong guarantee that no future commit will
- * leave these blocks visible to the user.)
+ * leave these blocks visible to the user.)
*
* Called with inode->sem down.
*/
@@ -3043,13 +3040,13 @@ int ext3_mark_iloc_dirty(handle_t *handle,
return err;
}
-/*
+/*
* On success, We end up with an outstanding reference count against
- * iloc->bh. This _must_ be cleaned up later.
+ * iloc->bh. This _must_ be cleaned up later.
*/
int
-ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
+ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext3_iloc *iloc)
{
int err = 0;
@@ -3139,7 +3136,7 @@ out:
}
#if 0
-/*
+/*
* Bind an inode's backing buffer_head into this transaction, to prevent
* it from being flushed to disk early. Unlike
* ext3_reserve_inode_write, this leaves behind no bh reference and
@@ -3157,7 +3154,7 @@ static int ext3_pin_inode(handle_t *handle, struct inode *inode)
BUFFER_TRACE(iloc.bh, "get_write_access");
err = journal_get_write_access(handle, iloc.bh);
if (!err)
- err = ext3_journal_dirty_metadata(handle,
+ err = ext3_journal_dirty_metadata(handle,
iloc.bh);
brelse(iloc.bh);
}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2aa7101b27cd..85d132c37ee0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -15,13 +15,13 @@
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
* Directory entry file type support and forward compatibility hooks
- * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
+ * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
* Hash Tree Directory indexing (c)
- * Daniel Phillips, 2001
+ * Daniel Phillips, 2001
* Hash Tree Directory indexing porting
- * Christopher Li, 2002
+ * Christopher Li, 2002
* Hash Tree Directory indexing cleanup
- * Theodore Ts'o, 2002
+ * Theodore Ts'o, 2002
*/
#include <linux/fs.h>
@@ -76,7 +76,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
-#define dxtrace(command)
+#define dxtrace(command)
#endif
struct fake_dirent
@@ -169,7 +169,7 @@ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
- struct dx_frame *frames,
+ struct dx_frame *frames,
__u32 *start_hash);
static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
struct ext3_dir_entry_2 **res_dir, int *err);
@@ -250,7 +250,7 @@ static void dx_show_index (char * label, struct dx_entry *entries)
}
struct stats
-{
+{
unsigned names;
unsigned space;
unsigned bcount;
@@ -278,7 +278,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
- names++;
+ names++;
}
de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
}
@@ -464,7 +464,7 @@ static void dx_release (struct dx_frame *frames)
*/
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
- struct dx_frame *frames,
+ struct dx_frame *frames,
__u32 *start_hash)
{
struct dx_frame *p;
@@ -632,7 +632,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
count += ret;
hashval = ~0;
- ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
frame, frames, &hashval);
*next_hash = hashval;
if (ret < 0) {
@@ -649,7 +649,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
break;
}
dx_release(frames);
- dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
count, *next_hash));
return count;
errout:
@@ -1050,7 +1050,7 @@ struct dentry *ext3_get_parent(struct dentry *child)
parent = ERR_PTR(-ENOMEM);
}
return parent;
-}
+}
#define S_SHIFT 12
static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -1198,7 +1198,7 @@ errout:
* add_dirent_to_buf will attempt search the directory block for
* space. It will return -ENOSPC if no space is available, and -EIO
* and -EEXIST if directory entry already exists.
- *
+ *
* NOTE! bh is NOT released in the case where ENOSPC is returned. In
* all other cases bh is released.
*/
@@ -1572,7 +1572,7 @@ cleanup:
* ext3_delete_entry deletes a directory entry by merging it with the
* previous entry
*/
-static int ext3_delete_entry (handle_t *handle,
+static int ext3_delete_entry (handle_t *handle,
struct inode * dir,
struct ext3_dir_entry_2 * de_del,
struct buffer_head * bh)
@@ -1643,12 +1643,12 @@ static int ext3_add_nondir(handle_t *handle,
* is so far negative - it has no inode.
*
* If the create succeeds, we fill in the inode information
- * with d_instantiate().
+ * with d_instantiate().
*/
static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
struct nameidata *nd)
{
- handle_t *handle;
+ handle_t *handle;
struct inode * inode;
int err, retries = 0;
@@ -1688,7 +1688,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1813,10 +1813,10 @@ static int empty_dir (struct inode * inode)
de1 = (struct ext3_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
if (le32_to_cpu(de->inode) != inode->i_ino ||
- !le32_to_cpu(de1->inode) ||
+ !le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
strcmp ("..", de1->name)) {
- ext3_warning (inode->i_sb, "empty_dir",
+ ext3_warning (inode->i_sb, "empty_dir",
"bad directory (dir #%lu) - no `.' or `..'",
inode->i_ino);
brelse (bh);
@@ -1883,7 +1883,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
* being truncated, or files being unlinked. */
/* @@@ FIXME: Observation from aviro:
- * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
+ * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
* here (on lock_super()), so race with ext3_link() which might bump
* ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0.
@@ -1919,8 +1919,8 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
if (!err)
list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
- jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
- jbd_debug(4, "orphan inode %ld will point to %d\n",
+ jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
+ jbd_debug(4, "orphan inode %lu will point to %d\n",
inode->i_ino, NEXT_ORPHAN(inode));
out_unlock:
unlock_super(sb);
@@ -2129,7 +2129,7 @@ static int ext3_symlink (struct inode * dir,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2227,7 +2227,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
DQUOT_INIT(new_dentry->d_inode);
handle = ext3_journal_start(old_dir, 2 *
EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2393,4 +2393,4 @@ struct inode_operations ext3_special_inode_operations = {
.removexattr = generic_removexattr,
#endif
.permission = ext3_permission,
-};
+};
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5e1337fd878a..b73cba12f79c 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -336,7 +336,7 @@ static int verify_reserved_gdb(struct super_block *sb,
unsigned five = 5;
unsigned seven = 7;
unsigned grp;
- __u32 *p = (__u32 *)primary->b_data;
+ __le32 *p = (__le32 *)primary->b_data;
int gdbackups = 0;
while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
@@ -380,7 +380,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
struct buffer_head *dind;
int gdbackups;
struct ext3_iloc iloc;
- __u32 *data;
+ __le32 *data;
int err;
if (test_opt(sb, DEBUG))
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
goto exit_bh;
}
- data = (__u32 *)dind->b_data;
+ data = (__le32 *)dind->b_data;
if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
ext3_warning(sb, __FUNCTION__,
"new group %u GDT block "E3FSBLK" not reserved",
@@ -439,8 +439,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
goto exit_dindj;
- n_group_desc = (struct buffer_head **)kmalloc((gdb_num + 1) *
- sizeof(struct buffer_head *), GFP_KERNEL);
+ n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
+ GFP_KERNEL);
if (!n_group_desc) {
err = -ENOMEM;
ext3_warning (sb, __FUNCTION__,
@@ -519,7 +519,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
struct buffer_head *dind;
struct ext3_iloc iloc;
ext3_fsblk_t blk;
- __u32 *data, *end;
+ __le32 *data, *end;
int gdbackups = 0;
int res, i;
int err;
@@ -536,8 +536,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
}
blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
- data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
- end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
+ data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
+ end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
/* Get each reserved primary GDT block and verify it holds backups */
for (res = 0; res < reserved_gdb; res++, blk++) {
@@ -545,7 +545,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
ext3_warning(sb, __FUNCTION__,
"reserved block "E3FSBLK
" not at offset %ld",
- blk, (long)(data - (__u32 *)dind->b_data));
+ blk,
+ (long)(data - (__le32 *)dind->b_data));
err = -EINVAL;
goto exit_bh;
}
@@ -560,7 +561,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
goto exit_bh;
}
if (++data >= end)
- data = (__u32 *)dind->b_data;
+ data = (__le32 *)dind->b_data;
}
for (i = 0; i < reserved_gdb; i++) {
@@ -584,7 +585,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
for (i = 0; i < reserved_gdb; i++) {
int err2;
- data = (__u32 *)primary[i]->b_data;
+ data = (__le32 *)primary[i]->b_data;
/* printk("reserving backup %lu[%u] = %lu\n",
primary[i]->b_blocknr, gdbackups,
blk + primary[i]->b_blocknr); */
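The __u32 to __le32 conversions in this file are annotation-only: the bytes on disk are unchanged, but typing the pointers as little-endian lets sparse warn when a value is used without a byte-swap helper. Sketch of the intended access pattern:

    __le32 *data = (__le32 *)dind->b_data;  /* raw on-disk, little-endian  */
    u32 blk = le32_to_cpu(data[i]);         /* CPU-endian for arithmetic   */
    data[i] = cpu_to_le32(blk + 1);         /* convert back before storing */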
@@ -689,7 +690,7 @@ exit_err:
"can't update backup for group %d (err %d), "
"forcing fsck on next reboot", group, err);
sbi->s_mount_state &= ~EXT3_VALID_FS;
- sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
+ sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
mark_buffer_dirty(sbi->s_sbh);
}
}
@@ -730,6 +731,18 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
return -EPERM;
}
+ if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
+ le32_to_cpu(es->s_blocks_count)) {
+ ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n");
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
+ le32_to_cpu(es->s_inodes_count)) {
+ ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n");
+ return -EINVAL;
+ }
+
if (reserved_gdb || gdb_off == 0) {
if (!EXT3_HAS_COMPAT_FEATURE(sb,
EXT3_FEATURE_COMPAT_RESIZE_INODE)){
@@ -958,6 +971,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
add = EXT3_BLOCKS_PER_GROUP(sb) - last;
+ if (o_blocks_count + add < o_blocks_count) {
+ ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
+ return -EINVAL;
+ }
+
if (o_blocks_count + add > n_blocks_count)
add = n_blocks_count - o_blocks_count;
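Both new resize checks use the standard unsigned wrap-around test: for unsigned values, a + b < a is true exactly when the sum overflowed. A minimal illustration of the pattern used here:

    u32 old_count = le32_to_cpu(es->s_blocks_count);
    u32 add       = input->blocks_count;

    if (old_count + add < old_count)        /* 32-bit counter would wrap */
            return -EINVAL;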
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3559086eee5f..8bfd56ef18ca 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -45,7 +45,7 @@
static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
unsigned long journal_devnum);
static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
- int);
+ unsigned int);
static void ext3_commit_super (struct super_block * sb,
struct ext3_super_block * es,
int sync);
@@ -62,13 +62,13 @@ static void ext3_unlockfs(struct super_block *sb);
static void ext3_write_super (struct super_block * sb);
static void ext3_write_super_lockfs(struct super_block *sb);
-/*
+/*
* Wrappers for journal_start/end.
*
* The only special thing we need to do here is to make sure that all
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
- * appropriate.
+ * appropriate.
*/
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
@@ -90,11 +90,11 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
return journal_start(journal, nblocks);
}
-/*
+/*
* The only special thing we need to do here is to make sure that all
* journal_stop calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
- * appropriate.
+ * appropriate.
*/
int __ext3_journal_stop(const char *where, handle_t *handle)
{
@@ -159,20 +159,21 @@ static void ext3_handle_error(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return;
- if (test_opt (sb, ERRORS_RO)) {
- printk (KERN_CRIT "Remounting filesystem read-only\n");
- sb->s_flags |= MS_RDONLY;
- } else {
+ if (!test_opt (sb, ERRORS_CONT)) {
journal_t *journal = EXT3_SB(sb)->s_journal;
EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
if (journal)
journal_abort(journal, -EIO);
}
+ if (test_opt (sb, ERRORS_RO)) {
+ printk (KERN_CRIT "Remounting filesystem read-only\n");
+ sb->s_flags |= MS_RDONLY;
+ }
+ ext3_commit_super(sb, es, 1);
if (test_opt(sb, ERRORS_PANIC))
panic("EXT3-fs (device %s): panic forced after error\n",
sb->s_id);
- ext3_commit_super(sb, es, 1);
}
void ext3_error (struct super_block * sb, const char * function,
@@ -369,16 +370,16 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
{
struct list_head *l;
- printk(KERN_ERR "sb orphan head is %d\n",
+ printk(KERN_ERR "sb orphan head is %d\n",
le32_to_cpu(sbi->s_es->s_last_orphan));
printk(KERN_ERR "sb_info orphan list:\n");
list_for_each(l, &sbi->s_orphan) {
struct inode *inode = orphan_list_entry(l);
printk(KERN_ERR " "
- "inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
+ "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
inode->i_sb->s_id, inode->i_ino, inode,
- inode->i_mode, inode->i_nlink,
+ inode->i_mode, inode->i_nlink,
NEXT_ORPHAN(inode));
}
}
@@ -475,7 +476,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
inode_init_once(&ei->vfs_inode);
}
}
-
+
static int init_inodecache(void)
{
ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
@@ -490,8 +491,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(ext3_inode_cachep))
- printk(KERN_INFO "ext3_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(ext3_inode_cachep);
}
static void ext3_clear_inode(struct inode *inode)
@@ -733,8 +733,8 @@ static match_table_t tokens = {
static ext3_fsblk_t get_sb_block(void **data)
{
- ext3_fsblk_t sb_block;
- char *options = (char *) *data;
+ ext3_fsblk_t sb_block;
+ char *options = (char *) *data;
if (!options || strncmp(options, "sb=", 3) != 0)
return 1; /* Default location */
@@ -753,7 +753,7 @@ static ext3_fsblk_t get_sb_block(void **data)
}
static int parse_options (char *options, struct super_block *sb,
- unsigned long *inum, unsigned long *journal_devnum,
+ unsigned int *inum, unsigned long *journal_devnum,
ext3_fsblk_t *n_blocks_count, int is_remount)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
@@ -1174,7 +1174,8 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
static int ext3_check_descriptors (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
- ext3_fsblk_t block = le32_to_cpu(sbi->s_es->s_first_data_block);
+ ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
+ ext3_fsblk_t last_block;
struct ext3_group_desc * gdp = NULL;
int desc_block = 0;
int i;
@@ -1183,12 +1184,17 @@ static int ext3_check_descriptors (struct super_block * sb)
for (i = 0; i < sbi->s_groups_count; i++)
{
+ if (i == sbi->s_groups_count - 1)
+ last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
+ else
+ last_block = first_block +
+ (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0)
gdp = (struct ext3_group_desc *)
sbi->s_group_desc[desc_block++]->b_data;
- if (le32_to_cpu(gdp->bg_block_bitmap) < block ||
- le32_to_cpu(gdp->bg_block_bitmap) >=
- block + EXT3_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
+ le32_to_cpu(gdp->bg_block_bitmap) > last_block)
{
ext3_error (sb, "ext3_check_descriptors",
"Block bitmap for group %d"
@@ -1197,9 +1203,8 @@ static int ext3_check_descriptors (struct super_block * sb)
le32_to_cpu(gdp->bg_block_bitmap));
return 0;
}
- if (le32_to_cpu(gdp->bg_inode_bitmap) < block ||
- le32_to_cpu(gdp->bg_inode_bitmap) >=
- block + EXT3_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
+ le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
{
ext3_error (sb, "ext3_check_descriptors",
"Inode bitmap for group %d"
@@ -1208,9 +1213,9 @@ static int ext3_check_descriptors (struct super_block * sb)
le32_to_cpu(gdp->bg_inode_bitmap));
return 0;
}
- if (le32_to_cpu(gdp->bg_inode_table) < block ||
- le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >=
- block + EXT3_BLOCKS_PER_GROUP(sb))
+ if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
+ le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group >
+ last_block)
{
ext3_error (sb, "ext3_check_descriptors",
"Inode table for group %d"
@@ -1219,7 +1224,7 @@ static int ext3_check_descriptors (struct super_block * sb)
le32_to_cpu(gdp->bg_inode_table));
return 0;
}
- block += EXT3_BLOCKS_PER_GROUP(sb);
+ first_block += EXT3_BLOCKS_PER_GROUP(sb);
gdp++;
}
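The switch from an exclusive upper bound of first_block + EXT3_BLOCKS_PER_GROUP(sb) to an inclusive last_block matters only for the final group, which may be shorter than a full group. A worked example with illustrative numbers:

    /* blocks_per_group = 32768, s_first_data_block = 1, s_blocks_count = 40000
     *   group 0 (full): first_block = 1,     last_block = 32768
     *   group 1 (last): first_block = 32769, last_block = 39999
     * Checking "< first_block + blocks_per_group" for group 1 would accept
     * bitmap or inode-table blocks placed beyond the end of the filesystem.
     */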
@@ -1301,17 +1306,17 @@ static void ext3_orphan_cleanup (struct super_block * sb,
DQUOT_INIT(inode);
if (inode->i_nlink) {
printk(KERN_DEBUG
- "%s: truncating inode %ld to %Ld bytes\n",
+ "%s: truncating inode %lu to %Ld bytes\n",
__FUNCTION__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %ld to %Ld bytes\n",
+ jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
inode->i_ino, inode->i_size);
ext3_truncate(inode);
nr_truncates++;
} else {
printk(KERN_DEBUG
- "%s: deleting unreferenced inode %ld\n",
+ "%s: deleting unreferenced inode %lu\n",
__FUNCTION__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %ld\n",
+ jbd_debug(2, "deleting unreferenced inode %lu\n",
inode->i_ino);
nr_orphans++;
}
@@ -1390,7 +1395,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
ext3_fsblk_t sb_block = get_sb_block(&data);
ext3_fsblk_t logic_sb_block;
unsigned long offset = 0;
- unsigned long journal_inum = 0;
+ unsigned int journal_inum = 0;
unsigned long journal_devnum = 0;
unsigned long def_mount_opts;
struct inode *root;
@@ -1401,11 +1406,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
int needs_recovery;
__le32 features;
- sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(*sbi));
sbi->s_mount_opt = 0;
sbi->s_resuid = EXT3_DEF_RESUID;
sbi->s_resgid = EXT3_DEF_RESGID;
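The kmalloc()+memset() pair is collapsed into a single zeroing allocation; behaviour is unchanged. Shape of the conversion (the same pattern is applied to fs/fat and fs/hfs later in this diff):

    sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);        /* allocated pre-zeroed */
    if (!sbi)
            return -ENOMEM;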
@@ -1483,7 +1487,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
- printk(KERN_WARNING
+ printk(KERN_WARNING
"EXT3-fs warning: feature flags set on rev 0 fs, "
"running e2fsck is recommended\n");
/*
@@ -1509,7 +1513,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
if (blocksize < EXT3_MIN_BLOCK_SIZE ||
blocksize > EXT3_MAX_BLOCK_SIZE) {
- printk(KERN_ERR
+ printk(KERN_ERR
"EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
blocksize, sb->s_id);
goto failed_mount;
@@ -1533,14 +1537,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if (!bh) {
- printk(KERN_ERR
+ printk(KERN_ERR
"EXT3-fs: Can't read superblock on 2nd try.\n");
goto failed_mount;
}
es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
- printk (KERN_ERR
+ printk (KERN_ERR
"EXT3-fs: Magic mismatch, very weird !\n");
goto failed_mount;
}
@@ -1622,10 +1626,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext3;
- sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
- le32_to_cpu(es->s_first_data_block) +
- EXT3_BLOCKS_PER_GROUP(sb) - 1) /
- EXT3_BLOCKS_PER_GROUP(sb);
+ sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
+ le32_to_cpu(es->s_first_data_block) - 1)
+ / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
EXT3_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
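The rewritten s_groups_count expression computes the same ceiling; the benefit, presumably, is dropping the intermediate "+ (EXT3_BLOCKS_PER_GROUP(sb) - 1)" term, which can wrap a 32-bit block counter on filesystems near the 2^32-block limit. Arithmetic check with illustrative numbers:

    /* blocks_count = 40000, first_data_block = 1, blocks_per_group = 32768
     *   old: (40000 - 1 + 32767) / 32768    = 72766 / 32768       = 2
     *   new: ((40000 - 1 - 1) / 32768) + 1  = (39998 / 32768) + 1 = 2
     * Both yield 2 groups; only the new form avoids the large intermediate.
     */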
@@ -1820,7 +1823,7 @@ out_fail:
/*
* Setup any per-fs journal parameters now. We'll do this both on
* initial mount, once the journal has been initialised but before we've
- * done any recovery; and again on any subsequent remount.
+ * done any recovery; and again on any subsequent remount.
*/
static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
{
@@ -1840,7 +1843,8 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
spin_unlock(&journal->j_state_lock);
}
-static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum)
+static journal_t *ext3_get_journal(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
journal_t *journal;
@@ -1975,7 +1979,7 @@ static int ext3_load_journal(struct super_block *sb,
unsigned long journal_devnum)
{
journal_t *journal;
- int journal_inum = le32_to_cpu(es->s_journal_inum);
+ unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
dev_t journal_dev;
int err = 0;
int really_read_only;
@@ -2061,7 +2065,7 @@ static int ext3_load_journal(struct super_block *sb,
static int ext3_create_journal(struct super_block * sb,
struct ext3_super_block * es,
- int journal_inum)
+ unsigned int journal_inum)
{
journal_t *journal;
@@ -2074,7 +2078,7 @@ static int ext3_create_journal(struct super_block * sb,
if (!(journal = ext3_get_journal(sb, journal_inum)))
return -EINVAL;
- printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n",
+ printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n",
journal_inum);
if (journal_create(journal)) {
@@ -2342,10 +2346,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
*/
ext3_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
- if ((ret = ext3_group_extend(sb, es, n_blocks_count))) {
- err = ret;
+ if ((err = ext3_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
- }
if (!ext3_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
}
@@ -2734,7 +2736,7 @@ static int __init init_ext3_fs(void)
out:
destroy_inodecache();
out1:
- exit_ext3_xattr();
+ exit_ext3_xattr();
return err;
}
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index a44a0562203a..f86f2482f01d 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -75,7 +75,7 @@
#ifdef EXT3_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
- printk(KERN_DEBUG "inode %s:%ld: ", \
+ printk(KERN_DEBUG "inode %s:%lu: ", \
inode->i_sb->s_id, inode->i_ino); \
printk(f); \
printk("\n"); \
@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
bad_block: ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: bad block "E3FSBLK, inode->i_ino,
+ "inode %lu: bad block "E3FSBLK, inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: bad block "E3FSBLK, inode->i_ino,
+ "inode %lu: bad block "E3FSBLK, inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext3_xattr_check_block(bs->bh)) {
ext3_error(sb, __FUNCTION__,
- "inode %ld: bad block "E3FSBLK, inode->i_ino,
+ "inode %lu: bad block "E3FSBLK, inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
@@ -848,7 +848,7 @@ cleanup_dquot:
bad_block:
ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: bad block "E3FSBLK, inode->i_ino,
+ "inode %lu: bad block "E3FSBLK, inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
@@ -1077,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: block "E3FSBLK" read error", inode->i_ino,
+ "inode %lu: block "E3FSBLK" read error", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1)) {
ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: bad block "E3FSBLK, inode->i_ino,
+ "inode %lu: bad block "E3FSBLK, inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
@@ -1211,7 +1211,7 @@ again:
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
- "inode %ld: block %lu read error",
+ "inode %lu: block %lu read error",
inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
EXT3_XATTR_REFCOUNT_MAX) {
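The recurring %ld to %lu changes throughout the ext3 hunks above all correct the conversion specifier for inode->i_ino, which is an unsigned long; %ld would print inode numbers at or above 2^31 (with 32-bit longs) as negative. Pattern:

    printk(KERN_DEBUG "inode %lu\n", inode->i_ino); /* matches unsigned long */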
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 97b967b84fc6..82cc4f59e3ba 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -58,8 +58,7 @@ int __init fat_cache_init(void)
void fat_cache_destroy(void)
{
- if (kmem_cache_destroy(fat_cache_cachep))
- printk(KERN_INFO "fat_cache: not all structures were freed\n");
+ kmem_cache_destroy(fat_cache_cachep);
}
static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 1ee25232e6af..d50fc47169c1 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -13,6 +13,7 @@
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/blkdev.h>
int fat_generic_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
@@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
}
}
+static int fat_file_release(struct inode *inode, struct file *filp)
+{
+ if ((filp->f_mode & FMODE_WRITE) &&
+ MSDOS_SB(inode->i_sb)->options.flush) {
+ fat_flush_inodes(inode->i_sb, inode, NULL);
+ blk_congestion_wait(WRITE, HZ/10);
+ }
+ return 0;
+}
+
const struct file_operations fat_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
+ .release = fat_file_release,
.ioctl = fat_generic_ioctl,
.fsync = file_fsync,
.sendfile = generic_file_sendfile,
@@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode)
lock_kernel();
fat_free(inode, nr_clusters);
unlock_kernel();
+ fat_flush_inodes(inode->i_sb, inode, NULL);
}
struct inode_operations fat_file_inode_operations = {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 31b7174176ba..045738032a83 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -24,6 +24,7 @@
#include <linux/vfs.h>
#include <linux/parser.h>
#include <linux/uio.h>
+#include <linux/writeback.h>
#include <asm/unaligned.h>
#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -50,14 +51,14 @@ static int fat_add_cluster(struct inode *inode)
return err;
}
-static int __fat_get_blocks(struct inode *inode, sector_t iblock,
- unsigned long *max_blocks,
- struct buffer_head *bh_result, int create)
+static inline int __fat_get_block(struct inode *inode, sector_t iblock,
+ unsigned long *max_blocks,
+ struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- sector_t phys;
unsigned long mapped_blocks;
+ sector_t phys;
int err, offset;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
@@ -73,7 +74,7 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock,
if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
- MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
+ MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
return -EIO;
}
@@ -93,34 +94,29 @@ static int __fat_get_blocks(struct inode *inode, sector_t iblock,
err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
if (err)
return err;
+
BUG_ON(!phys);
BUG_ON(*max_blocks != mapped_blocks);
set_buffer_new(bh_result);
map_bh(bh_result, sb, phys);
+
return 0;
}
-static int fat_get_blocks(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int fat_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
- int err;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err;
- err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+ err = __fat_get_block(inode, iblock, &max_blocks, bh_result, create);
if (err)
return err;
bh_result->b_size = max_blocks << sb->s_blocksize_bits;
return 0;
}
-static int fat_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- unsigned long max_blocks = 1;
- return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
-}
-
static int fat_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, fat_get_block, wbc);
@@ -188,7 +184,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
* condition of fat_get_block() and ->truncate().
*/
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, fat_get_blocks, NULL);
+ offset, nr_segs, fat_get_block, NULL);
}
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
@@ -375,8 +371,6 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_flags |= S_IMMUTABLE;
}
MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED;
- /* this is as close to the truth as we can get ... */
- inode->i_blksize = sbi->cluster_size;
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
inode->i_mtime.tv_sec =
@@ -528,8 +522,7 @@ static int __init fat_init_inodecache(void)
static void __exit fat_destroy_inodecache(void)
{
- if (kmem_cache_destroy(fat_inode_cachep))
- printk(KERN_INFO "fat_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(fat_inode_cachep);
}
static int fat_remount(struct super_block *sb, int *flags, char *data)
@@ -861,7 +854,7 @@ enum {
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_err,
};
static match_table_t fat_tokens = {
@@ -893,7 +886,8 @@ static match_table_t fat_tokens = {
{Opt_obsolate, "cvf_format=%20s"},
{Opt_obsolate, "cvf_options=%100s"},
{Opt_obsolate, "posix"},
- {Opt_err, NULL}
+ {Opt_flush, "flush"},
+ {Opt_err, NULL},
};
static match_table_t msdos_tokens = {
{Opt_nodots, "nodots"},
@@ -1034,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
return 0;
opts->codepage = option;
break;
+ case Opt_flush:
+ opts->flush = 1;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1137,7 +1134,6 @@ static int fat_read_root(struct inode *inode)
MSDOS_I(inode)->i_start = 0;
inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
}
- inode->i_blksize = sbi->cluster_size;
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
@@ -1168,11 +1164,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
long error;
char buf[50];
- sbi = kmalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(struct msdos_sb_info));
sb->s_flags |= MS_NODIRATIME;
sb->s_magic = MSDOS_SUPER_MAGIC;
@@ -1435,6 +1430,56 @@ out_fail:
EXPORT_SYMBOL_GPL(fat_fill_super);
+/*
+ * helper function for fat_flush_inodes. This writes both the inode
+ * and the file data blocks, waiting for in flight data blocks before
+ * the start of the call. It does not wait for any io started
+ * during the call
+ */
+static int writeback_inode(struct inode *inode)
+{
+
+ int ret;
+ struct address_space *mapping = inode->i_mapping;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .nr_to_write = 0,
+ };
+ /* if we used WB_SYNC_ALL, sync_inode waits for the io for the
+ * inode to finish. So WB_SYNC_NONE is sent down to sync_inode
+ * and filemap_fdatawrite is used for the data blocks
+ */
+ ret = sync_inode(inode, &wbc);
+ if (!ret)
+ ret = filemap_fdatawrite(mapping);
+ return ret;
+}
+
+/*
+ * write data and metadata corresponding to i1 and i2. The io is
+ * started but we do not wait for any of it to finish.
+ *
+ * filemap_flush is used for the block device, so if there is a dirty
+ * page for a block already in flight, we will not wait and start the
+ * io over again
+ */
+int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
+{
+ int ret = 0;
+ if (!MSDOS_SB(sb)->options.flush)
+ return 0;
+ if (i1)
+ ret = writeback_inode(i1);
+ if (!ret && i2)
+ ret = writeback_inode(i2);
+ if (!ret && sb) {
+ struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+ ret = filemap_flush(mapping);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(fat_flush_inodes);
+
static int __init init_fat_fs(void)
{
int err;
diff --git a/fs/file.c b/fs/file.c
index b3c6b82e6a9d..8e81775c5dc8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -281,80 +281,70 @@ static struct fdtable *alloc_fdtable(int nr)
out2:
nfds = fdt->max_fdset;
out:
- if (new_openset)
- free_fdset(new_openset, nfds);
- if (new_execset)
- free_fdset(new_execset, nfds);
+ free_fdset(new_openset, nfds);
+ free_fdset(new_execset, nfds);
kfree(fdt);
return NULL;
}
/*
- * Expands the file descriptor table - it will allocate a new fdtable and
- * both fd array and fdset. It is expected to be called with the
- * files_lock held.
+ * Expand the file descriptor table.
+ * This function will allocate a new fdtable and both fd array and fdset, of
+ * the given size.
+ * Return <0 error code on error; 1 on successful completion.
+ * The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_fdtable(struct files_struct *files, int nr)
__releases(files->file_lock)
__acquires(files->file_lock)
{
- int error = 0;
- struct fdtable *fdt;
- struct fdtable *nfdt = NULL;
+ struct fdtable *new_fdt, *cur_fdt;
spin_unlock(&files->file_lock);
- nfdt = alloc_fdtable(nr);
- if (!nfdt) {
- error = -ENOMEM;
- spin_lock(&files->file_lock);
- goto out;
- }
-
+ new_fdt = alloc_fdtable(nr);
spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
+ if (!new_fdt)
+ return -ENOMEM;
/*
- * Check again since another task may have expanded the
- * fd table while we dropped the lock
+ * Check again since another task may have expanded the fd table while
+ * we dropped the lock
*/
- if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
- copy_fdtable(nfdt, fdt);
+ cur_fdt = files_fdtable(files);
+ if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
+ /* Continue as planned */
+ copy_fdtable(new_fdt, cur_fdt);
+ rcu_assign_pointer(files->fdt, new_fdt);
+ free_fdtable(cur_fdt);
} else {
- /* Somebody expanded while we dropped file_lock */
- spin_unlock(&files->file_lock);
- __free_fdtable(nfdt);
- spin_lock(&files->file_lock);
- goto out;
+ /* Somebody else expanded, so undo our attempt */
+ __free_fdtable(new_fdt);
}
- rcu_assign_pointer(files->fdt, nfdt);
- free_fdtable(fdt);
-out:
- return error;
+ return 1;
}
/*
* Expand files.
- * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked.
- * Should be called with the files->file_lock spinlock held for write.
+ * This function will expand the file structures, if the requested size exceeds
+ * the current capacity and there is room for expansion.
+ * Return <0 error code on error; 0 when nothing done; 1 when files were
+ * expanded and execution may have blocked.
+ * The files->file_lock should be held on entry, and will be held on exit.
*/
int expand_files(struct files_struct *files, int nr)
{
- int err, expand = 0;
struct fdtable *fdt;
fdt = files_fdtable(files);
- if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
- if (fdt->max_fdset >= NR_OPEN ||
- fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
- err = -EMFILE;
- goto out;
- }
- expand = 1;
- if ((err = expand_fdtable(files, nr)))
- goto out;
- }
- err = expand;
-out:
- return err;
+ /* Do we need to expand? */
+ if (nr < fdt->max_fdset && nr < fdt->max_fds)
+ return 0;
+ /* Can we expand? */
+ if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
+ nr >= NR_OPEN)
+ return -EMFILE;
+
+ /* All good, so we try */
+ return expand_fdtable(files, nr);
}
static void __devinit fdtable_defer_list_init(int cpu)
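The new comments spell out the expand_files() contract: a negative value is a hard error, 0 means the table was already large enough, and 1 means it was grown and files->file_lock was dropped and re-taken along the way. A caller sketch (illustrative only, not copied from the tree):

    spin_lock(&files->file_lock);
    error = expand_files(files, fd);
    if (error < 0) {                        /* e.g. -EMFILE */
            spin_unlock(&files->file_lock);
            return error;
    }
    /* error > 0: the lock was dropped, so re-read the fdtable pointer */
    fdt = files_fdtable(files);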
diff --git a/fs/file_table.c b/fs/file_table.c
index 0131ba06e1ee..bc35a40417d7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -169,7 +169,7 @@ void fastcall __fput(struct file *file)
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
- if (unlikely(inode->i_cdev != NULL))
+ if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
if (file->f_mode & FMODE_WRITE)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 9f1072836c8e..e3fa77c6ed56 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -69,8 +69,6 @@ int register_filesystem(struct file_system_type * fs)
int res = 0;
struct file_system_type ** p;
- if (!fs)
- return -EINVAL;
if (fs->next)
return -EBUSY;
INIT_LIST_HEAD(&fs->fs_supers);
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index d35979a58743..c8a92652612a 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -252,7 +252,7 @@ enum {
* Get filesystem private data from VFS inode.
*/
#define VXFS_INO(ip) \
- ((struct vxfs_inode_info *)(ip)->u.generic_ip)
+ ((struct vxfs_inode_info *)(ip)->i_private)
/*
* Get filesystem private data from VFS superblock.
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ca6a39714771..4786d51ad3bd 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -239,11 +239,10 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
ip->i_ctime.tv_nsec = 0;
ip->i_mtime.tv_nsec = 0;
- ip->i_blksize = PAGE_SIZE;
ip->i_blocks = vip->vii_blocks;
ip->i_generation = vip->vii_gen;
- ip->u.generic_ip = (void *)vip;
+ ip->i_private = vip;
}
@@ -338,5 +337,5 @@ vxfs_read_inode(struct inode *ip)
void
vxfs_clear_inode(struct inode *ip)
{
- kmem_cache_free(vxfs_inode_cachep, ip->u.generic_ip);
+ kmem_cache_free(vxfs_inode_cachep, ip->i_private);
}
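The u.generic_ip to i_private conversions here (and in fs/fuse below) are mechanical: i_private is a plain void * in struct inode reserved for filesystem-private data, replacing the old catch-all union member. Usage shape:

    ip->i_private = vip;                            /* attach at inode setup   */
    vip = (struct vxfs_inode_info *)ip->i_private;  /* retrieve, as VXFS_INO() */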
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index b74b791fc23b..ac28b0835ffc 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -260,12 +260,17 @@ static struct file_system_type vxfs_fs_type = {
static int __init
vxfs_init(void)
{
+ int rv;
+
vxfs_inode_cachep = kmem_cache_create("vxfs_inode",
sizeof(struct vxfs_inode_info), 0,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
- if (vxfs_inode_cachep)
- return register_filesystem(&vxfs_fs_type);
- return -ENOMEM;
+ if (!vxfs_inode_cachep)
+ return -ENOMEM;
+ rv = register_filesystem(&vxfs_fs_type);
+ if (rv < 0)
+ kmem_cache_destroy(vxfs_inode_cachep);
+ return rv;
}
static void __exit
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 46fe60b2da23..79ec1f23d4d2 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
{
struct fuse_conn *fc;
mutex_lock(&fuse_mutex);
- fc = file->f_dentry->d_inode->u.generic_ip;
+ fc = file->f_dentry->d_inode->i_private;
if (fc)
fc = fuse_conn_get(fc);
mutex_unlock(&fuse_mutex);
@@ -98,7 +98,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
inode->i_op = iop;
inode->i_fop = fop;
inode->i_nlink = nlink;
- inode->u.generic_ip = fc;
+ inode->i_private = fc;
d_add(dentry, inode);
return dentry;
}
@@ -150,7 +150,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
for (i = fc->ctl_ndents - 1; i >= 0; i--) {
struct dentry *dentry = fc->ctl_dentry[i];
- dentry->d_inode->u.generic_ip = NULL;
+ dentry->d_inode->i_private = NULL;
d_drop(dentry);
dput(dentry);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1e2006caf158..4fc557c40cc0 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -212,6 +212,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
* Called with fc->lock, unlocks it
*/
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+ __releases(fc->lock)
{
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
req->end = NULL;
@@ -640,6 +641,7 @@ static void request_wait(struct fuse_conn *fc)
*/
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
const struct iovec *iov, unsigned long nr_segs)
+ __releases(fc->lock)
{
struct fuse_copy_state cs;
struct fuse_in_header ih;
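__releases() is a sparse annotation with no runtime effect: it records that the function is entered with fc->lock held and returns with it dropped, so sparse's context tracking does not flag the imbalance. The complementary form, for a helper that returns with the lock held, looks like this (illustrative):

    static void example_lock_helper(struct fuse_conn *fc)
            __acquires(fc->lock)
    {
            spin_lock(&fc->lock);
    }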
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 409ce6a7cca4..f85b2a282f13 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
return -EACCES;
- if (nd && (nd->flags & LOOKUP_ACCESS))
+ if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR)))
return fuse_access(inode, mask);
return 0;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7d25092262ae..7d0a9aee01f2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -118,7 +118,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
inode->i_uid = attr->uid;
inode->i_gid = attr->gid;
i_size_write(inode, attr->size);
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = attr->blocks;
inode->i_atime.tv_sec = attr->atime;
inode->i_atime.tv_nsec = attr->atimensec;
@@ -252,6 +251,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
memset(&outarg, 0, sizeof(outarg));
req->in.numargs = 0;
req->in.h.opcode = FUSE_STATFS;
+ req->in.h.nodeid = get_node_id(dentry->d_inode);
req->out.numargs = 1;
req->out.args[0].size =
fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
new file mode 100644
index 000000000000..9ccb78947171
--- /dev/null
+++ b/fs/generic_acl.c
@@ -0,0 +1,197 @@
+/*
+ * fs/generic_acl.c
+ *
+ * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/generic_acl.h>
+
+/**
+ * generic_acl_list - Generic xattr_handler->list() operation
+ * @ops: Filesystem specific getacl and setacl callbacks
+ */
+size_t
+generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
+ int type, char *list, size_t list_size)
+{
+ struct posix_acl *acl;
+ const char *name;
+ size_t size;
+
+ acl = ops->getacl(inode, type);
+ if (!acl)
+ return 0;
+ posix_acl_release(acl);
+
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ break;
+
+ default:
+ return 0;
+ }
+ size = strlen(name) + 1;
+ if (list && size <= list_size)
+ memcpy(list, name, size);
+ return size;
+}
+
+/**
+ * generic_acl_get - Generic xattr_handler->get() operation
+ * @ops: Filesystem specific getacl and setacl callbacks
+ */
+int
+generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
+ int type, void *buffer, size_t size)
+{
+ struct posix_acl *acl;
+ int error;
+
+ acl = ops->getacl(inode, type);
+ if (!acl)
+ return -ENODATA;
+ error = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+/**
+ * generic_acl_set - Generic xattr_handler->set() operation
+ * @ops: Filesystem specific getacl and setacl callbacks
+ */
+int
+generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
+ int type, const void *value, size_t size)
+{
+ struct posix_acl *acl = NULL;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+ return -EPERM;
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (acl) {
+ mode_t mode;
+
+ error = posix_acl_valid(acl);
+ if (error)
+ goto failed;
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ goto failed;
+ inode->i_mode = mode;
+ if (error == 0) {
+ posix_acl_release(acl);
+ acl = NULL;
+ }
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode)) {
+ error = -EINVAL;
+ goto failed;
+ }
+ break;
+ }
+ }
+ ops->setacl(inode, type, acl);
+ error = 0;
+failed:
+ posix_acl_release(acl);
+ return error;
+}
+
+/**
+ * generic_acl_init - Take care of acl inheritance at @inode create time
+ * @ops: Filesystem specific getacl and setacl callbacks
+ *
+ * Files created inside a directory with a default ACL inherit the
+ * directory's default ACL.
+ */
+int
+generic_acl_init(struct inode *inode, struct inode *dir,
+ struct generic_acl_operations *ops)
+{
+ struct posix_acl *acl = NULL;
+ mode_t mode = inode->i_mode;
+ int error;
+
+ inode->i_mode = mode & ~current->fs->umask;
+ if (!S_ISLNK(inode->i_mode))
+ acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
+ if (acl) {
+ struct posix_acl *clone;
+
+ if (S_ISDIR(inode->i_mode)) {
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ error = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+ ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
+ posix_acl_release(clone);
+ }
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ error = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+ error = posix_acl_create_masq(clone, &mode);
+ if (error >= 0) {
+ inode->i_mode = mode;
+ if (error > 0)
+ ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+ }
+ posix_acl_release(clone);
+ }
+ error = 0;
+
+cleanup:
+ posix_acl_release(acl);
+ return error;
+}
+
+/**
+ * generic_acl_chmod - change the access acl of @inode upon chmod()
+ * @ops: Filesystem specific getacl and setacl callbacks
+ *
+ * A chmod also changes the permissions of the owner, group/mask, and
+ * other ACL entries.
+ */
+int
+generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
+{
+ struct posix_acl *acl, *clone;
+ int error = 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ acl = ops->getacl(inode, ACL_TYPE_ACCESS);
+ if (acl) {
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+ error = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!error)
+ ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+ posix_acl_release(clone);
+ }
+ return error;
+}
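The helpers above are parameterised by a struct generic_acl_operations whose getacl/setacl callbacks (signatures inferred from the calls in this file) let a filesystem keep ACLs in its own storage. A hypothetical sketch of how a filesystem might wire them up; everything named myfs_* is invented for illustration:

	#include <linux/fs.h>
	#include <linux/posix_acl.h>
	#include <linux/generic_acl.h>

	/* Stubs standing in for the filesystem's real ACL storage. */
	static struct posix_acl *myfs_getacl(struct inode *inode, int type)
	{
		return NULL;		/* a real fs would return its cached ACL */
	}

	static void myfs_setacl(struct inode *inode, int type, struct posix_acl *acl)
	{
		/* a real fs would store @acl, e.g. in its inode info */
	}

	static struct generic_acl_operations myfs_acl_ops = {
		.getacl	= myfs_getacl,
		.setacl	= myfs_setacl,
	};

	/* At create time: inherit the parent directory's default ACL. */
	static int myfs_init_acl(struct inode *inode, struct inode *dir)
	{
		return generic_acl_init(inode, dir, &myfs_acl_ops);
	}

	/* From ->setattr() after a mode change: keep the access ACL in sync. */
	static int myfs_acl_chmod(struct inode *inode)
	{
		return generic_acl_chmod(inode, &myfs_acl_ops);
	}
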
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 13231dd5ce66..0d200068d0af 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -249,10 +249,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
sb = tree->inode->i_sb;
size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
sizeof(struct page *);
- node = kmalloc(size, GFP_KERNEL);
+ node = kzalloc(size, GFP_KERNEL);
if (!node)
return NULL;
- memset(node, 0, size);
node->tree = tree;
node->this = cnid;
set_bit(HFS_BNODE_NEW, &node->flags);
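This kmalloc()+memset() to kzalloc() conversion is repeated for several filesystems below; kzalloc() returns memory that is already zeroed, so the separate memset() disappears. A before/after sketch with an invented structure name:

	#include <linux/slab.h>
	#include <linux/string.h>

	struct demo { int a; void *p; };

	static struct demo *demo_alloc_old(void)
	{
		struct demo *d = kmalloc(sizeof(*d), GFP_KERNEL);

		if (!d)
			return NULL;
		memset(d, 0, sizeof(*d));	/* clear by hand */
		return d;
	}

	static struct demo *demo_alloc_new(void)
	{
		return kzalloc(sizeof(struct demo), GFP_KERNEL);	/* already zeroed */
	}
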
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 400357994319..5fd0ed71f923 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -21,10 +21,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
struct page *page;
unsigned int size;
- tree = kmalloc(sizeof(*tree), GFP_KERNEL);
+ tree = kzalloc(sizeof(*tree), GFP_KERNEL);
if (!tree)
return NULL;
- memset(tree, 0, sizeof(*tree));
init_MUTEX(&tree->tree_lock);
spin_lock_init(&tree->hash_lock);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 315cf44a90b2..d05641c35fc9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -154,7 +154,6 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
inode->i_gid = current->fsgid;
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_blksize = HFS_SB(sb)->alloc_blksz;
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
HFS_I(inode)->fs_blocks = 0;
@@ -284,7 +283,6 @@ static int hfs_read_inode(struct inode *inode, void *data)
inode->i_uid = hsb->s_uid;
inode->i_gid = hsb->s_gid;
inode->i_nlink = 1;
- inode->i_blksize = HFS_SB(inode->i_sb)->alloc_blksz;
if (idata->key)
HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 34937ee83ab1..d43b4fcc8ad3 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -356,11 +356,10 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root_inode;
int res;
- sbi = kmalloc(sizeof(struct hfs_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(struct hfs_sb_info));
INIT_HLIST_HEAD(&sbi->rsrc_inodes);
res = -EINVAL;
@@ -455,8 +454,7 @@ static int __init init_hfs_fs(void)
static void __exit exit_hfs_fs(void)
{
unregister_filesystem(&hfs_fs_type);
- if (kmem_cache_destroy(hfs_inode_cachep))
- printk(KERN_ERR "hfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(hfs_inode_cachep);
}
module_init(init_hfs_fs)
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 77bf434da679..29da6574ba77 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -409,10 +409,9 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
sb = tree->inode->i_sb;
size = sizeof(struct hfs_bnode) + tree->pages_per_bnode *
sizeof(struct page *);
- node = kmalloc(size, GFP_KERNEL);
+ node = kzalloc(size, GFP_KERNEL);
if (!node)
return NULL;
- memset(node, 0, size);
node->tree = tree;
node->this = cnid;
set_bit(HFS_BNODE_NEW, &node->flags);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index cfc852fdd1b5..a9b9e872e29a 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -24,10 +24,9 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
struct page *page;
unsigned int size;
- tree = kmalloc(sizeof(*tree), GFP_KERNEL);
+ tree = kzalloc(sizeof(*tree), GFP_KERNEL);
if (!tree)
return NULL;
- memset(tree, 0, sizeof(*tree));
init_MUTEX(&tree->tree_lock);
spin_lock_init(&tree->hash_lock);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 924ecdef8091..0eb1a6092668 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -304,7 +304,6 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
inode->i_gid = current->fsgid;
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_blksize = HFSPLUS_SB(sb).alloc_blksz;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
init_MUTEX(&HFSPLUS_I(inode).extents_lock);
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -407,7 +406,6 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset);
HFSPLUS_I(inode).dev = 0;
- inode->i_blksize = HFSPLUS_SB(inode->i_sb).alloc_blksz;
if (type == HFSPLUS_FOLDER) {
struct hfsplus_cat_folder *folder = &entry.folder;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d279d5924f28..194eede52fa4 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -493,8 +493,7 @@ static int __init init_hfsplus_fs(void)
static void __exit exit_hfsplus_fs(void)
{
unregister_filesystem(&hfsplus_fs_type);
- if (kmem_cache_destroy(hfsplus_inode_cachep))
- printk(KERN_ERR "hfsplus_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(hfsplus_inode_cachep);
}
module_init(init_hfsplus_fs)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b82e3d9c8790..322e876c35ed 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -156,7 +156,6 @@ static int read_name(struct inode *ino, char *name)
ino->i_mode = i_mode;
ino->i_nlink = i_nlink;
ino->i_size = i_size;
- ino->i_blksize = i_blksize;
ino->i_blocks = i_blocks;
return(0);
}
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 2807aa833e62..b52b7381d10f 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -76,7 +76,7 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
return NULL;
}
- qbh->data = data = (char *)kmalloc(2048, GFP_NOFS);
+ qbh->data = data = kmalloc(2048, GFP_NOFS);
if (!data) {
printk("HPFS: hpfs_map_4sectors: out of memory\n");
goto bail;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index f687d54ed442..32ab51e42b96 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -12,7 +12,6 @@
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/hpfs_fs.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f2c338c4d9..bcf6ee36e065 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
i->i_gid = hpfs_sb(sb)->sb_gid;
i->i_mode = hpfs_sb(sb)->sb_mode;
hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
- i->i_blksize = 512;
i->i_size = -1;
i->i_blocks = -1;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f798480a363f..450b5e0b4785 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -11,6 +11,7 @@
#include <linux/parser.h>
#include <linux/init.h>
#include <linux/statfs.h>
+#include <linux/magic.h>
/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
@@ -202,8 +203,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(hpfs_inode_cachep))
- printk(KERN_INFO "hpfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(hpfs_inode_cachep);
}
/*
@@ -461,11 +461,10 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
int o;
- sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
s->s_fs_info = sbi;
- memset(sbi, 0, sizeof(*sbi));
sbi->sb_bmp_dir = NULL;
sbi->sb_cp_table = NULL;
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index 3a9bdf58166f..dcb6d2e988b8 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -152,7 +152,6 @@ static void hppfs_read_inode(struct inode *ino)
ino->i_mode = proc_ino->i_mode;
ino->i_nlink = proc_ino->i_nlink;
ino->i_size = proc_ino->i_size;
- ino->i_blksize = proc_ino->i_blksize;
ino->i_blocks = proc_ino->i_blocks;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c3920c96dadf..f5b8f329aca6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -229,7 +229,7 @@ static void hugetlbfs_delete_inode(struct inode *inode)
clear_inode(inode);
}
-static void hugetlbfs_forget_inode(struct inode *inode)
+static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
struct super_block *sb = inode->i_sb;
@@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode->i_mode = mode;
inode->i_uid = uid;
inode->i_gid = gid;
- inode->i_blksize = HPAGE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->a_ops = &hugetlbfs_aops;
inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
diff --git a/fs/inode.c b/fs/inode.c
index 0bf9f0444a96..abf77471e6c4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb)
inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_rdev = 0;
- inode->i_security = NULL;
inode->dirtied_when = 0;
if (security_inode_alloc(inode)) {
if (inode->i_sb->s_op->destroy_inode)
@@ -163,7 +162,7 @@ static struct inode *alloc_inode(struct super_block *sb)
bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
mapping->backing_dev_info = bdi;
}
- memset(&inode->u, 0, sizeof(inode->u));
+ inode->i_private = 0;
inode->i_mapping = mapping;
}
return inode;
@@ -254,9 +253,9 @@ void clear_inode(struct inode *inode)
DQUOT_DROP(inode);
if (inode->i_sb && inode->i_sb->s_op->clear_inode)
inode->i_sb->s_op->clear_inode(inode);
- if (inode->i_bdev)
+ if (S_ISBLK(inode->i_mode) && inode->i_bdev)
bd_forget(inode);
- if (inode->i_cdev)
+ if (S_ISCHR(inode->i_mode) && inode->i_cdev)
cd_forget(inode);
inode->i_state = I_CLEAR;
}
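The new S_ISBLK()/S_ISCHR() tests before touching i_bdev and i_cdev are what you would expect if those pointers share storage in struct inode, so only the member matching the inode's type is meaningful. The exact layout is an assumption here, sketched with invented names:

	#include <linux/fs.h>
	#include <linux/cdev.h>

	struct demo_inode {
		umode_t mode;
		union {				/* only one binding is valid at a time */
			struct block_device *bdev;	/* valid when S_ISBLK(mode) */
			struct cdev *cdev;		/* valid when S_ISCHR(mode) */
		};
	};

	static void demo_forget(struct demo_inode *inode)
	{
		/* Trust only the union member that matches the inode type. */
		if (S_ISBLK(inode->mode) && inode->bdev)
			inode->bdev = NULL;	/* stand-in for bd_forget() */
		else if (S_ISCHR(inode->mode) && inode->cdev)
			inode->cdev = NULL;	/* stand-in for cd_forget() */
	}
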
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 14391361c886..c34b862cdbf2 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -96,9 +96,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(isofs_inode_cachep))
- printk(KERN_INFO "iso_inode_cache: not all structures were "
- "freed\n");
+ kmem_cache_destroy(isofs_inode_cachep);
}
static int isofs_remount(struct super_block *sb, int *flags, char *data)
@@ -557,11 +555,10 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
struct iso9660_options opt;
struct isofs_sb_info * sbi;
- sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
s->s_fs_info = sbi;
- memset(sbi, 0, sizeof(*sbi));
if (!parse_options((char *)data, &opt))
goto out_freesbi;
@@ -963,30 +960,30 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
goto abort;
}
- if (nextblk) {
- while (b_off >= (offset + sect_size)) {
- struct inode *ninode;
-
- offset += sect_size;
- if (nextblk == 0)
- goto abort;
- ninode = isofs_iget(inode->i_sb, nextblk, nextoff);
- if (!ninode)
- goto abort;
- firstext = ISOFS_I(ninode)->i_first_extent;
- sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode);
- nextblk = ISOFS_I(ninode)->i_next_section_block;
- nextoff = ISOFS_I(ninode)->i_next_section_offset;
- iput(ninode);
-
- if (++section > 100) {
- printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n");
- printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u "
- "nextblk=%lu nextoff=%lu\n",
- iblock, firstext, (unsigned) sect_size,
- nextblk, nextoff);
- goto abort;
- }
+ /* On the last section, nextblk == 0, section size is likely to
+ * exceed sect_size by a partial block, and access beyond the
+ * end of the file will reach beyond the section size, too.
+ */
+ while (nextblk && (b_off >= (offset + sect_size))) {
+ struct inode *ninode;
+
+ offset += sect_size;
+ ninode = isofs_iget(inode->i_sb, nextblk, nextoff);
+ if (!ninode)
+ goto abort;
+ firstext = ISOFS_I(ninode)->i_first_extent;
+ sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode);
+ nextblk = ISOFS_I(ninode)->i_next_section_block;
+ nextoff = ISOFS_I(ninode)->i_next_section_offset;
+ iput(ninode);
+
+ if (++section > 100) {
+ printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n");
+ printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u "
+ "nextblk=%lu nextoff=%lu\n",
+ iblock, firstext, (unsigned) sect_size,
+ nextblk, nextoff);
+ goto abort;
}
}
@@ -1238,7 +1235,7 @@ static void isofs_read_inode(struct inode *inode)
}
inode->i_uid = sbi->s_uid;
inode->i_gid = sbi->s_gid;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
ei->i_format_parm[0] = 0;
ei->i_format_parm[1] = 0;
@@ -1294,7 +1291,6 @@ static void isofs_read_inode(struct inode *inode)
isonum_711 (de->ext_attr_length));
/* Set the number of blocks for stat() - should be done before RR */
- inode->i_blksize = PAGE_CACHE_SIZE; /* For stat() only */
inode->i_blocks = (inode->i_size + 511) >> 9;
/*
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 47678a26c13b..0208cc7ac5d0 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -1,6 +1,6 @@
/*
* linux/fs/checkpoint.c
- *
+ *
* Written by Stephen C. Tweedie <sct@redhat.com>, 1999
*
* Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
*
- * Checkpoint routines for the generic filesystem journaling code.
- * Part of the ext2fs journaling system.
+ * Checkpoint routines for the generic filesystem journaling code.
+ * Part of the ext2fs journaling system.
*
* Checkpointing is the process of ensuring that a section of the log is
* committed fully to disk, so that that portion of the log can be
@@ -145,6 +145,7 @@ void __log_wait_for_space(journal_t *journal)
* jbd_unlock_bh_state().
*/
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
+ __releases(journal->j_list_lock)
{
get_bh(bh);
spin_unlock(&journal->j_list_lock);
@@ -225,7 +226,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Try to flush one buffer from the checkpoint list to disk.
*
* Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -269,7 +270,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
* possibly block, while still holding the journal lock.
* We cannot afford to let the transaction logic start
* messing around with this buffer before we write it to
- * disk, as that would break recoverability.
+ * disk, as that would break recoverability.
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
@@ -292,7 +293,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
* Perform an actual checkpoint. We take the first transaction on the
* list of transactions to be checkpointed and send all its buffers
* to disk. We submit larger chunks of data at once.
- *
+ *
* The journal should be locked before calling this function.
*/
int log_do_checkpoint(journal_t *journal)
@@ -303,10 +304,10 @@ int log_do_checkpoint(journal_t *journal)
jbd_debug(1, "Start checkpoint\n");
- /*
+ /*
* First thing: if there are any transactions in the log which
* don't need checkpointing, just eliminate them from the
- * journal straight away.
+ * journal straight away.
*/
result = cleanup_journal_tail(journal);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -384,9 +385,9 @@ out:
* we have already got rid of any since the last update of the log tail
* in the journal superblock. If so, we can instantly roll the
* superblock forward to remove those transactions from the log.
- *
+ *
* Return <0 on error, 0 on success, 1 if there was nothing to clean up.
- *
+ *
* Called with the journal lock held.
*
* This is the only part of the journaling code which really needs to be
@@ -403,8 +404,8 @@ int cleanup_journal_tail(journal_t *journal)
unsigned long blocknr, freed;
/* OK, work out the oldest transaction remaining in the log, and
- * the log block it starts at.
- *
+ * the log block it starts at.
+ *
* If the log is now empty, we need to work out which is the
* next transaction ID we will write, and where it will
* start. */
@@ -479,7 +480,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
if (!jh)
return 0;
- last_jh = jh->b_cpprev;
+ last_jh = jh->b_cpprev;
do {
jh = next_jh;
next_jh = jh->b_cpnext;
@@ -557,7 +558,7 @@ out:
return ret;
}
-/*
+/*
* journal_remove_checkpoint: called after a buffer has been committed
* to disk (either by being write-back flushed to disk, or being
* committed to the log).
@@ -635,7 +636,7 @@ out:
* Called with the journal locked.
* Called with j_list_lock held.
*/
-void __journal_insert_checkpoint(struct journal_head *jh,
+void __journal_insert_checkpoint(struct journal_head *jh,
transaction_t *transaction)
{
JBUFFER_TRACE(jh, "entry");
@@ -657,7 +658,7 @@ void __journal_insert_checkpoint(struct journal_head *jh,
/*
* We've finished with this transaction structure: adios...
- *
+ *
* The transaction must have no links except for the checkpoint by this
* point.
*
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 42da60784311..32a8caf0c41e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -160,6 +160,117 @@ static int journal_write_commit_record(journal_t *journal,
return (ret == -EIO);
}
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+ int i;
+
+ for (i = 0; i < bufs; i++) {
+ wbuf[i]->b_end_io = end_buffer_write_sync;
+ /* We use-up our safety reference in submit_bh() */
+ submit_bh(WRITE, wbuf[i]);
+ }
+}
+
+/*
+ * Submit all the data buffers to disk
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+ transaction_t *commit_transaction)
+{
+ struct journal_head *jh;
+ struct buffer_head *bh;
+ int locked;
+ int bufs = 0;
+ struct buffer_head **wbuf = journal->j_wbuf;
+
+ /*
+ * Whenever we unlock the journal and sleep, things can get added
+ * onto ->t_sync_datalist, so we have to keep looping back to
+ * write_out_data until we *know* that the list is empty.
+ *
+ * Cleanup any flushed data buffers from the data list. Even in
+ * abort mode, we want to flush this out as soon as possible.
+ */
+write_out_data:
+ cond_resched();
+ spin_lock(&journal->j_list_lock);
+
+ while (commit_transaction->t_sync_datalist) {
+ jh = commit_transaction->t_sync_datalist;
+ bh = jh2bh(jh);
+ locked = 0;
+
+ /* Get reference just to make sure buffer does not disappear
+ * when we are forced to drop various locks */
+ get_bh(bh);
+ /* If the buffer is dirty, we need to submit IO and hence
+ * we need the buffer lock. We try to lock the buffer without
+ * blocking. If we fail, we need to drop j_list_lock and do
+ * blocking lock_buffer().
+ */
+ if (buffer_dirty(bh)) {
+ if (test_set_buffer_locked(bh)) {
+ BUFFER_TRACE(bh, "needs blocking lock");
+ spin_unlock(&journal->j_list_lock);
+ /* Write out all data to prevent deadlocks */
+ journal_do_submit_data(wbuf, bufs);
+ bufs = 0;
+ lock_buffer(bh);
+ spin_lock(&journal->j_list_lock);
+ }
+ locked = 1;
+ }
+ /* We have to get bh_state lock. Again out of order, sigh. */
+ if (!inverted_lock(journal, bh)) {
+ jbd_lock_bh_state(bh);
+ spin_lock(&journal->j_list_lock);
+ }
+ /* Someone already cleaned up the buffer? */
+ if (!buffer_jbd(bh)
+ || jh->b_transaction != commit_transaction
+ || jh->b_jlist != BJ_SyncData) {
+ jbd_unlock_bh_state(bh);
+ if (locked)
+ unlock_buffer(bh);
+ BUFFER_TRACE(bh, "already cleaned up");
+ put_bh(bh);
+ continue;
+ }
+ if (locked && test_clear_buffer_dirty(bh)) {
+ BUFFER_TRACE(bh, "needs writeout, adding to array");
+ wbuf[bufs++] = bh;
+ __journal_file_buffer(jh, commit_transaction,
+ BJ_Locked);
+ jbd_unlock_bh_state(bh);
+ if (bufs == journal->j_wbufsize) {
+ spin_unlock(&journal->j_list_lock);
+ journal_do_submit_data(wbuf, bufs);
+ bufs = 0;
+ goto write_out_data;
+ }
+ }
+ else {
+ BUFFER_TRACE(bh, "writeout complete: unfile");
+ __journal_unfile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (locked)
+ unlock_buffer(bh);
+ journal_remove_journal_head(bh);
+ /* Once for our safety reference, once for
+ * journal_remove_journal_head() */
+ put_bh(bh);
+ put_bh(bh);
+ }
+
+ if (lock_need_resched(&journal->j_list_lock)) {
+ spin_unlock(&journal->j_list_lock);
+ goto write_out_data;
+ }
+ }
+ spin_unlock(&journal->j_list_lock);
+ journal_do_submit_data(wbuf, bufs);
+}
+
/*
* journal_commit_transaction
*
@@ -313,80 +424,13 @@ void journal_commit_transaction(journal_t *journal)
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
-
err = 0;
- /*
- * Whenever we unlock the journal and sleep, things can get added
- * onto ->t_sync_datalist, so we have to keep looping back to
- * write_out_data until we *know* that the list is empty.
- */
- bufs = 0;
- /*
- * Cleanup any flushed data buffers from the data list. Even in
- * abort mode, we want to flush this out as soon as possible.
- */
-write_out_data:
- cond_resched();
- spin_lock(&journal->j_list_lock);
-
- while (commit_transaction->t_sync_datalist) {
- struct buffer_head *bh;
-
- jh = commit_transaction->t_sync_datalist;
- commit_transaction->t_sync_datalist = jh->b_tnext;
- bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- BUFFER_TRACE(bh, "locked");
- if (!inverted_lock(journal, bh))
- goto write_out_data;
- __journal_temp_unlink_buffer(jh);
- __journal_file_buffer(jh, commit_transaction,
- BJ_Locked);
- jbd_unlock_bh_state(bh);
- if (lock_need_resched(&journal->j_list_lock)) {
- spin_unlock(&journal->j_list_lock);
- goto write_out_data;
- }
- } else {
- if (buffer_dirty(bh)) {
- BUFFER_TRACE(bh, "start journal writeout");
- get_bh(bh);
- wbuf[bufs++] = bh;
- if (bufs == journal->j_wbufsize) {
- jbd_debug(2, "submit %d writes\n",
- bufs);
- spin_unlock(&journal->j_list_lock);
- ll_rw_block(SWRITE, bufs, wbuf);
- journal_brelse_array(wbuf, bufs);
- bufs = 0;
- goto write_out_data;
- }
- } else {
- BUFFER_TRACE(bh, "writeout complete: unfile");
- if (!inverted_lock(journal, bh))
- goto write_out_data;
- __journal_unfile_buffer(jh);
- jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
- put_bh(bh);
- if (lock_need_resched(&journal->j_list_lock)) {
- spin_unlock(&journal->j_list_lock);
- goto write_out_data;
- }
- }
- }
- }
-
- if (bufs) {
- spin_unlock(&journal->j_list_lock);
- ll_rw_block(SWRITE, bufs, wbuf);
- journal_brelse_array(wbuf, bufs);
- spin_lock(&journal->j_list_lock);
- }
+ journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
*/
+ spin_lock(&journal->j_list_lock);
while (commit_transaction->t_locked_list) {
struct buffer_head *bh;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f66724ce443a..7af6099c911c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -181,7 +181,7 @@ loop:
transaction->t_expires))
should_sleep = 0;
if (journal->j_flags & JFS_UNMOUNT)
- should_sleep = 0;
+ should_sleep = 0;
if (should_sleep) {
spin_unlock(&journal->j_state_lock);
schedule();
@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal)
int journal_write_metadata_buffer(transaction_t *transaction,
struct journal_head *jh_in,
struct journal_head **jh_out,
- int blocknr)
+ unsigned long blocknr)
{
int need_copy_out = 0;
int done_copy_out = 0;
@@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp)
* this is a no-op. If needed, we can use j_blk_offset - everything is
* ready.
*/
-int journal_bmap(journal_t *journal, unsigned long blocknr,
+int journal_bmap(journal_t *journal, unsigned long blocknr,
unsigned long *retp)
{
int err = 0;
@@ -696,13 +696,13 @@ fail:
* @bdev: Block device on which to create the journal
* @fs_dev: Device which hold journalled filesystem for this journal.
* @start: Block nr Start of journal.
- * @len: Lenght of the journal in blocks.
+ * @len: Length of the journal in blocks.
* @blocksize: blocksize of journalling device
* @returns: a newly created journal_t *
- *
+ *
* journal_init_dev creates a journal which maps a fixed contiguous
* range of blocks on an arbitrary block device.
- *
+ *
*/
journal_t * journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
@@ -715,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev,
if (!journal)
return NULL;
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_maxlen = len;
- journal->j_blocksize = blocksize;
-
- bh = __getblk(journal->j_dev, start, journal->j_blocksize);
- J_ASSERT(bh != NULL);
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
-
/* journal descriptor can store up to n blocks -bzzz */
+ journal->j_blocksize = blocksize;
n = journal->j_blocksize / sizeof(journal_block_tag_t);
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
@@ -736,14 +726,23 @@ journal_t * journal_init_dev(struct block_device *bdev,
kfree(journal);
journal = NULL;
}
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_maxlen = len;
+
+ bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+ J_ASSERT(bh != NULL);
+ journal->j_sb_buffer = bh;
+ journal->j_superblock = (journal_superblock_t *)bh->b_data;
return journal;
}
-
-/**
+
+/**
* journal_t * journal_init_inode () - creates a journal which maps to a inode.
* @inode: An inode to create the journal in
- *
+ *
* journal_init_inode creates a journal which maps an on-disk inode as
* the journal. The inode must exist already, must support bmap() and
* must have all data blocks preallocated.
@@ -763,7 +762,7 @@ journal_t * journal_init_inode (struct inode *inode)
journal->j_inode = inode;
jbd_debug(1,
"journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
- journal, inode->i_sb->s_id, inode->i_ino,
+ journal, inode->i_sb->s_id, inode->i_ino,
(long long) inode->i_size,
inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
@@ -798,10 +797,10 @@ journal_t * journal_init_inode (struct inode *inode)
return journal;
}
-/*
+/*
* If the journal init or create aborts, we need to mark the journal
* superblock as being NULL to prevent the journal destroy from writing
- * back a bogus superblock.
+ * back a bogus superblock.
*/
static void journal_fail_superblock (journal_t *journal)
{
@@ -820,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal)
static int journal_reset(journal_t *journal)
{
journal_superblock_t *sb = journal->j_superblock;
- unsigned int first, last;
+ unsigned long first, last;
first = be32_to_cpu(sb->s_first);
last = be32_to_cpu(sb->s_maxlen);
@@ -844,13 +843,13 @@ static int journal_reset(journal_t *journal)
return 0;
}
-/**
+/**
* int journal_create() - Initialise the new journal file
* @journal: Journal to create. This structure must have been initialised
- *
+ *
* Given a journal_t structure which tells us which disk blocks we can
* use, create a new journal superblock and initialise all of the
- * journal fields from scratch.
+ * journal fields from scratch.
**/
int journal_create(journal_t *journal)
{
@@ -915,7 +914,7 @@ int journal_create(journal_t *journal)
return journal_reset(journal);
}
-/**
+/**
* void journal_update_superblock() - Update journal sb on disk.
* @journal: The journal to update.
* @wait: Set to '0' if you don't want to wait for IO completion.
@@ -939,7 +938,7 @@ void journal_update_superblock(journal_t *journal, int wait)
journal->j_transaction_sequence) {
jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
"(start %ld, seq %d, errno %d)\n",
- journal->j_tail, journal->j_tail_sequence,
+ journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
goto out;
}
@@ -1062,7 +1061,7 @@ static int load_superblock(journal_t *journal)
/**
* int journal_load() - Read journal from disk.
* @journal: Journal to act on.
- *
+ *
* Given a journal_t structure which tells us which disk blocks contain
* a journal, read the journal from disk to initialise the in-memory
* structures.
@@ -1094,7 +1093,7 @@ int journal_load(journal_t *journal)
/*
* Create a slab for this blocksize
*/
- err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+ err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
if (err)
return err;
@@ -1172,9 +1171,9 @@ void journal_destroy(journal_t *journal)
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
* @incompat: bitmask of incompatible features
- *
+ *
* Check whether the journal uses all of a given set of
- * features. Return true (non-zero) if it does.
+ * features. Return true (non-zero) if it does.
**/
int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1203,7 +1202,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
* @incompat: bitmask of incompatible features
- *
+ *
* Check whether the journaling code supports the use of
* all of a given set of features on this journal. Return true
* (non-zero) if it can. */
@@ -1241,7 +1240,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
* @incompat: bitmask of incompatible features
*
* Mark a given journal feature as present on the
- * superblock. Returns true if the requested features could be set.
+ * superblock. Returns true if the requested features could be set.
*
*/
@@ -1327,7 +1326,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
/**
* int journal_flush () - Flush journal
* @journal: Journal to act on.
- *
+ *
* Flush all data for a given journal to disk and empty the journal.
* Filesystems can use this when remounting readonly to ensure that
* recovery does not need to happen on remount.
@@ -1394,7 +1393,7 @@ int journal_flush(journal_t *journal)
* int journal_wipe() - Wipe journal contents
* @journal: Journal to act on.
* @write: flag (see below)
- *
+ *
* Wipe out all of the contents of a journal, safely. This will produce
* a warning if the journal contains any valid recovery information.
* Must be called between journal_init_*() and journal_load().
@@ -1449,7 +1448,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer)
/*
* Journal abort has very specific semantics, which we describe
- * for journal abort.
+ * for journal abort.
*
* Two internal functions, which provide abort to the jbd layer
* itself are here.
@@ -1504,7 +1503,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
* Perform a complete, immediate shutdown of the ENTIRE
* journal (not of a single transaction). This operation cannot be
* undone without closing and reopening the journal.
- *
+ *
* The journal_abort function is intended to support higher level error
* recovery mechanisms such as the ext2/ext3 remount-readonly error
* mode.
@@ -1538,7 +1537,7 @@ static void __journal_abort_soft (journal_t *journal, int errno)
* supply an errno; a null errno implies that absolutely no further
* writes are done to the journal (unless there are any already in
* progress).
- *
+ *
*/
void journal_abort(journal_t *journal, int errno)
@@ -1546,7 +1545,7 @@ void journal_abort(journal_t *journal, int errno)
__journal_abort_soft(journal, errno);
}
-/**
+/**
* int journal_errno () - returns the journal's error state.
* @journal: journal to examine.
*
@@ -1570,7 +1569,7 @@ int journal_errno(journal_t *journal)
return err;
}
-/**
+/**
* int journal_clear_err () - clears the journal's error state
* @journal: journal to act on.
*
@@ -1590,7 +1589,7 @@ int journal_clear_err(journal_t *journal)
return err;
}
-/**
+/**
* void journal_ack_err() - Ack journal err.
* @journal: journal to act on.
*
@@ -1612,7 +1611,7 @@ int journal_blocks_per_page(struct inode *inode)
/*
* Simple support for retrying memory allocations. Introduced to help to
- * debug different VM deadlock avoidance strategies.
+ * debug different VM deadlock avoidance strategies.
*/
void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
{
@@ -2047,13 +2046,7 @@ static int __init journal_init(void)
{
int ret;
-/* Static check for data structure consistency. There's no code
- * invoked --- we'll just get a linker failure if things aren't right.
- */
- extern void journal_bad_superblock_size(void);
- if (sizeof(struct journal_superblock_s) != 1024)
- journal_bad_superblock_size();
-
+ BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
ret = journal_init_caches();
if (ret != 0)
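The BUILD_BUG_ON() above replaces the old linker-failure trick with a plain compile-time assertion: the build breaks if the condition is true. A small illustrative example with an invented on-disk structure:

	#include <linux/kernel.h>
	#include <linux/types.h>
	#include <linux/init.h>

	struct demo_sb {			/* meant to be exactly 1024 bytes */
		__be32	magic;
		__be32	version;
		u8	pad[1016];
	};

	static int __init demo_init(void)
	{
		/* Fails to compile if the structure ever changes size. */
		BUILD_BUG_ON(sizeof(struct demo_sb) != 1024);
		return 0;
	}
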
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index de5bafb4e853..11563fe2a52b 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -1,6 +1,6 @@
/*
* linux/fs/recovery.c
- *
+ *
* Written by Stephen C. Tweedie <sct@redhat.com>, 1999
*
* Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
* option, any later version, incorporated herein by reference.
*
* Journal recovery routines for the generic filesystem journaling code;
- * part of the ext2fs journaling system.
+ * part of the ext2fs journaling system.
*/
#ifndef __KERNEL__
@@ -25,9 +25,9 @@
/*
* Maintain information about the progress of the recovery job, so that
- * the different passes can carry information between them.
+ * the different passes can carry information between them.
*/
-struct recovery_info
+struct recovery_info
{
tid_t start_transaction;
tid_t end_transaction;
@@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *,
#ifdef __KERNEL__
/* Release readahead buffers after use */
-void journal_brelse_array(struct buffer_head *b[], int n)
+static void journal_brelse_array(struct buffer_head *b[], int n)
{
while (--n >= 0)
brelse (b[n]);
@@ -116,7 +116,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
err = 0;
failed:
- if (nbufs)
+ if (nbufs)
journal_brelse_array(bufs, nbufs);
return err;
}
@@ -128,7 +128,7 @@ failed:
* Read a block from the journal
*/
-static int jread(struct buffer_head **bhp, journal_t *journal,
+static int jread(struct buffer_head **bhp, journal_t *journal,
unsigned int offset)
{
int err;
@@ -212,14 +212,14 @@ do { \
/**
* journal_recover - recovers a on-disk journal
* @journal: the journal to recover
- *
+ *
* The primary function for recovering the log contents when mounting a
- * journaled device.
+ * journaled device.
*
* Recovery is done in three passes. In the first pass, we look for the
* end of the log. In the second, we assemble the list of revoke
* blocks. In the third and final pass, we replay any un-revoked blocks
- * in the log.
+ * in the log.
*/
int journal_recover(journal_t *journal)
{
@@ -231,10 +231,10 @@ int journal_recover(journal_t *journal)
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
- /*
+ /*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
- * unmounted.
+ * unmounted.
*/
if (!sb->s_start) {
@@ -253,7 +253,7 @@ int journal_recover(journal_t *journal)
jbd_debug(0, "JBD: recovery, exit status %d, "
"recovered transactions %u to %u\n",
err, info.start_transaction, info.end_transaction);
- jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+ jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
/* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@ int journal_recover(journal_t *journal)
/**
* journal_skip_recovery - Start journal and wipe exiting records
* @journal: journal to startup
- *
+ *
* Locate any valid recovery information from the journal and set up the
* journal structures in memory to ignore it (presumably because the
- * caller has evidence that it is out of date).
+ * caller has evidence that it is out of date).
* This function doesn't appear to be exported.
*
* We perform one pass over the journal to allow us to tell the user how
* much recovery information is being erased, and to let us initialise
- * the journal transaction sequence numbers to the next unused ID.
+ * the journal transaction sequence numbers to the next unused ID.
*/
int journal_skip_recovery(journal_t *journal)
{
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
#ifdef CONFIG_JBD_DEBUG
int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
#endif
- jbd_debug(0,
+ jbd_debug(0,
"JBD: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
journal->j_transaction_sequence = ++info.end_transaction;
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal,
unsigned long next_log_block;
int err, success = 0;
journal_superblock_t * sb;
- journal_header_t * tmp;
+ journal_header_t * tmp;
struct buffer_head * bh;
unsigned int sequence;
int blocktype;
@@ -324,10 +324,10 @@ static int do_one_pass(journal_t *journal,
MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
/ sizeof(journal_block_tag_t));
- /*
+ /*
* First thing is to establish what we expect to find in the log
* (in terms of transaction IDs), and where (in terms of log
- * block offsets): query the superblock.
+ * block offsets): query the superblock.
*/
sb = journal->j_superblock;
@@ -344,7 +344,7 @@ static int do_one_pass(journal_t *journal,
* Now we walk through the log, transaction by transaction,
* making sure that each transaction has a commit block in the
* expected place. Each complete transaction gets replayed back
- * into the main filesystem.
+ * into the main filesystem.
*/
while (1) {
@@ -379,8 +379,8 @@ static int do_one_pass(journal_t *journal,
next_log_block++;
wrap(journal, next_log_block);
- /* What kind of buffer is it?
- *
+ /* What kind of buffer is it?
+ *
* If it is a descriptor block, check that it has the
* expected sequence number. Otherwise, we're all done
* here. */
@@ -394,7 +394,7 @@ static int do_one_pass(journal_t *journal,
blocktype = be32_to_cpu(tmp->h_blocktype);
sequence = be32_to_cpu(tmp->h_sequence);
- jbd_debug(3, "Found magic %d, sequence %d\n",
+ jbd_debug(3, "Found magic %d, sequence %d\n",
blocktype, sequence);
if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@ static int do_one_pass(journal_t *journal,
/* Recover what we can, but
* report failure at the end. */
success = err;
- printk (KERN_ERR
+ printk (KERN_ERR
"JBD: IO error %d recovering "
"block %ld in log\n",
err, io_block);
@@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal,
* revoked, then we're all done
* here. */
if (journal_test_revoke
- (journal, blocknr,
+ (journal, blocknr,
next_commit_ID)) {
brelse(obh);
++info->nr_revoke_hits;
@@ -465,7 +465,7 @@ static int do_one_pass(journal_t *journal,
blocknr,
journal->j_blocksize);
if (nbh == NULL) {
- printk(KERN_ERR
+ printk(KERN_ERR
"JBD: Out of memory "
"during recovery.\n");
err = -ENOMEM;
@@ -537,7 +537,7 @@ static int do_one_pass(journal_t *journal,
}
done:
- /*
+ /*
* We broke out of the log scan loop: either we came to the
* known end of the log or we found an unexpected block in the
* log. If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@ static int do_one_pass(journal_t *journal,
/* Scan a revoke record, marking all blocks mentioned as revoked. */
-static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
+static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
tid_t sequence, struct recovery_info *info)
{
journal_revoke_header_t *header;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index a56144183462..c532429d8d9b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -1,6 +1,6 @@
/*
* linux/fs/revoke.c
- *
+ *
* Written by Stephen C. Tweedie <sct@redhat.com>, 2000
*
* Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
* Revoke is the mechanism used to prevent old log records for deleted
* metadata from being replayed on top of newer data using the same
* blocks. The revoke mechanism is used in two separate places:
- *
+ *
* + Commit: during commit we write the entire list of the current
* transaction's revoked blocks to the journal
- *
+ *
* + Recovery: during recovery we record the transaction ID of all
* revoked blocks. If there are multiple revoke records in the log
* for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
* single transaction:
*
* Block is revoked and then journaled:
- * The desired end result is the journaling of the new block, so we
+ * The desired end result is the journaling of the new block, so we
* cancel the revoke before the transaction commits.
*
* Block is journaled and then revoked:
@@ -41,7 +41,7 @@
* transaction must have happened after the block was journaled and so
* the revoke must take precedence.
*
- * Block is revoked and then written as data:
+ * Block is revoked and then written as data:
* The data write is allowed to succeed, but the revoke is _not_
* cancelled. We still need to prevent old log records from
* overwriting the new data. We don't even need to clear the revoke
@@ -54,7 +54,7 @@
* buffer has not been revoked, and cancel_revoke
* need do nothing.
* RevokeValid set, Revoked set:
- * buffer has been revoked.
+ * buffer has been revoked.
*/
#ifndef __KERNEL__
@@ -77,7 +77,7 @@ static kmem_cache_t *revoke_table_cache;
journal replay, this involves recording the transaction ID of the
last transaction to revoke this block. */
-struct jbd_revoke_record_s
+struct jbd_revoke_record_s
{
struct list_head hash;
tid_t sequence; /* Used for recovery only */
@@ -90,8 +90,8 @@ struct jbd_revoke_table_s
{
/* It is conceivable that we might want a larger hash table
* for recovery. Must be a power of two. */
- int hash_size;
- int hash_shift;
+ int hash_size;
+ int hash_shift;
struct list_head *hash_table;
};
@@ -301,22 +301,22 @@ void journal_destroy_revoke(journal_t *journal)
#ifdef __KERNEL__
-/*
+/*
* journal_revoke: revoke a given buffer_head from the journal. This
* prevents the block from being replayed during recovery if we take a
* crash after this current transaction commits. Any subsequent
* metadata writes of the buffer in this transaction cancel the
- * revoke.
+ * revoke.
*
* Note that this call may block --- it is up to the caller to make
* sure that there are no further calls to journal_write_metadata
* before the revoke is complete. In ext3, this implies calling the
* revoke before clearing the block bitmap when we are deleting
- * metadata.
+ * metadata.
*
* Revoke performs a journal_forget on any buffer_head passed in as a
* parameter, but does _not_ forget the buffer_head if the bh was only
- * found implicitly.
+ * found implicitly.
*
* bh_in may not be a journalled buffer - it may have come off
* the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@ void journal_destroy_revoke(journal_t *journal)
* by one.
*/
-int journal_revoke(handle_t *handle, unsigned long blocknr,
+int journal_revoke(handle_t *handle, unsigned long blocknr,
struct buffer_head *bh_in)
{
struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal)
else
journal->j_revoke = journal->j_revoke_table[0];
- for (i = 0; i < journal->j_revoke->hash_size; i++)
+ for (i = 0; i < journal->j_revoke->hash_size; i++)
INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
}
@@ -498,7 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
* Called with the journal lock held.
*/
-void journal_write_revoke_records(journal_t *journal,
+void journal_write_revoke_records(journal_t *journal,
transaction_t *transaction)
{
struct journal_head *descriptor;
@@ -507,7 +507,7 @@ void journal_write_revoke_records(journal_t *journal,
struct list_head *hash_list;
int i, offset, count;
- descriptor = NULL;
+ descriptor = NULL;
offset = 0;
count = 0;
@@ -519,10 +519,10 @@ void journal_write_revoke_records(journal_t *journal,
hash_list = &revoke->hash_table[i];
while (!list_empty(hash_list)) {
- record = (struct jbd_revoke_record_s *)
+ record = (struct jbd_revoke_record_s *)
hash_list->next;
write_one_revoke_record(journal, transaction,
- &descriptor, &offset,
+ &descriptor, &offset,
record);
count++;
list_del(&record->hash);
@@ -534,14 +534,14 @@ void journal_write_revoke_records(journal_t *journal,
jbd_debug(1, "Wrote %d revoke records\n", count);
}
-/*
+/*
* Write out one revoke record. We need to create a new descriptor
- * block if the old one is full or if we have not already created one.
+ * block if the old one is full or if we have not already created one.
*/
-static void write_one_revoke_record(journal_t *journal,
+static void write_one_revoke_record(journal_t *journal,
transaction_t *transaction,
- struct journal_head **descriptorp,
+ struct journal_head **descriptorp,
int *offsetp,
struct jbd_revoke_record_s *record)
{
@@ -584,21 +584,21 @@ static void write_one_revoke_record(journal_t *journal,
*descriptorp = descriptor;
}
- * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+ * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
cpu_to_be32(record->blocknr);
offset += 4;
*offsetp = offset;
}
-/*
+/*
* Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to
* be waited for during commit, so it has to go onto the appropriate
* journal buffer list.
*/
-static void flush_descriptor(journal_t *journal,
- struct journal_head *descriptor,
+static void flush_descriptor(journal_t *journal,
+ struct journal_head *descriptor,
int offset)
{
journal_revoke_header_t *header;
@@ -618,7 +618,7 @@ static void flush_descriptor(journal_t *journal,
}
#endif
-/*
+/*
* Revoke support for recovery.
*
* Recovery needs to be able to:
@@ -629,7 +629,7 @@ static void flush_descriptor(journal_t *journal,
* check whether a given block in a given transaction should be replayed
* (ie. has not been revoked by a revoke record in that or a subsequent
* transaction)
- *
+ *
* empty the revoke table after recovery.
*/
@@ -637,11 +637,11 @@ static void flush_descriptor(journal_t *journal,
* First, setting revoke records. We create a new revoke record for
* every block ever revoked in the log as we scan it for recovery, and
* we update the existing records if we find multiple revokes for a
- * single block.
+ * single block.
*/
-int journal_set_revoke(journal_t *journal,
- unsigned long blocknr,
+int journal_set_revoke(journal_t *journal,
+ unsigned long blocknr,
tid_t sequence)
{
struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@ int journal_set_revoke(journal_t *journal,
if (tid_gt(sequence, record->sequence))
record->sequence = sequence;
return 0;
- }
+ }
return insert_revoke_hash(journal, blocknr, sequence);
}
-/*
+/*
* Test revoke records. For a given block referenced in the log, has
* that block been revoked? A revoke record with a given transaction
* sequence number revokes all blocks in that transaction and earlier
* ones, but later transactions still need to be replayed.
*/
-int journal_test_revoke(journal_t *journal,
+int journal_test_revoke(journal_t *journal,
unsigned long blocknr,
tid_t sequence)
{
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index f5169a96260e..e1b3c8af4d17 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1,6 +1,6 @@
/*
* linux/fs/transaction.c
- *
+ *
* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
*
* Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
* option, any later version, incorporated herein by reference.
*
* Generic filesystem transaction handling code; part of the ext2fs
- * journaling system.
+ * journaling system.
*
* This file manages transactions (compound commits managed by the
* journaling code) and handles (individual atomic operations by the
@@ -74,7 +74,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
* start_this_handle: Given a handle, deal with any locking or stalling
* needed to make sure that there is enough journal space for the handle
* to begin. Attach the handle to a transaction and set up the
- * transaction's buffer credits.
+ * transaction's buffer credits.
*/
static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +117,7 @@ repeat_locked:
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
spin_unlock(&journal->j_state_lock);
- ret = -EROFS;
+ ret = -EROFS;
goto out;
}
@@ -182,7 +182,7 @@ repeat_locked:
goto repeat;
}
- /*
+ /*
* The commit code assumes that it can get enough log space
* without forcing a checkpoint. This is *critical* for
* correctness: a checkpoint of a buffer which is also
@@ -191,7 +191,7 @@ repeat_locked:
*
* We must therefore ensure the necessary space in the journal
* *before* starting to dirty potentially checkpointed buffers
- * in the new transaction.
+ * in the new transaction.
*
* The worst part is, any transaction currently committing can
* reduce the free space arbitrarily. Be careful to account for
@@ -246,13 +246,13 @@ static handle_t *new_handle(int nblocks)
}
/**
- * handle_t *journal_start() - Obtain a new handle.
+ * handle_t *journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
*
* We make sure that the transaction can guarantee at least nblocks of
* modified buffers in the log. We block until the log can guarantee
- * that much space.
+ * that much space.
*
* This function is visible to journal users (like ext3fs), so is not
* called with the journal already locked.
@@ -292,11 +292,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
* int journal_extend() - extend buffer credits.
* @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by.
- *
+ *
* Some transactions, such as large extends and truncates, can be done
* atomically all at once or in several stages. The operation requests
* a credit for a number of buffer modifications in advance, but can
- * extend its credit if it needs more.
+ * extend its credit if it needs more.
*
* journal_extend tries to give the running handle more buffer credits.
* It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +363,7 @@ out:
* int journal_restart() - restart a handle .
* @handle: handle to restart
* @nblocks: nr credits requested
- *
+ *
* Restart a handle for a multi-transaction filesystem
* operation.
*
@@ -462,7 +462,7 @@ void journal_lock_updates(journal_t *journal)
/**
* void journal_unlock_updates (journal_t* journal) - release barrier
* @journal: Journal to release the barrier on.
- *
+ *
* Release a transaction barrier obtained with journal_lock_updates().
*
* Should be called without the journal lock held.
@@ -547,8 +547,8 @@ repeat:
jbd_lock_bh_state(bh);
/* We now hold the buffer lock so it is safe to query the buffer
- * state. Is the buffer dirty?
- *
+ * state. Is the buffer dirty?
+ *
* If so, there are two possibilities. The buffer may be
* non-journaled, and undergoing a quite legitimate writeback.
* Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +566,7 @@ repeat:
*/
if (jh->b_transaction) {
J_ASSERT_JH(jh,
- jh->b_transaction == transaction ||
+ jh->b_transaction == transaction ||
jh->b_transaction ==
journal->j_committing_transaction);
if (jh->b_next_transaction)
@@ -580,7 +580,7 @@ repeat:
*/
JBUFFER_TRACE(jh, "Unexpected dirty buffer");
jbd_unexpected_dirty_buffer(jh);
- }
+ }
unlock_buffer(bh);
@@ -653,7 +653,7 @@ repeat:
* buffer had better remain locked during the kmalloc,
* but that should be true --- we hold the journal lock
* still and the buffer is already on the BUF_JOURNAL
- * list so won't be flushed.
+ * list so won't be flushed.
*
* Subtle point, though: if this is a get_undo_access,
* then we will be relying on the frozen_data to contain
@@ -765,8 +765,8 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
* manually rather than reading off disk), then we need to keep the
* buffer_head locked until it has been completely filled with new
* data. In this case, we should be able to make the assertion that
- * the bh is not already part of an existing transaction.
- *
+ * the bh is not already part of an existing transaction.
+ *
* The buffer should already be locked by the caller by this point.
* There is no lock ranking violation: it was a newly created,
* unlocked buffer beforehand. */
@@ -778,7 +778,7 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh)
*
* Call this if you create a new bh.
*/
-int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
+int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
@@ -847,13 +847,13 @@ out:
* do not reuse freed space until the deallocation has been committed,
* since if we overwrote that space we would make the delete
* un-rewindable in case of a crash.
- *
+ *
* To deal with that, journal_get_undo_access requests write access to a
* buffer for parts of non-rewindable operations such as delete
* operations on the bitmaps. The journaling code must keep a copy of
* the buffer's contents prior to the undo_access call until such time
* as we know that the buffer has definitely been committed to disk.
- *
+ *
* We never need to know which transaction the committed data is part
* of, buffers touched here are guaranteed to be dirtied later and so
* will be committed to a new transaction in due course, at which point
@@ -911,13 +911,13 @@ out:
return err;
}
-/**
+/**
* int journal_dirty_data() - mark a buffer as containing dirty data which
* needs to be flushed before we can commit the
- * current transaction.
+ * current transaction.
* @handle: transaction
* @bh: bufferhead to mark
- *
+ *
* The buffer is placed on the transaction's data list and is marked as
* belonging to the transaction.
*
@@ -946,15 +946,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
/*
* What if the buffer is already part of a running transaction?
- *
+ *
* There are two cases:
* 1) It is part of the current running transaction. Refile it,
* just in case we have allocated it as metadata, deallocated
- * it, then reallocated it as data.
+ * it, then reallocated it as data.
* 2) It is part of the previous, still-committing transaction.
* If all we want to do is to guarantee that the buffer will be
* written to disk before this new transaction commits, then
- * being sure that the *previous* transaction has this same
+ * being sure that the *previous* transaction has this same
* property is sufficient for us! Just leave it on its old
* transaction.
*
@@ -1076,18 +1076,18 @@ no_journal:
return 0;
}
-/**
+/**
* int journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
- * @bh: buffer to mark
- *
+ * @bh: buffer to mark
+ *
* mark dirty metadata which needs to be journaled as part of the current
* transaction.
*
* The buffer is placed on the transaction's metadata list and is marked
- * as belonging to the transaction.
+ * as belonging to the transaction.
*
- * Returns error number or 0 on success.
+ * Returns error number or 0 on success.
*
* Special care needs to be taken if the buffer already belongs to the
* current committing transaction (in which case we should have frozen
@@ -1135,11 +1135,11 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
set_buffer_jbddirty(bh);
- /*
+ /*
* Metadata already on the current transaction list doesn't
* need to be filed. Metadata on another transaction's list must
* be committing, and will be refiled once the commit completes:
- * leave it alone for now.
+ * leave it alone for now.
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
@@ -1165,7 +1165,7 @@ out:
return 0;
}
-/*
+/*
* journal_release_buffer: undo a get_write_access without any buffer
* updates, if the update decided in the end that it didn't need access.
*
@@ -1176,20 +1176,20 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh)
BUFFER_TRACE(bh, "entry");
}
-/**
+/**
* void journal_forget() - bforget() for potentially-journaled buffers.
* @handle: transaction handle
* @bh: bh to 'forget'
*
* We can only do the bforget if there are no commits pending against the
* buffer. If the buffer is dirty in the current running transaction we
- * can safely unlink it.
+ * can safely unlink it.
*
* bh may not be a journalled buffer at all - it may be a non-JBD
* buffer which came off the hashtable. Check for this.
*
* Decrements bh->b_count by one.
- *
+ *
* Allow this call even if the handle has aborted --- it may be part of
* the caller's cleanup after an abort.
*/
@@ -1237,7 +1237,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
drop_reserve = 1;
- /*
+ /*
* We are no longer going to journal this buffer.
* However, the commit of this transaction is still
* important to the buffer: the delete that we are now
@@ -1246,7 +1246,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
*
* So, if we have a checkpoint on the buffer, we should
* now refile the buffer on our BJ_Forget list so that
- * we know to remove the checkpoint after we commit.
+ * we know to remove the checkpoint after we commit.
*/
if (jh->b_cp_transaction) {
@@ -1264,7 +1264,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
}
}
} else if (jh->b_transaction) {
- J_ASSERT_JH(jh, (jh->b_transaction ==
+ J_ASSERT_JH(jh, (jh->b_transaction ==
journal->j_committing_transaction));
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
@@ -1294,7 +1294,7 @@ drop:
/**
* int journal_stop() - complete a transaction
* @handle: transaction to complete.
- *
+ *
* All done for a particular handle.
*
* There is not much action needed here. We just return any remaining
@@ -1303,7 +1303,7 @@ drop:
* filesystem is marked for synchronous update.
*
* journal_stop itself will not usually return an error, but it may
- * do so in unusual circumstances. In particular, expect it to
+ * do so in unusual circumstances. In particular, expect it to
* return -EIO if a journal_abort has been executed since the
* transaction began.
*/
@@ -1373,7 +1373,7 @@ int journal_stop(handle_t *handle)
if (handle->h_sync ||
transaction->t_outstanding_credits >
journal->j_max_transaction_buffers ||
- time_after_eq(jiffies, transaction->t_expires)) {
+ time_after_eq(jiffies, transaction->t_expires)) {
/* Do this even for aborted journals: an abort still
* completes the commit thread, it just doesn't write
* anything to disk. */
@@ -1388,7 +1388,7 @@ int journal_stop(handle_t *handle)
/*
* Special case: JFS_SYNC synchronous updates require us
- * to wait for the commit to complete.
+ * to wait for the commit to complete.
*/
if (handle->h_sync && !(current->flags & PF_MEMALLOC))
err = log_wait_commit(journal, tid);
@@ -1439,7 +1439,7 @@ int journal_force_commit(journal_t *journal)
* jbd_lock_bh_state(jh2bh(jh)) is held.
*/
-static inline void
+static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
if (!*list) {
@@ -1454,7 +1454,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
}
}
-/*
+/*
* Remove a buffer from a transaction list, given the transaction's list
* head pointer.
*
@@ -1475,7 +1475,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
jh->b_tnext->b_tprev = jh->b_tprev;
}
-/*
+/*
* Remove a buffer from the appropriate transaction list.
*
* Note that this function can *change* the value of
@@ -1595,17 +1595,17 @@ out:
}
-/**
+/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
* @unused_gfp_mask: unused
*
- *
+ *
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
* so try_to_free_buffers() can reap them.
- *
+ *
* This function returns non-zero if we wish try_to_free_buffers()
* to be called. We do this if the page is releasable by try_to_free_buffers().
* We also do it if the page has locked or dirty buffers and the caller wants
@@ -1629,7 +1629,7 @@ out:
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
*/
-int journal_try_to_free_buffers(journal_t *journal,
+int journal_try_to_free_buffers(journal_t *journal,
struct page *page, gfp_t unused_gfp_mask)
{
struct buffer_head *head;
@@ -1697,7 +1697,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
}
/*
- * journal_invalidatepage
+ * journal_invalidatepage
*
* This code is tricky. It has a number of cases to deal with.
*
@@ -1705,15 +1705,15 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
*
* i_size must be updated on disk before we start calling invalidatepage on the
* data.
- *
+ *
* This is done in ext3 by defining an ext3_setattr method which
* updates i_size before truncate gets going. By maintaining this
* invariant, we can be sure that it is safe to throw away any buffers
* attached to the current transaction: once the transaction commits,
* we know that the data will not be needed.
- *
+ *
* Note however that we can *not* throw away data belonging to the
- * previous, committing transaction!
+ * previous, committing transaction!
*
* Any disk blocks which *are* part of the previous, committing
* transaction (and which therefore cannot be discarded immediately) are
@@ -1732,7 +1732,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
* don't make guarantees about the order in which data hits disk --- in
* particular we don't guarantee that new dirty data is flushed before
* transaction commit --- so it is always safe just to discard data
- * immediately in that mode. --sct
+ * immediately in that mode. --sct
*/
/*
@@ -1876,9 +1876,9 @@ zap_buffer_unlocked:
return may_free;
}
-/**
+/**
* void journal_invalidatepage()
- * @journal: journal to use for flush...
+ * @journal: journal to use for flush...
* @page: page to flush
* @offset: length of page to invalidate.
*
@@ -1886,7 +1886,7 @@ zap_buffer_unlocked:
*
*/
void journal_invalidatepage(journal_t *journal,
- struct page *page,
+ struct page *page,
unsigned long offset)
{
struct buffer_head *head, *bh, *next;
@@ -1908,7 +1908,7 @@ void journal_invalidatepage(journal_t *journal,
next = bh->b_this_page;
if (offset <= curr_off) {
- /* This block is wholly outside the truncation point */
+ /* This block is wholly outside the truncation point */
lock_buffer(bh);
may_free &= journal_unmap_buffer(journal, bh);
unlock_buffer(bh);
@@ -1924,8 +1924,8 @@ void journal_invalidatepage(journal_t *journal,
}
}
-/*
- * File a buffer on the given transaction list.
+/*
+ * File a buffer on the given transaction list.
*/
void __journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
@@ -1948,7 +1948,7 @@ void __journal_file_buffer(struct journal_head *jh,
* with __jbd_unexpected_dirty_buffer()'s handling of dirty
* state. */
- if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+ if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
if (test_clear_buffer_dirty(bh) ||
test_clear_buffer_jbddirty(bh))
@@ -2008,7 +2008,7 @@ void journal_file_buffer(struct journal_head *jh,
jbd_unlock_bh_state(jh2bh(jh));
}
-/*
+/*
* Remove a buffer from its current buffer list in preparation for
* dropping it from its current transaction entirely. If the buffer has
* already started to be used by a subsequent transaction, refile the
@@ -2060,7 +2060,7 @@ void __journal_refile_buffer(struct journal_head *jh)
* to the caller to remove the journal_head if necessary. For the
* unlocked journal_refile_buffer call, the caller isn't going to be
* doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
+ * ourselves to avoid a jh leak.
*
* *** The journal_head may be freed by this call! ***
*/
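
The comments tidied above document the JBD handle API: journal_get_write_access() may copy (freeze) a buffer that the committing transaction still needs, journal_dirty_metadata() files it on the running transaction, and journal_stop() releases the handle. A minimal sketch of how an ext3-style caller strings these together, assuming the 2.6-era JBD interface; update_one_block() and its single-credit reservation are hypothetical, not part of this patch.

    #include <linux/jbd.h>

    /* Sketch only: journal one metadata buffer under a handle. */
    static int update_one_block(journal_t *journal, struct buffer_head *bh)
    {
            handle_t *handle;
            int err;

            handle = journal_start(journal, 1);     /* reserve one buffer credit */
            if (IS_ERR(handle))
                    return PTR_ERR(handle);

            /* May freeze a copy of the buffer if the committing
             * transaction still needs the old contents (see above). */
            err = journal_get_write_access(handle, bh);
            if (!err) {
                    /* ... modify bh->b_data under the handle ... */
                    err = journal_dirty_metadata(handle, bh);
            }

            /* journal_stop() can return -EIO if the journal aborted. */
            if (!err)
                    err = journal_stop(handle);
            else
                    journal_stop(handle);
            return err;
    }
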
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 93068697a9bf..f5cf9c93e243 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -364,12 +364,11 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
inode->i_ctime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = (inode->i_size + 511) >> 9;
f = jffs_find_file(c, raw_inode->ino);
- inode->u.generic_ip = (void *)f;
+ inode->i_private = (void *)f;
insert_inode_hash(inode);
return inode;
@@ -442,7 +441,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
});
result = -ENOTDIR;
- if (!(old_dir_f = (struct jffs_file *)old_dir->u.generic_ip)) {
+ if (!(old_dir_f = old_dir->i_private)) {
D(printk("jffs_rename(): Old dir invalid.\n"));
goto jffs_rename_end;
}
@@ -456,7 +455,7 @@ jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* Find the new directory. */
result = -ENOTDIR;
- if (!(new_dir_f = (struct jffs_file *)new_dir->u.generic_ip)) {
+ if (!(new_dir_f = new_dir->i_private)) {
D(printk("jffs_rename(): New dir invalid.\n"));
goto jffs_rename_end;
}
@@ -593,7 +592,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
else {
ddino = ((struct jffs_file *)
- inode->u.generic_ip)->pino;
+ inode->i_private)->pino;
}
D3(printk("jffs_readdir(): \"..\" %u\n", ddino));
if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) {
@@ -604,7 +603,7 @@ jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
filp->f_pos++;
}
- f = ((struct jffs_file *)inode->u.generic_ip)->children;
+ f = ((struct jffs_file *)inode->i_private)->children;
j = 2;
while(f && (f->deleted || j++ < filp->f_pos )) {
@@ -652,7 +651,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
lock_kernel();
D3({
- char *s = (char *)kmalloc(len + 1, GFP_KERNEL);
+ char *s = kmalloc(len + 1, GFP_KERNEL);
memcpy(s, name, len);
s[len] = '\0';
printk("jffs_lookup(): dir: 0x%p, name: \"%s\"\n", dir, s);
@@ -668,7 +667,7 @@ jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
}
r = -EACCES;
- if (!(d = (struct jffs_file *)dir->u.generic_ip)) {
+ if (!(d = (struct jffs_file *)dir->i_private)) {
D(printk("jffs_lookup(): No such inode! (%lu)\n",
dir->i_ino));
goto jffs_lookup_end;
@@ -739,7 +738,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
unsigned long read_len;
int result;
struct inode *inode = (struct inode*)page->mapping->host;
- struct jffs_file *f = (struct jffs_file *)inode->u.generic_ip;
+ struct jffs_file *f = (struct jffs_file *)inode->i_private;
struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
int r;
loff_t offset;
@@ -828,7 +827,7 @@ jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
});
lock_kernel();
- dir_f = (struct jffs_file *)dir->u.generic_ip;
+ dir_f = dir->i_private;
ASSERT(if (!dir_f) {
printk(KERN_ERR "jffs_mkdir(): No reference to a "
@@ -972,7 +971,7 @@ jffs_remove(struct inode *dir, struct dentry *dentry, int type)
kfree(_name);
});
- dir_f = (struct jffs_file *) dir->u.generic_ip;
+ dir_f = dir->i_private;
c = dir_f->c;
result = -ENOENT;
@@ -1082,7 +1081,7 @@ jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
if (!old_valid_dev(rdev))
return -EINVAL;
lock_kernel();
- dir_f = (struct jffs_file *)dir->u.generic_ip;
+ dir_f = dir->i_private;
c = dir_f->c;
D3(printk (KERN_NOTICE "mknod(): down biglock\n"));
@@ -1173,8 +1172,8 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
lock_kernel();
D1({
int len = dentry->d_name.len;
- char *_name = (char *)kmalloc(len + 1, GFP_KERNEL);
- char *_symname = (char *)kmalloc(symname_len + 1, GFP_KERNEL);
+ char *_name = kmalloc(len + 1, GFP_KERNEL);
+ char *_symname = kmalloc(symname_len + 1, GFP_KERNEL);
memcpy(_name, dentry->d_name.name, len);
_name[len] = '\0';
memcpy(_symname, symname, symname_len);
@@ -1186,7 +1185,7 @@ jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
kfree(_symname);
});
- dir_f = (struct jffs_file *)dir->u.generic_ip;
+ dir_f = dir->i_private;
ASSERT(if (!dir_f) {
printk(KERN_ERR "jffs_symlink(): No reference to a "
"jffs_file struct in inode.\n");
@@ -1282,14 +1281,14 @@ jffs_create(struct inode *dir, struct dentry *dentry, int mode,
lock_kernel();
D1({
int len = dentry->d_name.len;
- char *s = (char *)kmalloc(len + 1, GFP_KERNEL);
+ char *s = kmalloc(len + 1, GFP_KERNEL);
memcpy(s, dentry->d_name.name, len);
s[len] = '\0';
printk("jffs_create(): dir: 0x%p, name: \"%s\"\n", dir, s);
kfree(s);
});
- dir_f = (struct jffs_file *)dir->u.generic_ip;
+ dir_f = dir->i_private;
ASSERT(if (!dir_f) {
printk(KERN_ERR "jffs_create(): No reference to a "
"jffs_file struct in inode.\n");
@@ -1403,9 +1402,9 @@ jffs_file_write(struct file *filp, const char *buf, size_t count,
goto out_isem;
}
- if (!(f = (struct jffs_file *)inode->u.generic_ip)) {
- D(printk("jffs_file_write(): inode->u.generic_ip = 0x%p\n",
- inode->u.generic_ip));
+ if (!(f = inode->i_private)) {
+ D(printk("jffs_file_write(): inode->i_private = 0x%p\n",
+ inode->i_private));
goto out_isem;
}
@@ -1693,7 +1692,7 @@ jffs_read_inode(struct inode *inode)
mutex_unlock(&c->fmc->biglock);
return;
}
- inode->u.generic_ip = (void *)f;
+ inode->i_private = f;
inode->i_mode = f->mode;
inode->i_nlink = f->nlink;
inode->i_uid = f->uid;
@@ -1706,7 +1705,6 @@ jffs_read_inode(struct inode *inode)
inode->i_mtime.tv_nsec =
inode->i_ctime.tv_nsec = 0;
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = (inode->i_size + 511) >> 9;
if (S_ISREG(inode->i_mode)) {
inode->i_op = &jffs_file_inode_operations;
@@ -1748,7 +1746,7 @@ jffs_delete_inode(struct inode *inode)
lock_kernel();
inode->i_size = 0;
inode->i_blocks = 0;
- inode->u.generic_ip = NULL;
+ inode->i_private = NULL;
clear_inode(inode);
if (inode->i_nlink == 0) {
c = (struct jffs_control *) inode->i_sb->s_fs_info;
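
The jffs hunks above move the filesystem's per-inode pointer from the old inode->u.generic_ip union member to the generic inode->i_private field. A minimal sketch of the stash-and-retrieve pattern, with struct myfs_node standing in (hypothetically) for struct jffs_file:

    #include <linux/fs.h>

    struct myfs_node {                      /* hypothetical per-inode state */
            unsigned long ino;
    };

    /* Attach private state when the inode is set up ... */
    static void myfs_attach(struct inode *inode, struct myfs_node *n)
    {
            inode->i_private = n;           /* was inode->u.generic_ip */
    }

    /* ... and recover it later; no cast is needed from void *. */
    static struct myfs_node *myfs_node_of(struct inode *inode)
    {
            return inode->i_private;
    }
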
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 9000f1effedf..4a543e114970 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -488,13 +488,11 @@ jffs_create_file(struct jffs_control *c,
{
struct jffs_file *f;
- if (!(f = (struct jffs_file *)kmalloc(sizeof(struct jffs_file),
- GFP_KERNEL))) {
+ if (!(f = kzalloc(sizeof(*f), GFP_KERNEL))) {
D(printk("jffs_create_file(): Failed!\n"));
return NULL;
}
no_jffs_file++;
- memset(f, 0, sizeof(struct jffs_file));
f->ino = raw_inode->ino;
f->pino = raw_inode->pino;
f->nlink = raw_inode->nlink;
@@ -516,7 +514,7 @@ jffs_create_control(struct super_block *sb)
D2(printk("jffs_create_control()\n"));
- if (!(c = (struct jffs_control *)kmalloc(s, GFP_KERNEL))) {
+ if (!(c = kmalloc(s, GFP_KERNEL))) {
goto fail_control;
}
DJM(no_jffs_control++);
@@ -524,7 +522,7 @@ jffs_create_control(struct super_block *sb)
c->gc_task = NULL;
c->hash_len = JFFS_HASH_SIZE;
s = sizeof(struct list_head) * c->hash_len;
- if (!(c->hash = (struct list_head *)kmalloc(s, GFP_KERNEL))) {
+ if (!(c->hash = kmalloc(s, GFP_KERNEL))) {
goto fail_hash;
}
DJM(no_hash++);
@@ -593,8 +591,7 @@ jffs_add_virtual_root(struct jffs_control *c)
D2(printk("jffs_add_virtual_root(): "
"Creating a virtual root directory.\n"));
- if (!(root = (struct jffs_file *)kmalloc(sizeof(struct jffs_file),
- GFP_KERNEL))) {
+ if (!(root = kmalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
return -ENOMEM;
}
no_jffs_file++;
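
Several hunks in this file (and below) collapse kmalloc() plus memset() into kzalloc() and drop the redundant cast on kmalloc()'s void * return. A minimal sketch of the idiom with a hypothetical struct thing:

    #include <linux/slab.h>

    struct thing {
            int refs;
    };

    static struct thing *alloc_thing(void)
    {
            /* kzalloc() is kmalloc() plus zeroing in one call; sizeof(*t)
             * stays correct if the type changes, and void * needs no cast. */
            struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

            if (!t)
                    return NULL;
            t->refs = 1;
            return t;
    }
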
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 7d8ca1aeace2..29b68d939bd9 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -94,8 +94,7 @@ jffs_build_begin(struct jffs_control *c, int unit)
struct mtd_info *mtd;
D3(printk("jffs_build_begin()\n"));
- fmc = (struct jffs_fmcontrol *)kmalloc(sizeof(struct jffs_fmcontrol),
- GFP_KERNEL);
+ fmc = kmalloc(sizeof(*fmc), GFP_KERNEL);
if (!fmc) {
D(printk("jffs_build_begin(): Allocation of "
"struct jffs_fmcontrol failed!\n"));
@@ -486,8 +485,7 @@ jffs_add_node(struct jffs_node *node)
D3(printk("jffs_add_node(): ino = %u\n", node->ino));
- ref = (struct jffs_node_ref *)kmalloc(sizeof(struct jffs_node_ref),
- GFP_KERNEL);
+ ref = kmalloc(sizeof(*ref), GFP_KERNEL);
if (!ref)
return -ENOMEM;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 4780f82825d6..72d9909d95ff 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -263,7 +263,6 @@ void jffs2_read_inode (struct inode *inode)
inode->i_nlink = f->inocache->nlink;
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = (inode->i_size + 511) >> 9;
switch (inode->i_mode & S_IFMT) {
@@ -449,7 +448,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = 0;
inode->i_size = 0;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 68e3953419b4..6de374513c01 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -119,10 +119,9 @@ static int jffs2_get_sb_mtd(struct file_system_type *fs_type,
struct jffs2_sb_info *c;
int ret;
- c = kmalloc(sizeof(*c), GFP_KERNEL);
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c)
return -ENOMEM;
- memset(c, 0, sizeof(*c));
c->mtd = mtd;
sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 4d52593a5fc6..4c74f0944f7e 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -468,7 +468,7 @@ int extRecord(struct inode *ip, xad_t * xp)
int extFill(struct inode *ip, xad_t * xp)
{
int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
- s64 blkno = offsetXAD(xp) >> ip->i_blksize;
+ s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
// assert(ISSPARSE(ip));
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ccbe60aff83d..369d7f39c040 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3115,7 +3115,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
- ip->i_blksize = ip->i_sb->s_blocksize;
ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
ip->i_generation = le32_to_cpu(dip->di_gen);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 495df402916d..bffaca9ae3a2 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -115,7 +115,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
}
jfs_inode->mode2 |= mode;
- inode->i_blksize = sb->s_blocksize;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
jfs_inode->otime = inode->i_ctime.tv_sec;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index e1e0a6e6ebdf..f5afc129d6b1 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -257,7 +257,7 @@ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
int rc = 0;
int xflag;
s64 xaddr;
- sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
+ sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_blkbits;
if (lblock >= file_blocks)
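
With i_blksize removed from struct inode, the jfs hunks derive sizes from the superblock block size or i_blkbits instead. A short worked sketch of the round-up arithmetic used in metapage_get_blocks() above, assuming a 4096-byte block size:

    #include <linux/fs.h>

    /* Blocks needed to cover i_size, rounding up (sketch).
     * Example: i_size = 5000, s_blocksize = 4096, i_blkbits = 12:
     *   (5000 + 4095) >> 12 = 9095 >> 12 = 2 blocks. */
    static sector_t file_block_count(struct inode *inode)
    {
            return (inode->i_size + inode->i_sb->s_blocksize - 1) >>
                    inode->i_blkbits;
    }
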
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index efbb586bed4b..3856efc399c1 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -282,7 +282,7 @@ int txInit(void)
TxLockVHWM = (nTxLock * 8) / 10;
size = sizeof(struct tblock) * nTxBlock;
- TxBlock = (struct tblock *) vmalloc(size);
+ TxBlock = vmalloc(size);
if (TxBlock == NULL)
return -ENOMEM;
@@ -307,7 +307,7 @@ int txInit(void)
* tlock id = 0 is reserved.
*/
size = sizeof(struct tlock) * nTxLock;
- TxLock = (struct tlock *) vmalloc(size);
+ TxLock = vmalloc(size);
if (TxLock == NULL) {
vfree(TxBlock);
return -ENOMEM;
diff --git a/fs/libfs.c b/fs/libfs.c
index ac02ea602c3d..3793aaa14577 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -317,17 +317,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
int simple_readpage(struct file *file, struct page *page)
{
- void *kaddr;
-
- if (PageUptodate(page))
- goto out;
-
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
+ clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
-out:
unlock_page(page);
return 0;
}
@@ -383,7 +375,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
return -ENOMEM;
inode->i_mode = S_IFDIR | 0755;
inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_op = &simple_dir_inode_operations;
@@ -405,7 +396,6 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
goto out;
inode->i_mode = S_IFREG | files->mode;
inode->i_uid = inode->i_gid = 0;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_fop = files->ops;
@@ -547,7 +537,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
attr->get = get;
attr->set = set;
- attr->data = inode->u.generic_ip;
+ attr->data = inode->i_private;
attr->fmt = fmt;
mutex_init(&attr->mutex);
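
The simple_readpage() hunk swaps an open-coded kmap_atomic()/memset()/kunmap_atomic() sequence for clear_highpage(), which performs the same zeroing of a possibly-highmem page. A rough sketch of what the helper stands in for, assuming the 2.6-era kmap_atomic()/KM_USER0 interface shown in the removed lines:

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Roughly what clear_highpage(page) replaces above:
     * map the page (it may live in highmem), zero it, unmap it. */
    static void zero_page_sketch(struct page *page)
    {
            void *kaddr = kmap_atomic(page, KM_USER0);

            memset(kaddr, 0, PAGE_SIZE);
            kunmap_atomic(kaddr, KM_USER0);
    }
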
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 52774feab93f..f95cc3f3c42d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -160,7 +160,7 @@ static void nlmclnt_prepare_reclaim(struct nlm_host *host)
*/
list_splice_init(&host->h_granted, &host->h_reclaim);
- dprintk("NLM: reclaiming locks for host %s", host->h_name);
+ dprintk("NLM: reclaiming locks for host %s\n", host->h_name);
}
static void nlmclnt_finish_reclaim(struct nlm_host *host)
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df14c22..271e2165fff6 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -100,7 +100,7 @@ static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_
res = __nlm_find_lockowner(host, owner);
if (res == NULL) {
spin_unlock(&host->h_lock);
- new = (struct nlm_lockowner *)kmalloc(sizeof(*new), GFP_KERNEL);
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
spin_lock(&host->h_lock);
res = __nlm_find_lockowner(host, owner);
if (res == NULL && new != NULL) {
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
int
nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
{
+ struct rpc_clnt *client = NFS_CLIENT(inode);
+ struct sockaddr_in addr;
struct nlm_host *host;
struct nlm_rqst *call;
sigset_t oldset;
unsigned long flags;
- int status, proto, vers;
+ int status, vers;
vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
return -ENOLCK;
}
- /* Retrieve transport protocol from NFS client */
- proto = NFS_CLIENT(inode)->cl_xprt->prot;
-
- host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+ rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
+ host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
if (host == NULL)
return -ENOLCK;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a1aec0..a0d0b58ce7a4 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
#define NLM_HOST_REBIND (60 * HZ)
#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
-#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr)
static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH];
static unsigned long next_gc;
@@ -100,9 +99,9 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
/* Ooops, no host found, create it */
dprintk("lockd: creating host entry\n");
- if (!(host = (struct nlm_host *) kmalloc(sizeof(*host), GFP_KERNEL)))
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
+ if (!host)
goto nohost;
- memset(host, 0, sizeof(*host));
addr = sin->sin_addr.s_addr;
sprintf(host->h_name, "%u.%u.%u.%u", NIPQUAD(addr));
@@ -167,7 +166,6 @@ struct rpc_clnt *
nlm_bind_host(struct nlm_host *host)
{
struct rpc_clnt *clnt;
- struct rpc_xprt *xprt;
dprintk("lockd: nlm_bind_host(%08x)\n",
(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -179,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
- xprt = clnt->cl_xprt;
if (time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(clnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -187,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
host->h_nextrebind - jiffies);
}
} else {
- xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
- if (IS_ERR(xprt))
- goto forgetit;
-
- xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
- xprt->resvport = 1; /* NLM requires a reserved port */
-
- /* Existing NLM servers accept AUTH_UNIX only */
- clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
- host->h_version, RPC_AUTH_UNIX);
- if (IS_ERR(clnt))
- goto forgetit;
- clnt->cl_autobind = 1; /* turn on pmap queries */
- clnt->cl_softrtry = 1; /* All queries are soft */
-
- host->h_rpcclnt = clnt;
+ unsigned long increment = nlmsvc_timeout * HZ;
+ struct rpc_timeout timeparms = {
+ .to_initval = increment,
+ .to_increment = increment,
+ .to_maxval = increment * 6UL,
+ .to_retries = 5U,
+ };
+ struct rpc_create_args args = {
+ .protocol = host->h_proto,
+ .address = (struct sockaddr *)&host->h_addr,
+ .addrsize = sizeof(host->h_addr),
+ .timeout = &timeparms,
+ .servername = host->h_name,
+ .program = &nlm_program,
+ .version = host->h_version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = (RPC_CLNT_CREATE_HARDRTRY |
+ RPC_CLNT_CREATE_AUTOBIND),
+ };
+
+ clnt = rpc_create(&args);
+ if (!IS_ERR(clnt))
+ host->h_rpcclnt = clnt;
+ else {
+ printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
+ clnt = NULL;
+ }
}
mutex_unlock(&host->h_mutex);
return clnt;
-
-forgetit:
- printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
- mutex_unlock(&host->h_mutex);
- return NULL;
}
/*
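
The nlm_bind_host() rewrite above (and nsm_create() in mon.c below) replaces the two-step xprt_create_proto() plus rpc_new_client()/rpc_create_client() sequence with a single rpc_create() call driven by struct rpc_create_args. A minimal sketch of the consolidated pattern; the address, program, and server name here are placeholders, not values from this patch:

    #include <linux/in.h>
    #include <linux/sunrpc/clnt.h>

    /* Sketch: build transport and client in one rpc_create() call. */
    static struct rpc_clnt *make_client(struct sockaddr_in *addr,
                                        struct rpc_program *prog, u32 vers)
    {
            struct rpc_create_args args = {
                    .protocol       = IPPROTO_UDP,
                    .address        = (struct sockaddr *)addr,
                    .addrsize       = sizeof(*addr),
                    .servername     = "placeholder-server",
                    .program        = prog,
                    .version        = vers,
                    .authflavor     = RPC_AUTH_UNIX,
                    .flags          = RPC_CLNT_CREATE_AUTOBIND,
            };

            return rpc_create(&args);       /* ERR_PTR() on failure */
    }
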
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f46b3e..5954dcb497e4 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
static struct rpc_clnt *
nsm_create(void)
{
- struct rpc_xprt *xprt;
- struct rpc_clnt *clnt;
- struct sockaddr_in sin;
-
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- sin.sin_port = 0;
-
- xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
- if (IS_ERR(xprt))
- return (struct rpc_clnt *)xprt;
- xprt->resvport = 1; /* NSM requires a reserved port */
-
- clnt = rpc_create_client(xprt, "localhost",
- &nsm_program, SM_VERSION,
- RPC_AUTH_NULL);
- if (IS_ERR(clnt))
- goto out_err;
- clnt->cl_softrtry = 1;
- clnt->cl_oneshot = 1;
- return clnt;
-
-out_err:
- return clnt;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = 0,
+ };
+ struct rpc_create_args args = {
+ .protocol = IPPROTO_UDP,
+ .address = (struct sockaddr *)&sin,
+ .addrsize = sizeof(sin),
+ .servername = "localhost",
+ .program = &nsm_program,
+ .version = SM_VERSION,
+ .authflavor = RPC_AUTH_NULL,
+ .flags = (RPC_CLNT_CREATE_ONESHOT),
+ };
+
+ return rpc_create(&args);
}
/*
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 01b4db9e5466..a92dd98f8401 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -100,11 +100,10 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
nlm_debug_print_fh("creating file for", f);
nfserr = nlm_lck_denied_nolocks;
- file = (struct nlm_file *) kmalloc(sizeof(*file), GFP_KERNEL);
+ file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file)
goto out_unlock;
- memset(file, 0, sizeof(*file));
memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
file->f_hash = hash;
init_MUTEX(&file->f_sema);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e4fde1ab22cd..0ff71256e65b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -160,6 +160,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
static void
__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
+ __releases(mb_cache_spinlock)
{
/* Wake up all processes queuing for this cache entry. */
if (ce->e_queued)
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4a6abc49418e..df6b1075b549 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -254,7 +254,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
insert_inode_hash(inode);
mark_inode_dirty(inode);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 330ff9fc7cf0..c11a4b9fb863 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -90,8 +90,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(minix_inode_cachep))
- printk(KERN_INFO "minix_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(minix_inode_cachep);
}
static struct super_operations minix_sops = {
@@ -145,11 +144,10 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
struct inode *root_inode;
struct minix_sb_info *sbi;
- sbi = kmalloc(sizeof(struct minix_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct minix_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
s->s_fs_info = sbi;
- memset(sbi, 0, sizeof(struct minix_sb_info));
/* N.B. These should be compile-time tests.
Unfortunately that is impossible. */
@@ -207,10 +205,9 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
- map = kmalloc(i, GFP_KERNEL);
+ map = kzalloc(i, GFP_KERNEL);
if (!map)
goto out_no_map;
- memset(map, 0, i);
sbi->s_imap = &map[0];
sbi->s_zmap = &map[sbi->s_imap_blocks];
@@ -399,7 +396,7 @@ static void V1_minix_read_inode(struct inode * inode)
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
for (i = 0; i < 9; i++)
minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
@@ -432,7 +429,7 @@ static void V2_minix_read_inode(struct inode * inode)
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
for (i = 0; i < 10; i++)
minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0]));
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 9e44158a7540..d220165d4918 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
struct nameidata *nd)
{
struct super_block *sb = dir->i_sb;
- struct inode *inode;
+ struct inode *inode = NULL;
struct fat_slot_info sinfo;
struct timespec ts;
unsigned char msdos_name[MSDOS_NAME];
@@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
d_instantiate(dentry, inode);
out:
unlock_kernel();
+ if (!err)
+ err = fat_flush_inodes(sb, dir, inode);
return err;
}
@@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
fat_detach(inode);
out:
unlock_kernel();
+ if (!err)
+ err = fat_flush_inodes(inode->i_sb, dir, inode);
return err;
}
@@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_instantiate(dentry, inode);
unlock_kernel();
+ fat_flush_inodes(sb, dir, inode);
return 0;
out_free:
@@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry)
fat_detach(inode);
out:
unlock_kernel();
+ if (!err)
+ err = fat_flush_inodes(inode->i_sb, dir, inode);
return err;
}
@@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dir, new_msdos_name, new_dentry, is_hid);
out:
unlock_kernel();
+ if (!err)
+ err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir);
return err;
}
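
The msdos hunks add fat_flush_inodes() after create, unlink, rmdir and rename so the changes reach disk promptly, and they only let the flush result replace err when the operation itself succeeded. A tiny sketch of that error-preserving idiom; do_op() and do_flush() are hypothetical stand-ins:

    int do_op(void);            /* hypothetical primary operation */
    int do_flush(void);         /* hypothetical flush, e.g. fat_flush_inodes() */

    static int op_then_flush(void)
    {
            int err = do_op();

            if (!err)                       /* overwrite err only on success, */
                    err = do_flush();       /* so a flush failure is still reported */
            return err;
    }
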
diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc6fab0..2892e68d3a86 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -372,6 +372,30 @@ void release_open_intent(struct nameidata *nd)
fput(nd->intent.open.file);
}
+static inline struct dentry *
+do_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ int status = dentry->d_op->d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ /*
+ * The dentry failed validation.
+ * If d_revalidate returned 0 attempt to invalidate
+ * the dentry otherwise d_revalidate is asking us
+ * to return a fail status.
+ */
+ if (!status) {
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ } else {
+ dput(dentry);
+ dentry = ERR_PTR(status);
+ }
+ }
+ return dentry;
+}
+
/*
* Internal lookup() using the new generic dcache.
* SMP-safe
@@ -386,12 +410,9 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
if (!dentry)
dentry = d_lookup(parent, name);
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
- if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- }
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+ dentry = do_revalidate(dentry, nd);
+
return dentry;
}
@@ -484,10 +505,9 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
*/
mutex_unlock(&dir->i_mutex);
if (result->d_op && result->d_op->d_revalidate) {
- if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
- dput(result);
+ result = do_revalidate(result, nd);
+ if (!result)
result = ERR_PTR(-ENOENT);
- }
}
return result;
}
@@ -498,18 +518,20 @@ static int __emul_lookup_dentry(const char *, struct nameidata *);
static __always_inline int
walk_init_root(const char *name, struct nameidata *nd)
{
- read_lock(&current->fs->lock);
- if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
- nd->mnt = mntget(current->fs->altrootmnt);
- nd->dentry = dget(current->fs->altroot);
- read_unlock(&current->fs->lock);
+ struct fs_struct *fs = current->fs;
+
+ read_lock(&fs->lock);
+ if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+ nd->mnt = mntget(fs->altrootmnt);
+ nd->dentry = dget(fs->altroot);
+ read_unlock(&fs->lock);
if (__emul_lookup_dentry(name,nd))
return 0;
- read_lock(&current->fs->lock);
+ read_lock(&fs->lock);
}
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
- read_unlock(&current->fs->lock);
+ nd->mnt = mntget(fs->rootmnt);
+ nd->dentry = dget(fs->root);
+ read_unlock(&fs->lock);
return 1;
}
@@ -704,17 +726,19 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
static __always_inline void follow_dotdot(struct nameidata *nd)
{
+ struct fs_struct *fs = current->fs;
+
while(1) {
struct vfsmount *parent;
struct dentry *old = nd->dentry;
- read_lock(&current->fs->lock);
- if (nd->dentry == current->fs->root &&
- nd->mnt == current->fs->rootmnt) {
- read_unlock(&current->fs->lock);
+ read_lock(&fs->lock);
+ if (nd->dentry == fs->root &&
+ nd->mnt == fs->rootmnt) {
+ read_unlock(&fs->lock);
break;
}
- read_unlock(&current->fs->lock);
+ read_unlock(&fs->lock);
spin_lock(&dcache_lock);
if (nd->dentry != nd->mnt->mnt_root) {
nd->dentry = dget(nd->dentry->d_parent);
@@ -767,12 +791,12 @@ need_lookup:
goto done;
need_revalidate:
- if (dentry->d_op->d_revalidate(dentry, nd))
- goto done;
- if (d_invalidate(dentry))
- goto done;
- dput(dentry);
- goto need_lookup;
+ dentry = do_revalidate(dentry, nd);
+ if (!dentry)
+ goto need_lookup;
+ if (IS_ERR(dentry))
+ goto fail;
+ goto done;
fail:
return PTR_ERR(dentry);
@@ -1022,15 +1046,17 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
struct vfsmount *old_mnt = nd->mnt;
struct qstr last = nd->last;
int last_type = nd->last_type;
+ struct fs_struct *fs = current->fs;
+
/*
- * NAME was not found in alternate root or it's a directory. Try to find
- * it in the normal root:
+ * NAME was not found in alternate root or it's a directory.
+ * Try to find it in the normal root:
*/
nd->last_type = LAST_ROOT;
- read_lock(&current->fs->lock);
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
- read_unlock(&current->fs->lock);
+ read_lock(&fs->lock);
+ nd->mnt = mntget(fs->rootmnt);
+ nd->dentry = dget(fs->root);
+ read_unlock(&fs->lock);
if (path_walk(name, nd) == 0) {
if (nd->dentry->d_inode) {
dput(old_dentry);
@@ -1054,6 +1080,7 @@ void set_fs_altroot(void)
struct vfsmount *mnt = NULL, *oldmnt;
struct dentry *dentry = NULL, *olddentry;
int err;
+ struct fs_struct *fs = current->fs;
if (!emul)
goto set_it;
@@ -1063,12 +1090,12 @@ void set_fs_altroot(void)
dentry = nd.dentry;
}
set_it:
- write_lock(&current->fs->lock);
- oldmnt = current->fs->altrootmnt;
- olddentry = current->fs->altroot;
- current->fs->altrootmnt = mnt;
- current->fs->altroot = dentry;
- write_unlock(&current->fs->lock);
+ write_lock(&fs->lock);
+ oldmnt = fs->altrootmnt;
+ olddentry = fs->altroot;
+ fs->altrootmnt = mnt;
+ fs->altroot = dentry;
+ write_unlock(&fs->lock);
if (olddentry) {
dput(olddentry);
mntput(oldmnt);
@@ -1082,29 +1109,30 @@ static int fastcall do_path_lookup(int dfd, const char *name,
int retval = 0;
int fput_needed;
struct file *file;
+ struct fs_struct *fs = current->fs;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
nd->depth = 0;
if (*name=='/') {
- read_lock(&current->fs->lock);
- if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
- nd->mnt = mntget(current->fs->altrootmnt);
- nd->dentry = dget(current->fs->altroot);
- read_unlock(&current->fs->lock);
+ read_lock(&fs->lock);
+ if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+ nd->mnt = mntget(fs->altrootmnt);
+ nd->dentry = dget(fs->altroot);
+ read_unlock(&fs->lock);
if (__emul_lookup_dentry(name,nd))
goto out; /* found in altroot */
- read_lock(&current->fs->lock);
+ read_lock(&fs->lock);
}
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
- read_unlock(&current->fs->lock);
+ nd->mnt = mntget(fs->rootmnt);
+ nd->dentry = dget(fs->root);
+ read_unlock(&fs->lock);
} else if (dfd == AT_FDCWD) {
- read_lock(&current->fs->lock);
- nd->mnt = mntget(current->fs->pwdmnt);
- nd->dentry = dget(current->fs->pwd);
- read_unlock(&current->fs->lock);
+ read_lock(&fs->lock);
+ nd->mnt = mntget(fs->pwdmnt);
+ nd->dentry = dget(fs->pwd);
+ read_unlock(&fs->lock);
} else {
struct dentry *dentry;
@@ -2370,7 +2398,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
dput(new_dentry);
}
if (!error)
- d_move(old_dentry,new_dentry);
+ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+ d_move(old_dentry,new_dentry);
return error;
}
@@ -2393,8 +2422,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
else
error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
if (!error) {
- /* The following d_move() should become unconditional */
- if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
+ if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
d_move(old_dentry, new_dentry);
}
if (target)
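
The do_revalidate() helper added above folds the repeated d_revalidate/d_invalidate handling into one place and encodes three outcomes in its return value: the dentry itself (still valid), NULL (invalidated and dropped, so the name must be looked up again), or an ERR_PTR() (the filesystem asked for a hard failure). A minimal sketch of a caller honouring that contract; handle_revalidation() and lookup_again() are hypothetical and not part of this patch:

    #include <linux/fs.h>
    #include <linux/namei.h>
    #include <linux/err.h>

    static int lookup_again(struct dentry **dentryp, struct nameidata *nd);
                                            /* hypothetical fallback lookup */

    /* Sketch only: consume do_revalidate()'s three-way return value. */
    static int handle_revalidation(struct dentry **dentryp, struct nameidata *nd)
    {
            struct dentry *dentry = do_revalidate(*dentryp, nd);

            if (IS_ERR(dentry))
                    return PTR_ERR(dentry); /* ->d_revalidate failed hard */
            if (!dentry)
                    return lookup_again(dentryp, nd);       /* redo the lookup */
            *dentryp = dentry;              /* still valid, keep using it */
            return 0;
    }
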
diff --git a/fs/namespace.c b/fs/namespace.c
index fa7ed6a9fc2d..6ede3a539ed8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,10 +13,12 @@
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/module.h>
+#include <linux/sysfs.h>
#include <linux/seq_file.h>
#include <linux/namespace.h>
#include <linux/namei.h>
@@ -28,15 +30,6 @@
extern int __init init_rootfs(void);
-#ifdef CONFIG_SYSFS
-extern int __init sysfs_init(void);
-#else
-static inline int sysfs_init(void)
-{
- return 0;
-}
-#endif
-
/* spinlock for vfsmount related operations, inplace of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
@@ -1821,6 +1814,7 @@ void __init mnt_init(unsigned long mempages)
struct list_head *d;
unsigned int nr_hash;
int i;
+ int err;
init_rwsem(&namespace_sem);
@@ -1861,8 +1855,14 @@ void __init mnt_init(unsigned long mempages)
d++;
i--;
} while (i);
- sysfs_init();
- subsystem_register(&fs_subsys);
+ err = sysfs_init();
+ if (err)
+ printk(KERN_WARNING "%s: sysfs_init error: %d\n",
+ __FUNCTION__, err);
+ err = subsystem_register(&fs_subsys);
+ if (err)
+ printk(KERN_WARNING "%s: subsystem_register error: %d\n",
+ __FUNCTION__, err);
init_rootfs();
init_mount_tree();
}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1ddf77b0b825..42e3bef270c9 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -81,8 +81,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(ncp_inode_cachep))
- printk(KERN_INFO "ncp_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(ncp_inode_cachep);
}
static int ncp_remount(struct super_block *sb, int *flags, char* data)
@@ -224,7 +223,6 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
inode->i_nlink = 1;
inode->i_uid = server->m.uid;
inode->i_gid = server->m.gid;
- inode->i_blksize = NCP_BLOCK_SIZE;
ncp_update_dates(inode, &nwinfo->i);
ncp_update_inode(inode, nwinfo);
@@ -411,11 +409,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
#endif
struct ncp_entry_info finfo;
- server = kmalloc(sizeof(struct ncp_server), GFP_KERNEL);
+ server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
if (!server)
return -ENOMEM;
sb->s_fs_info = server;
- memset(server, 0, sizeof(struct ncp_server));
error = -EFAULT;
if (raw_data == NULL)
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index ca92c2406635..e3d26c1bd105 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -48,7 +48,7 @@ static int ncp_symlink_readpage(struct file *file, struct page *page)
char *buf = kmap(page);
error = -ENOMEM;
- rawlink=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
+ rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
if (!rawlink)
goto fail;
@@ -126,7 +126,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
/* EPERM is returned by VFS if symlink procedure does not exist */
return -EPERM;
- rawlink=(char *)kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
+ rawlink = kmalloc(NCP_MAX_SYMLINK_SIZE, GFP_KERNEL);
if (!rawlink)
return -ENOMEM;
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0c1967..f4580b44eef4 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
obj-$(CONFIG_NFS_FS) += nfs.o
-nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
- proc.o read.o symlink.o unlink.o write.o \
- namespace.o
+nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
+ pagelist.o proc.o read.o symlink.o unlink.o \
+ write.o namespace.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8ac149..a3ee11364db0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
#include "nfs4_fs.h"
#include "callback.h"
+#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_program;
unsigned int nfs_callback_set_tcpport;
unsigned short nfs_callback_tcpport;
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+ char *endp;
+ int num = simple_strtol(val, &endp, 0);
+ if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+ return -EINVAL;
+ *((int *)kp->arg) = num;
+ return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+ &nfs_callback_set_tcpport, 0644);
/*
* This is the callback kernel thread.
@@ -134,10 +150,8 @@ out_err:
/*
* Kill the server process if it is not already up.
*/
-int nfs_callback_down(void)
+void nfs_callback_down(void)
{
- int ret = 0;
-
lock_kernel();
mutex_lock(&nfs_callback_mutex);
nfs_callback_info.users--;
@@ -149,20 +163,19 @@ int nfs_callback_down(void)
} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
mutex_unlock(&nfs_callback_mutex);
unlock_kernel();
- return ret;
}
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
- struct in_addr *addr = &rqstp->rq_addr.sin_addr;
- struct nfs4_client *clp;
+ struct sockaddr_in *addr = &rqstp->rq_addr;
+ struct nfs_client *clp;
/* Don't talk to strangers */
- clp = nfs4_find_client(addr);
+ clp = nfs_find_client(addr, 4);
if (clp == NULL)
return SVC_DROP;
- dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
- nfs4_put_client(clp);
+ dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+ nfs_put_client(clp);
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7fe53a5..5676163d26e8 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+#ifdef CONFIG_NFS_V4
extern int nfs_callback_up(void);
-extern int nfs_callback_down(void);
+extern void nfs_callback_down(void);
+#else
+#define nfs_callback_up() (0)
+#define nfs_callback_down() do {} while(0)
+#endif
extern unsigned int nfs_callback_set_tcpport;
extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483ecdfc..97cf8f71451f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,19 +10,20 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
{
- struct nfs4_client *clp;
+ struct nfs_client *clp;
struct nfs_delegation *delegation;
struct nfs_inode *nfsi;
struct inode *inode;
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- clp = nfs4_find_client(&args->addr->sin_addr);
+ clp = nfs_find_client(args->addr, 4);
if (clp == NULL)
goto out;
inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ out_iput:
up_read(&nfsi->rwsem);
iput(inode);
out_putclient:
- nfs4_put_client(clp);
+ nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
return res->status;
@@ -56,12 +57,12 @@ out:
unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
{
- struct nfs4_client *clp;
+ struct nfs_client *clp;
struct inode *inode;
unsigned res;
res = htonl(NFS4ERR_BADHANDLE);
- clp = nfs4_find_client(&args->addr->sin_addr);
+ clp = nfs_find_client(args->addr, 4);
if (clp == NULL)
goto out;
inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
}
iput(inode);
out_putclient:
- nfs4_put_client(clp);
+ nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 000000000000..ec1938d4b814
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,1448 @@
+/* client.c: NFS client sharing and management code
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
+
+#include <asm/system.h>
+
+#include "nfs4_fs.h"
+#include "callback.h"
+#include "delegation.h"
+#include "iostat.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_CLIENT
+
+static DEFINE_SPINLOCK(nfs_client_lock);
+static LIST_HEAD(nfs_client_list);
+static LIST_HEAD(nfs_volume_list);
+static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+
+/*
+ * RPC cruft for NFS
+ */
+static struct rpc_version *nfs_version[5] = {
+ [2] = &nfs_version2,
+#ifdef CONFIG_NFS_V3
+ [3] = &nfs_version3,
+#endif
+#ifdef CONFIG_NFS_V4
+ [4] = &nfs_version4,
+#endif
+};
+
+struct rpc_program nfs_program = {
+ .name = "nfs",
+ .number = NFS_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfs_version),
+ .version = nfs_version,
+ .stats = &nfs_rpcstat,
+ .pipe_dir_name = "/nfs",
+};
+
+struct rpc_stat nfs_rpcstat = {
+ .program = &nfs_program
+};
+
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version * nfsacl_version[] = {
+ [3] = &nfsacl_version3,
+};
+
+struct rpc_program nfsacl_program = {
+ .name = "nfsacl",
+ .number = NFS_ACL_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfsacl_version),
+ .version = nfsacl_version,
+ .stats = &nfsacl_rpcstat,
+};
+#endif /* CONFIG_NFS_V3_ACL */
+
+/*
+ * Allocate a shared client record
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs_client *nfs_alloc_client(const char *hostname,
+ const struct sockaddr_in *addr,
+ int nfsversion)
+{
+ struct nfs_client *clp;
+ int error;
+
+ if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+ goto error_0;
+
+ error = rpciod_up();
+ if (error < 0) {
+ dprintk("%s: couldn't start rpciod! Error = %d\n",
+ __FUNCTION__, error);
+ goto error_1;
+ }
+ __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+
+ if (nfsversion == 4) {
+ if (nfs_callback_up() < 0)
+ goto error_2;
+ __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+ }
+
+ atomic_set(&clp->cl_count, 1);
+ clp->cl_cons_state = NFS_CS_INITING;
+
+ clp->cl_nfsversion = nfsversion;
+ memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+ if (hostname) {
+ clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (!clp->cl_hostname)
+ goto error_3;
+ }
+
+ INIT_LIST_HEAD(&clp->cl_superblocks);
+ clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_NFS_V4
+ init_rwsem(&clp->cl_sem);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+ INIT_LIST_HEAD(&clp->cl_state_owners);
+ INIT_LIST_HEAD(&clp->cl_unused);
+ spin_lock_init(&clp->cl_lock);
+ INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+ rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+ clp->cl_boot_time = CURRENT_TIME;
+ clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+#endif
+
+ return clp;
+
+error_3:
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+error_2:
+ rpciod_down();
+ __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+error_1:
+ kfree(clp);
+error_0:
+ return NULL;
+}
+
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+ if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+ nfs4_kill_renewd(clp);
+ while (!list_empty(&clp->cl_unused)) {
+ struct nfs4_state_owner *sp;
+
+ sp = list_entry(clp->cl_unused.next,
+ struct nfs4_state_owner,
+ so_list);
+ list_del(&sp->so_list);
+ kfree(sp);
+ }
+ BUG_ON(!list_empty(&clp->cl_state_owners));
+ if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+ nfs_idmap_delete(clp);
+#endif
+}
+
+/*
+ * Destroy a shared client record
+ */
+static void nfs_free_client(struct nfs_client *clp)
+{
+ dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+
+ nfs4_shutdown_client(clp);
+
+ /* -EIO all pending I/O */
+ if (!IS_ERR(clp->cl_rpcclient))
+ rpc_shutdown_client(clp->cl_rpcclient);
+
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down();
+
+ if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+ rpciod_down();
+
+ kfree(clp->cl_hostname);
+ kfree(clp);
+
+ dprintk("<-- nfs_free_client()\n");
+}
+
+/*
+ * Release a reference to a shared client record
+ */
+void nfs_put_client(struct nfs_client *clp)
+{
+ if (!clp)
+ return;
+
+ dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+
+ if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+ list_del(&clp->cl_share_link);
+ spin_unlock(&nfs_client_lock);
+
+ BUG_ON(!list_empty(&clp->cl_superblocks));
+
+ nfs_free_client(clp);
+ }
+}
+
+/*
+ * Find a client by address
+ * - caller must hold nfs_client_lock
+ */
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+ struct nfs_client *clp;
+
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ /* Different NFS versions cannot share the same nfs_client */
+ if (clp->cl_nfsversion != nfsversion)
+ continue;
+
+ if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
+ sizeof(clp->cl_addr.sin_addr)) != 0)
+ continue;
+
+ if (clp->cl_addr.sin_port == addr->sin_port)
+ goto found;
+ }
+
+ return NULL;
+
+found:
+ atomic_inc(&clp->cl_count);
+ return clp;
+}
+
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
+ */
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+ struct nfs_client *clp;
+
+ spin_lock(&nfs_client_lock);
+ clp = __nfs_find_client(addr, nfsversion);
+ spin_unlock(&nfs_client_lock);
+
+ BUG_ON(clp && clp->cl_cons_state == 0);
+
+ return clp;
+}
+
+/*
+ * Look up a client by IP address and protocol version
+ * - creates a new record if one doesn't yet exist
+ */
+static struct nfs_client *nfs_get_client(const char *hostname,
+ const struct sockaddr_in *addr,
+ int nfsversion)
+{
+ struct nfs_client *clp, *new = NULL;
+ int error;
+
+ dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+ hostname ?: "", NIPQUAD(addr->sin_addr),
+ addr->sin_port, nfsversion);
+
+ /* see if the client already exists */
+ do {
+ spin_lock(&nfs_client_lock);
+
+ clp = __nfs_find_client(addr, nfsversion);
+ if (clp)
+ goto found_client;
+ if (new)
+ goto install_client;
+
+ spin_unlock(&nfs_client_lock);
+
+ new = nfs_alloc_client(hostname, addr, nfsversion);
+ } while (new);
+
+ return ERR_PTR(-ENOMEM);
+
+ /* install a new client and return with it unready */
+install_client:
+ clp = new;
+ list_add(&clp->cl_share_link, &nfs_client_list);
+ spin_unlock(&nfs_client_lock);
+ dprintk("--> nfs_get_client() = %p [new]\n", clp);
+ return clp;
+
+ /* found an existing client
+ * - make sure it's ready before returning
+ */
+found_client:
+ spin_unlock(&nfs_client_lock);
+
+ if (new)
+ nfs_free_client(new);
+
+ if (clp->cl_cons_state == NFS_CS_INITING) {
+ DECLARE_WAITQUEUE(myself, current);
+
+ add_wait_queue(&nfs_client_active_wq, &myself);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current) ||
+ clp->cl_cons_state != NFS_CS_INITING)
+ break;
+ schedule();
+ }
+
+ remove_wait_queue(&nfs_client_active_wq, &myself);
+
+ if (signal_pending(current)) {
+ nfs_put_client(clp);
+ return ERR_PTR(-ERESTARTSYS);
+ }
+ }
+
+ if (clp->cl_cons_state < NFS_CS_READY) {
+ error = clp->cl_cons_state;
+ nfs_put_client(clp);
+ return ERR_PTR(error);
+ }
+
+ BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
+ dprintk("--> nfs_get_client() = %p [share]\n", clp);
+ return clp;
+}
+
+/*
+ * Mark a client as ready or failed
+ */
+static void nfs_mark_client_ready(struct nfs_client *clp, int state)
+{
+ clp->cl_cons_state = state;
+ wake_up_all(&nfs_client_active_wq);
+}
+
+/*
+ * Initialise the timeout values for a connection
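+ *
+ * For example (illustrative figures only): a mount with timeo=600 and
+ * retrans=2 over TCP gives a 60 second initial timeout, a 60 second
+ * linear increment and a 180 second maximum; with no values supplied,
+ * UDP falls back to a 1.1 second initial timeout with exponential backoff.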
+ */
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+ unsigned int timeo, unsigned int retrans)
+{
+ to->to_initval = timeo * HZ / 10;
+ to->to_retries = retrans;
+ if (!to->to_retries)
+ to->to_retries = 2;
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ if (!to->to_initval)
+ to->to_initval = 60 * HZ;
+ if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+ to->to_initval = NFS_MAX_TCP_TIMEOUT;
+ to->to_increment = to->to_initval;
+ to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+ to->to_exponential = 0;
+ break;
+ case IPPROTO_UDP:
+ default:
+ if (!to->to_initval)
+ to->to_initval = 11 * HZ / 10;
+ if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+ to->to_initval = NFS_MAX_UDP_TIMEOUT;
+ to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+ to->to_exponential = 1;
+ break;
+ }
+}
+
+/*
+ * Create an RPC client handle
+ */
+static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+ unsigned int timeo,
+ unsigned int retrans,
+ rpc_authflavor_t flavor)
+{
+ struct rpc_timeout timeparms;
+ struct rpc_clnt *clnt = NULL;
+ struct rpc_create_args args = {
+ .protocol = proto,
+ .address = (struct sockaddr *)&clp->cl_addr,
+ .addrsize = sizeof(clp->cl_addr),
+ .timeout = &timeparms,
+ .servername = clp->cl_hostname,
+ .program = &nfs_program,
+ .version = clp->rpc_ops->version,
+ .authflavor = flavor,
+ };
+
+ if (!IS_ERR(clp->cl_rpcclient))
+ return 0;
+
+ nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
+ clp->retrans_timeo = timeparms.to_initval;
+ clp->retrans_count = timeparms.to_retries;
+
+ clnt = rpc_create(&args);
+ if (IS_ERR(clnt)) {
+ dprintk("%s: cannot create RPC client. Error = %ld\n",
+ __FUNCTION__, PTR_ERR(clnt));
+ return PTR_ERR(clnt);
+ }
+
+ clp->cl_rpcclient = clnt;
+ return 0;
+}
+
+/*
+ * Version 2 or 3 per-server resource teardown (ACL client and lockd)
+ */
+static void nfs_destroy_server(struct nfs_server *server)
+{
+ if (!IS_ERR(server->client_acl))
+ rpc_shutdown_client(server->client_acl);
+
+ if (!(server->flags & NFS_MOUNT_NONLM))
+ lockd_down(); /* release rpc.lockd */
+}
+
+/*
+ * Version 2 or 3 lockd setup
+ */
+static int nfs_start_lockd(struct nfs_server *server)
+{
+ int error = 0;
+
+ if (server->nfs_client->cl_nfsversion > 3)
+ goto out;
+ if (server->flags & NFS_MOUNT_NONLM)
+ goto out;
+ error = lockd_up();
+ if (error < 0)
+ server->flags |= NFS_MOUNT_NONLM;
+ else
+ server->destroy = nfs_destroy_server;
+out:
+ return error;
+}
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+#ifdef CONFIG_NFS_V3_ACL
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+ if (server->nfs_client->cl_nfsversion != 3)
+ goto out_noacl;
+ if (server->flags & NFS_MOUNT_NOACL)
+ goto out_noacl;
+
+ server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+ if (IS_ERR(server->client_acl))
+ goto out_noacl;
+
+ /* No errors! Assume that Sun nfsacls are supported */
+ server->caps |= NFS_CAP_ACLS;
+ return;
+
+out_noacl:
+ server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+ server->flags &= ~NFS_MOUNT_NOACL;
+ server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+/*
+ * Create a general RPC client
+ */
+static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ server->client = rpc_clone_client(clp->cl_rpcclient);
+ if (IS_ERR(server->client)) {
+ dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+ return PTR_ERR(server->client);
+ }
+
+ if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
+ struct rpc_auth *auth;
+
+ auth = rpcauth_create(pseudoflavour, server->client);
+ if (IS_ERR(auth)) {
+ dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+ return PTR_ERR(auth);
+ }
+ }
+ server->client->cl_softrtry = 0;
+ if (server->flags & NFS_MOUNT_SOFT)
+ server->client->cl_softrtry = 1;
+
+ server->client->cl_intr = 0;
+ if (server->flags & NFS4_MOUNT_INTR)
+ server->client->cl_intr = 1;
+
+ return 0;
+}
+
+/*
+ * Initialise an NFS2 or NFS3 client
+ */
+static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+{
+ int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+ int error;
+
+ if (clp->cl_cons_state == NFS_CS_READY) {
+ /* the client is already initialised */
+ dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+ return 0;
+ }
+
+ /* Check NFS protocol revision and initialize RPC op vector */
+ clp->rpc_ops = &nfs_v2_clientops;
+#ifdef CONFIG_NFS_V3
+ if (clp->cl_nfsversion == 3)
+ clp->rpc_ops = &nfs_v3_clientops;
+#endif
+ /*
+ * Create a client RPC handle for doing FSSTAT with UNIX auth only
+ * - RFC 2623, sec 2.3.2
+ */
+ error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
+ RPC_AUTH_UNIX);
+ if (error < 0)
+ goto error;
+ nfs_mark_client_ready(clp, NFS_CS_READY);
+ return 0;
+
+error:
+ nfs_mark_client_ready(clp, error);
+ dprintk("<-- nfs_init_client() = xerror %d\n", error);
+ return error;
+}
+
+/*
+ * Initialise a version 2 or 3 server record from the mount data
+ */
+static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+ struct nfs_client *clp;
+ int error, nfsvers = 2;
+
+ dprintk("--> nfs_init_server()\n");
+
+#ifdef CONFIG_NFS_V3
+ if (data->flags & NFS_MOUNT_VER3)
+ nfsvers = 3;
+#endif
+
+ /* Allocate or find a client reference we can use */
+ clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+ if (IS_ERR(clp)) {
+ dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+ return PTR_ERR(clp);
+ }
+
+ error = nfs_init_client(clp, data);
+ if (error < 0)
+ goto error;
+
+ server->nfs_client = clp;
+
+ /* Initialise the client representation from the mount data */
+ server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+ if (data->rsize)
+ server->rsize = nfs_block_size(data->rsize, NULL);
+ if (data->wsize)
+ server->wsize = nfs_block_size(data->wsize, NULL);
+
+ server->acregmin = data->acregmin * HZ;
+ server->acregmax = data->acregmax * HZ;
+ server->acdirmin = data->acdirmin * HZ;
+ server->acdirmax = data->acdirmax * HZ;
+
+ /* Start lockd here, before we might error out */
+ error = nfs_start_lockd(server);
+ if (error < 0)
+ goto error;
+
+ error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+ if (error < 0)
+ goto error;
+
+ server->namelen = data->namlen;
+ /* Create a client RPC handle for the NFSv3 ACL management interface */
+ nfs_init_server_aclclient(server);
+ if (clp->cl_nfsversion == 3) {
+ if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+ server->namelen = NFS3_MAXNAMLEN;
+ server->caps |= NFS_CAP_READDIRPLUS;
+ } else {
+ if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+ server->namelen = NFS2_MAXNAMLEN;
+ }
+
+ dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
+ return 0;
+
+error:
+ server->nfs_client = NULL;
+ nfs_put_client(clp);
+ dprintk("<-- nfs_init_server() = xerror %d\n", error);
+ return error;
+}
+
+/*
+ * Load up the server record from information gained in an fsinfo record
+ */
+static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
+{
+ unsigned long max_rpc_payload;
+
+ /* Work out a lot of parameters */
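+ /* (e.g. a server that advertises preferred read and write sizes of 32768
+ * bytes typically ends up with rsize = wsize = 32768 here, subject to
+ * the rtmax/wtmax, RPC payload and NFS_MAX_FILE_IO_SIZE caps below;
+ * illustrative figures only) */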
+ if (server->rsize == 0)
+ server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+ if (server->wsize == 0)
+ server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+
+ if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
+ server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+ if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
+ server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+
+ max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+ if (server->rsize > max_rpc_payload)
+ server->rsize = max_rpc_payload;
+ if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+ server->rsize = NFS_MAX_FILE_IO_SIZE;
+ server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+ if (server->wsize > max_rpc_payload)
+ server->wsize = max_rpc_payload;
+ if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ server->wsize = NFS_MAX_FILE_IO_SIZE;
+ server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+ server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+ if (server->dtsize > PAGE_CACHE_SIZE)
+ server->dtsize = PAGE_CACHE_SIZE;
+ if (server->dtsize > server->rsize)
+ server->dtsize = server->rsize;
+
+ if (server->flags & NFS_MOUNT_NOAC) {
+ server->acregmin = server->acregmax = 0;
+ server->acdirmin = server->acdirmax = 0;
+ }
+
+ server->maxfilesize = fsinfo->maxfilesize;
+
+ /* We're airborne: set the socket buffer sizes */
+ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+}
+
+/*
+ * Probe filesystem information, including the FSID on v2/v3
+ */
+static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+{
+ struct nfs_fsinfo fsinfo;
+ struct nfs_client *clp = server->nfs_client;
+ int error;
+
+ dprintk("--> nfs_probe_fsinfo()\n");
+
+ if (clp->rpc_ops->set_capabilities != NULL) {
+ error = clp->rpc_ops->set_capabilities(server, mntfh);
+ if (error < 0)
+ goto out_error;
+ }
+
+ fsinfo.fattr = fattr;
+ nfs_fattr_init(fattr);
+ error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+ if (error < 0)
+ goto out_error;
+
+ nfs_server_set_fsinfo(server, &fsinfo);
+
+ /* Get some general file system info */
+ if (server->namelen == 0) {
+ struct nfs_pathconf pathinfo;
+
+ pathinfo.fattr = fattr;
+ nfs_fattr_init(fattr);
+
+ if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+ server->namelen = pathinfo.max_namelen;
+ }
+
+ dprintk("<-- nfs_probe_fsinfo() = 0\n");
+ return 0;
+
+out_error:
+ dprintk("nfs_probe_fsinfo: error = %d\n", -error);
+ return error;
+}
+
+/*
+ * Copy useful information when duplicating a server record
+ */
+static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+{
+ target->flags = source->flags;
+ target->acregmin = source->acregmin;
+ target->acregmax = source->acregmax;
+ target->acdirmin = source->acdirmin;
+ target->acdirmax = source->acdirmax;
+ target->caps = source->caps;
+}
+
+/*
+ * Allocate and initialise a server record
+ */
+static struct nfs_server *nfs_alloc_server(void)
+{
+ struct nfs_server *server;
+
+ server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
+ if (!server)
+ return NULL;
+
+ server->client = server->client_acl = ERR_PTR(-EINVAL);
+
+ /* Zero out the NFS state stuff */
+ INIT_LIST_HEAD(&server->client_link);
+ INIT_LIST_HEAD(&server->master_link);
+
+ server->io_stats = nfs_alloc_iostats();
+ if (!server->io_stats) {
+ kfree(server);
+ return NULL;
+ }
+
+ return server;
+}
+
+/*
+ * Free up a server record
+ */
+void nfs_free_server(struct nfs_server *server)
+{
+ dprintk("--> nfs_free_server()\n");
+
+ spin_lock(&nfs_client_lock);
+ list_del(&server->client_link);
+ list_del(&server->master_link);
+ spin_unlock(&nfs_client_lock);
+
+ if (server->destroy != NULL)
+ server->destroy(server);
+ if (!IS_ERR(server->client))
+ rpc_shutdown_client(server->client);
+
+ nfs_put_client(server->nfs_client);
+
+ nfs_free_iostats(server->io_stats);
+ kfree(server);
+ nfs_release_automount_timer();
+ dprintk("<-- nfs_free_server()\n");
+}
+
+/*
+ * Create a version 2 or 3 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_server *server;
+ struct nfs_fattr fattr;
+ int error;
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ /* Get a client representation */
+ error = nfs_init_server(server, data);
+ if (error < 0)
+ goto error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ /* Probe the root fh to retrieve its FSID */
+ error = nfs_probe_fsinfo(server, mntfh, &fattr);
+ if (error < 0)
+ goto error;
+ if (!(fattr.valid & NFS_ATTR_FATTR)) {
+ error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+ if (error < 0) {
+ dprintk("nfs_create_server: getattr error = %d\n", -error);
+ goto error;
+ }
+ }
+ memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+ return server;
+
+error:
+ nfs_free_server(server);
+ return ERR_PTR(error);
+}
+
+#ifdef CONFIG_NFS_V4
+/*
+ * Initialise an NFS4 client record
+ */
+static int nfs4_init_client(struct nfs_client *clp,
+ int proto, int timeo, int retrans,
+ rpc_authflavor_t authflavour)
+{
+ int error;
+
+ if (clp->cl_cons_state == NFS_CS_READY) {
+ /* the client is initialised already */
+ dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+ return 0;
+ }
+
+ /* Check NFS protocol revision and initialize RPC op vector */
+ clp->rpc_ops = &nfs_v4_clientops;
+
+ error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+ if (error < 0)
+ goto error;
+
+ error = nfs_idmap_new(clp);
+ if (error < 0) {
+ dprintk("%s: failed to create idmapper. Error = %d\n",
+ __FUNCTION__, error);
+ goto error;
+ }
+ __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+
+ nfs_mark_client_ready(clp, NFS_CS_READY);
+ return 0;
+
+error:
+ nfs_mark_client_ready(clp, error);
+ dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+ return error;
+}
+
+/*
+ * Set up an NFS4 client
+ */
+static int nfs4_set_client(struct nfs_server *server,
+ const char *hostname, const struct sockaddr_in *addr,
+ rpc_authflavor_t authflavour,
+ int proto, int timeo, int retrans)
+{
+ struct nfs_client *clp;
+ int error;
+
+ dprintk("--> nfs4_set_client()\n");
+
+ /* Allocate or find a client reference we can use */
+ clp = nfs_get_client(hostname, addr, 4);
+ if (IS_ERR(clp)) {
+ error = PTR_ERR(clp);
+ goto error;
+ }
+ error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+ if (error < 0)
+ goto error_put;
+
+ server->nfs_client = clp;
+ dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+ return 0;
+
+error_put:
+ nfs_put_client(clp);
+error:
+ dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ return error;
+}
+
+/*
+ * Initialise a version 4 server record from the mount data
+ */
+static int nfs4_init_server(struct nfs_server *server,
+ const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+{
+ int error;
+
+ dprintk("--> nfs4_init_server()\n");
+
+ /* Initialise the client representation from the mount data */
+ server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+ server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+ if (data->rsize)
+ server->rsize = nfs_block_size(data->rsize, NULL);
+ if (data->wsize)
+ server->wsize = nfs_block_size(data->wsize, NULL);
+
+ server->acregmin = data->acregmin * HZ;
+ server->acregmax = data->acregmax * HZ;
+ server->acdirmin = data->acdirmin * HZ;
+ server->acdirmax = data->acdirmax * HZ;
+
+ error = nfs_init_server_rpcclient(server, authflavour);
+
+ /* Done */
+ dprintk("<-- nfs4_init_server() = %d\n", error);
+ return error;
+}
+
+/*
+ * Create a version 4 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
+ const char *hostname,
+ const struct sockaddr_in *addr,
+ const char *mntpath,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr fattr;
+ struct nfs_server *server;
+ int error;
+
+ dprintk("--> nfs4_create_server()\n");
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ /* Get a client record */
+ error = nfs4_set_client(server, hostname, addr, authflavour,
+ data->proto, data->timeo, data->retrans);
+ if (error < 0)
+ goto error;
+
+ /* set up the general RPC client */
+ error = nfs4_init_server(server, data, authflavour);
+ if (error < 0)
+ goto error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ /* Probe the root fh to retrieve its FSID */
+ error = nfs4_path_walk(server, mntfh, mntpath);
+ if (error < 0)
+ goto error;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ error = nfs_probe_fsinfo(server, mntfh, &fattr);
+ if (error < 0)
+ goto error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+ dprintk("<-- nfs4_create_server() = %p\n", server);
+ return server;
+
+error:
+ nfs_free_server(server);
+ dprintk("<-- nfs4_create_server() = error %d\n", error);
+ return ERR_PTR(error);
+}
+
+/*
+ * Create an NFS4 referral server record
+ */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+ struct nfs_fh *fh)
+{
+ struct nfs_client *parent_client;
+ struct nfs_server *server, *parent_server;
+ struct nfs_fattr fattr;
+ int error;
+
+ dprintk("--> nfs4_create_referral_server()\n");
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ parent_server = NFS_SB(data->sb);
+ parent_client = parent_server->nfs_client;
+
+ /* Get a client representation.
+ * Note: NFSv4 always uses TCP. */
+ error = nfs4_set_client(server, data->hostname, data->addr,
+ data->authflavor,
+ parent_server->client->cl_xprt->prot,
+ parent_client->retrans_timeo,
+ parent_client->retrans_count);
+ if (error < 0)
+ goto error;
+
+ /* Initialise the client representation from the parent server */
+ nfs_server_copy_userdata(server, parent_server);
+ server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+ error = nfs_init_server_rpcclient(server, data->authflavor);
+ if (error < 0)
+ goto error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ /* probe the filesystem info for this server filesystem */
+ error = nfs_probe_fsinfo(server, fh, &fattr);
+ if (error < 0)
+ goto error;
+
+ dprintk("Referral FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+
+ dprintk("<-- nfs_create_referral_server() = %p\n", server);
+ return server;
+
+error:
+ nfs_free_server(server);
+ dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+ return ERR_PTR(error);
+}
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Clone an NFS2, NFS3 or NFS4 server record
+ */
+struct nfs_server *nfs_clone_server(struct nfs_server *source,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr)
+{
+ struct nfs_server *server;
+ struct nfs_fattr fattr_fsinfo;
+ int error;
+
+ dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+ (unsigned long long) fattr->fsid.major,
+ (unsigned long long) fattr->fsid.minor);
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy data from the source */
+ server->nfs_client = source->nfs_client;
+ atomic_inc(&server->nfs_client->cl_count);
+ nfs_server_copy_userdata(server, source);
+
+ server->fsid = fattr->fsid;
+
+ error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+ if (error < 0)
+ goto out_free_server;
+ if (!IS_ERR(source->client_acl))
+ nfs_init_server_aclclient(server);
+
+ /* probe the filesystem info for this server filesystem */
+ error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
+ if (error < 0)
+ goto out_free_server;
+
+ dprintk("Cloned FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+
+ error = nfs_start_lockd(server);
+ if (error < 0)
+ goto out_free_server;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+
+ dprintk("<-- nfs_clone_server() = %p\n", server);
+ return server;
+
+out_free_server:
+ nfs_free_server(server);
+ dprintk("<-- nfs_clone_server() = error %d\n", error);
+ return ERR_PTR(error);
+}
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_fs_nfs;
+
+static int nfs_server_list_open(struct inode *inode, struct file *file);
+static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_server_list_stop(struct seq_file *p, void *v);
+static int nfs_server_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_server_list_ops = {
+ .start = nfs_server_list_start,
+ .next = nfs_server_list_next,
+ .stop = nfs_server_list_stop,
+ .show = nfs_server_list_show,
+};
+
+static struct file_operations nfs_server_list_fops = {
+ .open = nfs_server_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int nfs_volume_list_open(struct inode *inode, struct file *file);
+static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_volume_list_stop(struct seq_file *p, void *v);
+static int nfs_volume_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_volume_list_ops = {
+ .start = nfs_volume_list_start,
+ .next = nfs_volume_list_next,
+ .stop = nfs_volume_list_stop,
+ .show = nfs_volume_list_show,
+};
+
+static struct file_operations nfs_volume_list_fops = {
+ .open = nfs_volume_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
+ * we're dealing
+ */
+static int nfs_server_list_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &nfs_server_list_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = PDE(inode)->data;
+
+ return 0;
+}
+
+/*
+ * set up the iterator to start reading from the server list and return the first item
+ */
+static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ /* lock the list against modification */
+ spin_lock(&nfs_client_lock);
+
+ /* allow for the header line */
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--;
+
+ /* find the n'th element in the list */
+ list_for_each(_p, &nfs_client_list)
+ if (!pos--)
+ break;
+
+ return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * move to next server
+ */
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
+
+ return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the server list
+ */
+static void nfs_server_list_stop(struct seq_file *p, void *v)
+{
+ spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by one line per server record
+ */
+static int nfs_server_list_show(struct seq_file *m, void *v)
+{
+ struct nfs_client *clp;
+
+ /* display header on line 1 */
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
+ return 0;
+ }
+
+ /* display one client record per line on subsequent lines */
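+ /* e.g. "v3 c0a80101  801   1 nfs.example.com" for a single reference to
+ * an NFSv3 client bound to 192.168.1.1:2049 (illustrative values only) */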
+ clp = list_entry(v, struct nfs_client, cl_share_link);
+
+ seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
+ clp->cl_nfsversion,
+ NIPQUAD(clp->cl_addr.sin_addr),
+ ntohs(clp->cl_addr.sin_port),
+ atomic_read(&clp->cl_count),
+ clp->cl_hostname);
+
+ return 0;
+}
+
+/*
+ * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
+ */
+static int nfs_volume_list_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &nfs_volume_list_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = PDE(inode)->data;
+
+ return 0;
+}
+
+/*
+ * set up the iterator to start reading from the volume list and return the first item
+ */
+static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
+{
+ struct list_head *_p;
+ loff_t pos = *_pos;
+
+ /* lock the list against modification */
+ spin_lock(&nfs_client_lock);
+
+ /* allow for the header line */
+ if (!pos)
+ return SEQ_START_TOKEN;
+ pos--;
+
+ /* find the n'th element in the list */
+ list_for_each(_p, &nfs_volume_list)
+ if (!pos--)
+ break;
+
+ return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * move to next volume
+ */
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+ struct list_head *_p;
+
+ (*pos)++;
+
+ _p = v;
+ _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
+
+ return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the volume list
+ */
+static void nfs_volume_list_stop(struct seq_file *p, void *v)
+{
+ spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by one line per volume record
+ */
+static int nfs_volume_list_show(struct seq_file *m, void *v)
+{
+ struct nfs_server *server;
+ struct nfs_client *clp;
+ char dev[8], fsid[17];
+
+ /* display header on line 1 */
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(m, "NV SERVER PORT DEV FSID\n");
+ return 0;
+ }
+ /* display one volume record per line on subsequent lines */
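+ /* e.g. "v3 c0a80101  801 0:16    1:2" with the device number and FSID
+ * shown in the last two columns (illustrative values only) */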
+ server = list_entry(v, struct nfs_server, master_link);
+ clp = server->nfs_client;
+
+ snprintf(dev, 8, "%u:%u",
+ MAJOR(server->s_dev), MINOR(server->s_dev));
+
+ snprintf(fsid, 17, "%llx:%llx",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+
+ seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+ clp->cl_nfsversion,
+ NIPQUAD(clp->cl_addr.sin_addr),
+ ntohs(clp->cl_addr.sin_port),
+ dev,
+ fsid);
+
+ return 0;
+}
+
+/*
+ * initialise the /proc/fs/nfsfs/ directory
+ */
+int __init nfs_fs_proc_init(void)
+{
+ struct proc_dir_entry *p;
+
+ proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
+ if (!proc_fs_nfs)
+ goto error_0;
+
+ proc_fs_nfs->owner = THIS_MODULE;
+
+ /* a file of servers with which we're dealing */
+ p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+ if (!p)
+ goto error_1;
+
+ p->proc_fops = &nfs_server_list_fops;
+ p->owner = THIS_MODULE;
+
+ /* a file of volumes that we have mounted */
+ p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+ if (!p)
+ goto error_2;
+
+ p->proc_fops = &nfs_volume_list_fops;
+ p->owner = THIS_MODULE;
+ return 0;
+
+error_2:
+ remove_proc_entry("servers", proc_fs_nfs);
+error_1:
+ remove_proc_entry("nfsfs", proc_root_fs);
+error_0:
+ return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/nfsfs/ directory
+ */
+void nfs_fs_proc_exit(void)
+{
+ remove_proc_entry("volumes", proc_fs_nfs);
+ remove_proc_entry("servers", proc_fs_nfs);
+ remove_proc_entry("nfsfs", proc_root_fs);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a316c05e..841c99a9b11c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,11 +18,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
-
-static struct nfs_delegation *nfs_alloc_delegation(void)
-{
- return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
-}
+#include "internal.h"
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
@@ -52,7 +48,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
case -NFS4ERR_EXPIRED:
/* kill_proc(fl->fl_pid, SIGLOST, 1); */
case -NFS4ERR_STALE_CLIENTID:
- nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+ nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
goto out_err;
}
}
@@ -114,7 +110,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
*/
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
- struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int status = 0;
@@ -123,7 +119,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
- delegation = nfs_alloc_delegation();
+ delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
if (delegation == NULL)
return -ENOMEM;
memcpy(delegation->stateid.data, res->delegation.data,
@@ -145,7 +141,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
sizeof(delegation->stateid)) != 0 ||
delegation->type != nfsi->delegation->type) {
printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
- __FUNCTION__, NIPQUAD(clp->cl_addr));
+ __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
status = -EIO;
}
}
@@ -176,7 +172,7 @@ static void nfs_msync_inode(struct inode *inode)
*/
int __nfs_inode_return_delegation(struct inode *inode)
{
- struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int res = 0;
@@ -208,7 +204,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
*/
void nfs_return_all_delegations(struct super_block *sb)
{
- struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+ struct nfs_client *clp = NFS_SB(sb)->nfs_client;
struct nfs_delegation *delegation;
struct inode *inode;
@@ -232,7 +228,7 @@ restart:
int nfs_do_expire_all_delegations(void *ptr)
{
- struct nfs4_client *clp = ptr;
+ struct nfs_client *clp = ptr;
struct nfs_delegation *delegation;
struct inode *inode;
@@ -254,11 +250,11 @@ restart:
}
out:
spin_unlock(&clp->cl_lock);
- nfs4_put_client(clp);
+ nfs_put_client(clp);
module_put_and_exit(0);
}
-void nfs_expire_all_delegations(struct nfs4_client *clp)
+void nfs_expire_all_delegations(struct nfs_client *clp)
{
struct task_struct *task;
@@ -266,17 +262,17 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
atomic_inc(&clp->cl_count);
task = kthread_run(nfs_do_expire_all_delegations, clp,
"%u.%u.%u.%u-delegreturn",
- NIPQUAD(clp->cl_addr));
+ NIPQUAD(clp->cl_addr.sin_addr));
if (!IS_ERR(task))
return;
- nfs4_put_client(clp);
+ nfs_put_client(clp);
module_put(THIS_MODULE);
}
/*
* Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
*/
-void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+void nfs_handle_cb_pathdown(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
struct inode *inode;
@@ -299,7 +295,7 @@ restart:
struct recall_threadargs {
struct inode *inode;
- struct nfs4_client *clp;
+ struct nfs_client *clp;
const nfs4_stateid *stateid;
struct completion started;
@@ -310,7 +306,7 @@ static int recall_thread(void *data)
{
struct recall_threadargs *args = (struct recall_threadargs *)data;
struct inode *inode = igrab(args->inode);
- struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
@@ -371,7 +367,7 @@ out_module_put:
/*
* Retrieve the inode associated with a delegation
*/
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
struct inode *res = NULL;
@@ -389,7 +385,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
/*
* Mark all delegations as needing to be reclaimed
*/
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+void nfs_delegation_mark_reclaim(struct nfs_client *clp)
{
struct nfs_delegation *delegation;
spin_lock(&clp->cl_lock);
@@ -401,7 +397,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
/*
* Reap all unclaimed delegations after reboot recovery is done
*/
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
struct nfs_delegation *delegation, *n;
LIST_HEAD(head);
@@ -423,7 +419,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
{
- struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694652fa..2cfd4b24c7fe 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
int __nfs_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_return_all_delegations(struct super_block *sb);
-void nfs_expire_all_delegations(struct nfs4_client *clp);
-void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+void nfs_expire_all_delegations(struct nfs_client *clp);
+void nfs_handle_cb_pathdown(struct nfs_client *clp);
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+void nfs_delegation_mark_reclaim(struct nfs_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4deb3e5..7432f1a43f3d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,7 +30,9 @@
#include <linux/nfs_mount.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
+#include <linux/pagevec.h>
#include <linux/namei.h>
+#include <linux/mount.h>
#include "nfs4_fs.h"
#include "delegation.h"
@@ -870,14 +872,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
return (nd->intent.open.flags & O_EXCL) != 0;
}
-static inline int nfs_reval_fsid(struct inode *dir,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+ struct nfs_fh *fh, struct nfs_fattr *fattr)
{
struct nfs_server *server = NFS_SERVER(dir);
if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
/* Revalidate fsid on root dir */
- return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+ return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
return 0;
}
@@ -902,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
lock_kernel();
- /* If we're doing an exclusive create, optimize away the lookup */
- if (nfs_is_exclusive_create(dir, nd))
- goto no_entry;
+ /*
+ * If we're doing an exclusive create, optimize away the lookup
+ * but don't hash the dentry.
+ */
+ if (nfs_is_exclusive_create(dir, nd)) {
+ d_instantiate(dentry, NULL);
+ res = NULL;
+ goto out_unlock;
+ }
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error == -ENOENT)
@@ -913,7 +921,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = ERR_PTR(error);
goto out_unlock;
}
- error = nfs_reval_fsid(dir, &fhandle, &fattr);
+ error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
if (error < 0) {
res = ERR_PTR(error);
goto out_unlock;
@@ -922,8 +930,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
res = (struct dentry *)inode;
if (IS_ERR(res))
goto out_unlock;
+
no_entry:
- res = d_add_unique(dentry, inode);
+ res = d_materialise_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry);
@@ -1117,11 +1126,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
dput(dentry);
return NULL;
}
- alias = d_add_unique(dentry, inode);
+
+ alias = d_materialise_unique(dentry, inode);
if (alias != NULL) {
dput(dentry);
dentry = alias;
}
+
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return dentry;
@@ -1143,23 +1154,22 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
struct inode *dir = dentry->d_parent->d_inode;
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
if (error)
- goto out_err;
+ return error;
}
if (!(fattr->valid & NFS_ATTR_FATTR)) {
struct nfs_server *server = NFS_SB(dentry->d_sb);
- error = server->rpc_ops->getattr(server, fhandle, fattr);
+ error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
if (error < 0)
- goto out_err;
+ return error;
}
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
error = PTR_ERR(inode);
if (IS_ERR(inode))
- goto out_err;
+ return error;
d_instantiate(dentry, inode);
+ if (d_unhashed(dentry))
+ d_rehash(dentry);
return 0;
-out_err:
- d_drop(dentry);
- return error;
}
/*
@@ -1440,48 +1450,82 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
return error;
}
-static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+/*
+ * To create a symbolic link, most file systems instantiate a new inode,
+ * add a page to it containing the path, then write it out to the disk
+ * using prepare_write/commit_write.
+ *
+ * Unfortunately the NFS client can't create the in-core inode first
+ * because it needs a file handle to create an in-core inode (see
+ * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
+ * symlink request has completed on the server.
+ *
+ * So instead we allocate a raw page, copy the symname into it, then do
+ * the SYMLINK request with the page as the buffer. If it succeeds, we
+ * now have a new file handle and can instantiate an in-core NFS inode
+ * and move the raw page into its mapping.
+ */
+static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
+ struct pagevec lru_pvec;
+ struct page *page;
+ char *kaddr;
struct iattr attr;
- struct nfs_fattr sym_attr;
- struct nfs_fh sym_fh;
- struct qstr qsymname;
+ unsigned int pathlen = strlen(symname);
int error;
dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry->d_name.name, symname);
-#ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
- /*
- * Fill in the sattr for the call.
- * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
- */
- attr.ia_valid = ATTR_MODE;
- attr.ia_mode = S_IFLNK | S_IRWXUGO;
+ if (pathlen > PAGE_SIZE)
+ return -ENAMETOOLONG;
- qsymname.name = symname;
- qsymname.len = strlen(symname);
+ attr.ia_mode = S_IFLNK | S_IRWXUGO;
+ attr.ia_valid = ATTR_MODE;
lock_kernel();
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ unlock_kernel();
+ return -ENOMEM;
+ }
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(kaddr, symname, pathlen);
+ if (pathlen < PAGE_SIZE)
+ memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
+ kunmap_atomic(kaddr, KM_USER0);
+
nfs_begin_data_update(dir);
- error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
- &attr, &sym_fh, &sym_attr);
+ error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
nfs_end_data_update(dir);
- if (!error) {
- error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
- } else {
- if (error == -EEXIST)
- printk("nfs_proc_symlink: %s/%s already exists??\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ if (error != 0) {
+ dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+ dir->i_sb->s_id, dir->i_ino,
+ dentry->d_name.name, symname, error);
d_drop(dentry);
+ __free_page(page);
+ unlock_kernel();
+ return error;
}
+
+ /*
+ * No big deal if we can't add this page to the page cache here.
+ * READLINK will get the missing page from the server if needed.
+ */
+ pagevec_init(&lru_pvec, 0);
+ if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+ GFP_KERNEL)) {
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add(&lru_pvec);
+ SetPageUptodate(page);
+ unlock_page(page);
+ } else
+ __free_page(page);
+
unlock_kernel();
- return error;
+ return 0;
}
static int
@@ -1625,8 +1669,7 @@ out:
if (rehash)
d_rehash(rehash);
if (!error) {
- if (!S_ISDIR(old_inode->i_mode))
- d_move(old_dentry, new_dentry);
+ d_move(old_dentry, new_dentry);
nfs_renew_times(new_dentry);
nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
}
@@ -1638,35 +1681,211 @@ out:
return error;
}
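+
+/*
+ * Global LRU of inodes that hold cached ACCESS results, plus a count of
+ * the cached entries; both are consulted by the cache shrinker below.
+ */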
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+ put_rpccred(entry->cred);
+ kfree(entry);
+ smp_mb__before_atomic_dec();
+ atomic_long_dec(&nfs_access_nr_entries);
+ smp_mb__after_atomic_dec();
+}
+
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+ LIST_HEAD(head);
+ struct nfs_inode *nfsi;
+ struct nfs_access_entry *cache;
+
+ spin_lock(&nfs_access_lru_lock);
+restart:
+ list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+ struct inode *inode;
+
+ if (nr_to_scan-- == 0)
+ break;
+ inode = igrab(&nfsi->vfs_inode);
+ if (inode == NULL)
+ continue;
+ spin_lock(&inode->i_lock);
+ if (list_empty(&nfsi->access_cache_entry_lru))
+ goto remove_lru_entry;
+ cache = list_entry(nfsi->access_cache_entry_lru.next,
+ struct nfs_access_entry, lru);
+ list_move(&cache->lru, &head);
+ rb_erase(&cache->rb_node, &nfsi->access_cache);
+ if (!list_empty(&nfsi->access_cache_entry_lru))
+ list_move_tail(&nfsi->access_cache_inode_lru,
+ &nfs_access_lru_list);
+ else {
+remove_lru_entry:
+ list_del_init(&nfsi->access_cache_inode_lru);
+ clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ }
+ spin_unlock(&inode->i_lock);
+ iput(inode);
+ goto restart;
+ }
+ spin_unlock(&nfs_access_lru_lock);
+ while (!list_empty(&head)) {
+ cache = list_entry(head.next, struct nfs_access_entry, lru);
+ list_del(&cache->lru);
+ nfs_access_free_entry(cache);
+ }
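+ /* report roughly how many entries remain, scaled by the VFS cache
+ * pressure tunable, so the VM can gauge how much is left to reclaim */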
+ return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct rb_root *root_node = &nfsi->access_cache;
+ struct rb_node *n, *dispose = NULL;
+ struct nfs_access_entry *entry;
+
+ /* Unhook entries from the cache */
+ while ((n = rb_first(root_node)) != NULL) {
+ entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ rb_erase(n, root_node);
+ list_del(&entry->lru);
+ n->rb_left = dispose;
+ dispose = n;
+ }
+ nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+ spin_unlock(&inode->i_lock);
+
+ /* Now kill them all! */
+ while (dispose != NULL) {
+ n = dispose;
+ dispose = n->rb_left;
+ nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+ }
+}
+
+void nfs_access_zap_cache(struct inode *inode)
+{
+ /* Remove from global LRU init */
+ if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+ spin_lock(&nfs_access_lru_lock);
+ list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+ spin_unlock(&nfs_access_lru_lock);
+ }
+
+ spin_lock(&inode->i_lock);
+ /* This will release the spinlock */
+ __nfs_access_zap_cache(inode);
+}
+
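+/*
+ * The per-inode access cache is kept in an rbtree ordered by the raw
+ * rpc_cred pointer value, so entries are compared by credential address
+ * rather than by contents.
+ */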
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+ struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+ struct nfs_access_entry *entry;
+
+ while (n != NULL) {
+ entry = rb_entry(n, struct nfs_access_entry, rb_node);
+
+ if (cred < entry->cred)
+ n = n->rb_left;
+ else if (cred > entry->cred)
+ n = n->rb_right;
+ else
+ return entry;
+ }
+ return NULL;
+}
+
int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_access_entry *cache = &nfsi->cache_access;
+ struct nfs_access_entry *cache;
+ int err = -ENOENT;
- if (cache->cred != cred
- || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
- || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
- return -ENOENT;
- memcpy(res, cache, sizeof(*res));
- return 0;
+ spin_lock(&inode->i_lock);
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out_zap;
+ cache = nfs_access_search_rbtree(inode, cred);
+ if (cache == NULL)
+ goto out;
+ if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+ goto out_stale;
+ res->jiffies = cache->jiffies;
+ res->cred = cache->cred;
+ res->mask = cache->mask;
+ list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
+ err = 0;
+out:
+ spin_unlock(&inode->i_lock);
+ return err;
+out_stale:
+ rb_erase(&cache->rb_node, &nfsi->access_cache);
+ list_del(&cache->lru);
+ spin_unlock(&inode->i_lock);
+ nfs_access_free_entry(cache);
+ return -ENOENT;
+out_zap:
+ /* This will release the spinlock */
+ __nfs_access_zap_cache(inode);
+ return -ENOENT;
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_access_entry *cache = &nfsi->cache_access;
+ struct rb_root *root_node = &nfsi->access_cache;
+ struct rb_node **p = &root_node->rb_node;
+ struct rb_node *parent = NULL;
+ struct nfs_access_entry *entry;
- if (cache->cred != set->cred) {
- if (cache->cred)
- put_rpccred(cache->cred);
- cache->cred = get_rpccred(set->cred);
- }
- /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+ if (set->cred < entry->cred)
+ p = &parent->rb_left;
+ else if (set->cred > entry->cred)
+ p = &parent->rb_right;
+ else
+ goto found;
+ }
+ rb_link_node(&set->rb_node, parent, p);
+ rb_insert_color(&set->rb_node, root_node);
+ list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
spin_unlock(&inode->i_lock);
+ return;
+found:
+ rb_replace_node(parent, &set->rb_node, root_node);
+ list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+ list_del(&entry->lru);
+ spin_unlock(&inode->i_lock);
+ nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+ struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache == NULL)
+ return;
+ RB_CLEAR_NODE(&cache->rb_node);
cache->jiffies = set->jiffies;
+ cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
+
+ nfs_access_add_rbtree(inode, cache);
+
+ /* Update accounting */
+ smp_mb__before_atomic_inc();
+ atomic_long_inc(&nfs_access_nr_entries);
+ smp_mb__after_atomic_inc();
+
+ /* Add inode to global LRU list */
+ if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+ spin_lock(&nfs_access_lru_lock);
+ list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+ spin_unlock(&nfs_access_lru_lock);
+ }
}
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 76ca1cbc38f9..377839bed172 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -855,6 +855,5 @@ int __init nfs_init_directcache(void)
*/
void nfs_destroy_directcache(void)
{
- if (kmem_cache_destroy(nfs_direct_cachep))
- printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
+ kmem_cache_destroy(nfs_direct_cachep);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e892880d5b..be997d649127 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
lock_kernel();
- res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
+ res = NFS_PROTO(inode)->file_open(inode, filp);
unlock_kernel();
return res;
}
@@ -157,7 +157,7 @@ force_reval:
static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
/* origin == SEEK_END => we must revalidate the cached file length */
- if (origin == 2) {
+ if (origin == SEEK_END) {
struct inode *inode = filp->f_mapping->host;
int retval = nfs_revalidate_file_size(inode, filp);
if (retval < 0)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 000000000000..76b08ae9ed82
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,311 @@
+/* getroot.c: get the root dentry for an NFS mount
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/security.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "nfs4_fs.h"
+#include "delegation.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_CLIENT
+#define NFS_PARANOIA 1
+
+/*
+ * get an NFS2/NFS3 root dentry from the root filehandle
+ */
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_fsinfo fsinfo;
+ struct nfs_fattr fattr;
+ struct dentry *mntroot;
+ struct inode *inode;
+ int error;
+
+ /* create a dummy root dentry with dummy inode for this superblock */
+ if (!sb->s_root) {
+ struct nfs_fh dummyfh;
+ struct dentry *root;
+ struct inode *iroot;
+
+ memset(&dummyfh, 0, sizeof(dummyfh));
+ memset(&fattr, 0, sizeof(fattr));
+ nfs_fattr_init(&fattr);
+ fattr.valid = NFS_ATTR_FATTR;
+ fattr.type = NFDIR;
+ fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+ fattr.nlink = 2;
+
+ iroot = nfs_fhget(sb, &dummyfh, &fattr);
+ if (IS_ERR(iroot))
+ return ERR_PTR(PTR_ERR(iroot));
+
+ root = d_alloc_root(iroot);
+ if (!root) {
+ iput(iroot);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sb->s_root = root;
+ }
+
+ /* get the actual root for this mount */
+ fsinfo.fattr = &fattr;
+
+ error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ if (error < 0) {
+ dprintk("nfs_get_root: getattr error = %d\n", -error);
+ return ERR_PTR(error);
+ }
+
+ inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+ if (IS_ERR(inode)) {
+ dprintk("nfs_get_root: get root inode failed\n");
+ return ERR_PTR(PTR_ERR(inode));
+ }
+
+ /* root dentries normally start off anonymous and get spliced in later
+ * if the dentry tree reaches them; however if the dentry already
+ * exists, we'll pick it up at this point and use it as the root
+ */
+ mntroot = d_alloc_anon(inode);
+ if (!mntroot) {
+ iput(inode);
+ dprintk("nfs_get_root: get root dentry failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ security_d_instantiate(mntroot, inode);
+
+ if (!mntroot->d_op)
+ mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+ return mntroot;
+}
+
+#ifdef CONFIG_NFS_V4
+
+/*
+ * Do a simple pathwalk from the root FH of the server to the nominated target
+ * of the mountpoint
+ * - give error on symlinks
+ * - give error on ".." occurring in the path
+ * - follow traversals
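+ * - e.g. mounting "server:/export/data" walks the components "export" and
+ *   then "data" in turn, starting from the server's root filehandle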
+ */
+int nfs4_path_walk(struct nfs_server *server,
+ struct nfs_fh *mntfh,
+ const char *path)
+{
+ struct nfs_fsinfo fsinfo;
+ struct nfs_fattr fattr;
+ struct nfs_fh lastfh;
+ struct qstr name;
+ int ret;
+ //int referral_count = 0;
+
+ dprintk("--> nfs4_path_walk(,,%s)\n", path);
+
+ fsinfo.fattr = &fattr;
+ nfs_fattr_init(&fattr);
+
+ if (*path++ != '/') {
+ dprintk("nfs4_get_root: Path does not begin with a slash\n");
+ return -EINVAL;
+ }
+
+ /* Start by getting the root filehandle from the server */
+ ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ if (ret < 0) {
+ dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+ return ret;
+ }
+
+ if (fattr.type != NFDIR) {
+ printk(KERN_ERR "nfs4_get_root:"
+ " getroot encountered non-directory\n");
+ return -ENOTDIR;
+ }
+
+ if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+ printk(KERN_ERR "nfs4_get_root:"
+ " getroot obtained referral\n");
+ return -EREMOTE;
+ }
+
+next_component:
+ dprintk("Next: %s\n", path);
+
+ /* extract the next bit of the path */
+ if (!*path)
+ goto path_walk_complete;
+
+ name.name = path;
+ while (*path && *path != '/')
+ path++;
+ name.len = path - (const char *) name.name;
+
+eat_dot_dir:
+ while (*path == '/')
+ path++;
+
+ if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+ path += 2;
+ goto eat_dot_dir;
+ }
+
+ if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])) {
+ printk(KERN_ERR "nfs4_get_root:"
+ " Mount path contains reference to \"..\"\n");
+ return -EINVAL;
+ }
+
+ /* lookup the next FH in the sequence */
+ memcpy(&lastfh, mntfh, sizeof(lastfh));
+
+ dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+
+ ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+ mntfh, &fattr);
+ if (ret < 0) {
+ dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+ return ret;
+ }
+
+ if (fattr.type != NFDIR) {
+ printk(KERN_ERR "nfs4_get_root:"
+ " lookupfh encountered non-directory\n");
+ return -ENOTDIR;
+ }
+
+ if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+ printk(KERN_ERR "nfs4_get_root:"
+ " lookupfh obtained referral\n");
+ return -EREMOTE;
+ }
+
+ goto next_component;
+
+path_walk_complete:
+ memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+ dprintk("<-- nfs4_path_walk() = 0\n");
+ return 0;
+}
+
+/*
+ * get an NFS4 root dentry from the root filehandle
+ */
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+ struct nfs_server *server = NFS_SB(sb);
+ struct nfs_fattr fattr;
+ struct dentry *mntroot;
+ struct inode *inode;
+ int error;
+
+ dprintk("--> nfs4_get_root()\n");
+
+ /* create a dummy root dentry with dummy inode for this superblock */
+ if (!sb->s_root) {
+ struct nfs_fh dummyfh;
+ struct dentry *root;
+ struct inode *iroot;
+
+ memset(&dummyfh, 0, sizeof(dummyfh));
+ memset(&fattr, 0, sizeof(fattr));
+ nfs_fattr_init(&fattr);
+ fattr.valid = NFS_ATTR_FATTR;
+ fattr.type = NFDIR;
+ fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+ fattr.nlink = 2;
+
+ iroot = nfs_fhget(sb, &dummyfh, &fattr);
+ if (IS_ERR(iroot))
+ return ERR_PTR(PTR_ERR(iroot));
+
+ root = d_alloc_root(iroot);
+ if (!root) {
+ iput(iroot);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sb->s_root = root;
+ }
+
+ /* get the info about the server and filesystem */
+ error = nfs4_server_capabilities(server, mntfh);
+ if (error < 0) {
+ dprintk("nfs_get_root: getcaps error = %d\n",
+ -error);
+ return ERR_PTR(error);
+ }
+
+ /* get the actual root for this mount */
+ error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+ if (error < 0) {
+ dprintk("nfs_get_root: getattr error = %d\n", -error);
+ return ERR_PTR(error);
+ }
+
+ inode = nfs_fhget(sb, mntfh, &fattr);
+ if (IS_ERR(inode)) {
+ dprintk("nfs_get_root: get root inode failed\n");
+ return ERR_PTR(PTR_ERR(inode));
+ }
+
+ /* root dentries normally start off anonymous and get spliced in later
+ * if the dentry tree reaches them; however if the dentry already
+ * exists, we'll pick it up at this point and use it as the root
+ */
+ mntroot = d_alloc_anon(inode);
+ if (!mntroot) {
+ iput(inode);
+ dprintk("nfs_get_root: get root dentry failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ security_d_instantiate(mntroot, inode);
+
+ if (!mntroot->d_op)
+ mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+ dprintk("<-- nfs4_get_root()\n");
+ return mntroot;
+}
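nfs4_get_root() relies on the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention to return either a valid pointer or an encoded errno through the same return value (the real definitions live in include/linux/err.h). As a reminder of how that encoding works, here is a small userspace imitation; lookup_thing() is invented purely for the demonstration.

    #include <stdio.h>
    #include <errno.h>

    /* Userspace imitation of the kernel's error-pointer helpers: errno
     * values are stored in the top 4095 addresses, which no valid pointer
     * can occupy, so one return value carries either a pointer or an error.
     */
    #define MAX_ERRNO   4095
    #define ERR_PTR(err)    ((void *)(long)(err))
    #define PTR_ERR(ptr)    ((long)(ptr))
    #define IS_ERR(ptr) ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

    static int thing = 42;

    /* Invented example: return the object on success, -ENOENT encoded as a
     * pointer on failure. */
    static void *lookup_thing(int exists)
    {
        return exists ? (void *)&thing : ERR_PTR(-ENOENT);
    }

    int main(void)
    {
        void *p = lookup_thing(0);

        if (IS_ERR(p))
            printf("lookup failed: %ld\n", PTR_ERR(p)); /* -2 */
        else
            printf("got %d\n", *(int *)p);
        return 0;
    }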
+
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd57646e..82ad7110a1c0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
/* Default cache timeout is 10 minutes */
unsigned int nfs_idmap_cache_timeout = 600 * HZ;
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+ char *endp;
+ int num = simple_strtol(val, &endp, 0);
+ int jif = num * HZ;
+ if (endp == val || *endp || num < 0 || jif < num)
+ return -EINVAL;
+ *((int *)kp->arg) = jif;
+ return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+ &nfs_idmap_cache_timeout, 0644);
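The param_set_idmap_timeout() setter added here converts a decimal seconds value to jiffies, rejecting malformed input, negative values and multiplication overflow (the jif < num test catches the wrap). A userspace sketch of the same validation, with HZ fixed at an assumed 1000, looks like this; it checks the bound before multiplying rather than testing for wraparound afterwards, simply to keep the arithmetic well defined in standalone C.

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define HZ 1000     /* assumed tick rate, just for the illustration */

    /* Convert a decimal string of seconds into jiffies.  Rejects trailing
     * junk, negative values, and anything that would overflow an int once
     * multiplied by HZ, which are the same cases the module parameter
     * setter refuses.
     */
    static int parse_timeout(const char *val, int *jiffies_out)
    {
        char *endp;
        long num = strtol(val, &endp, 10);

        if (endp == val || *endp || num < 0 || num > INT_MAX / HZ)
            return -1;

        *jiffies_out = (int)(num * HZ);
        return 0;
    }

    int main(void)
    {
        int jif;

        if (parse_timeout("600", &jif) == 0)
            printf("600s -> %d jiffies\n", jif);    /* 600000 */
        if (parse_timeout("-5", &jif) < 0)
            printf("negative timeout rejected\n");
        if (parse_timeout("99999999999", &jif) < 0)
            printf("overflowing timeout rejected\n");
        return 0;
    }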
+
struct idmap_hashent {
unsigned long ih_expires;
__u32 ih_id;
@@ -70,7 +84,6 @@ struct idmap_hashtable {
};
struct idmap {
- char idmap_path[48];
struct dentry *idmap_dentry;
wait_queue_head_t idmap_wq;
struct idmap_msg idmap_im;
@@ -94,24 +107,23 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
.destroy_msg = idmap_pipe_destroy_msg,
};
-void
-nfs_idmap_new(struct nfs4_client *clp)
+int
+nfs_idmap_new(struct nfs_client *clp)
{
struct idmap *idmap;
+ int error;
- if (clp->cl_idmap != NULL)
- return;
- if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
- return;
+ BUG_ON(clp->cl_idmap != NULL);
- snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
- "%s/idmap", clp->cl_rpcclient->cl_pathname);
+ if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
- idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+ idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
idmap, &idmap_upcall_ops, 0);
if (IS_ERR(idmap->idmap_dentry)) {
+ error = PTR_ERR(idmap->idmap_dentry);
kfree(idmap);
- return;
+ return error;
}
mutex_init(&idmap->idmap_lock);
@@ -121,10 +133,11 @@ nfs_idmap_new(struct nfs4_client *clp)
idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
clp->cl_idmap = idmap;
+ return 0;
}
void
-nfs_idmap_delete(struct nfs4_client *clp)
+nfs_idmap_delete(struct nfs_client *clp)
{
struct idmap *idmap = clp->cl_idmap;
@@ -477,27 +490,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
return (hash);
}
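Only the tail of fnvhash32() appears in this hunk; going by its name it is a 32-bit Fowler-Noll-Vo hash over the user or group name being mapped. For reference, the standard FNV-1 variant in standalone form is shown below; the kernel function's exact constants and variant are not visible here, so treat this as an assumption rather than a copy.

    #include <stdio.h>
    #include <string.h>

    /* Standard 32-bit FNV-1 hash (offset basis 0x811c9dc5, prime
     * 0x01000193).  FNV-1a is the same loop with the XOR and the multiply
     * swapped.
     */
    static unsigned int fnv1_32(const void *buf, size_t buflen)
    {
        const unsigned char *p = buf;
        unsigned int hash = 0x811c9dc5u;

        while (buflen--) {
            hash *= 0x01000193u;    /* multiply by the FNV prime ... */
            hash ^= *p++;           /* ... then fold in the next byte */
        }
        return hash;
    }

    int main(void)
    {
        const char *name = "alice";

        printf("fnv1_32(\"%s\") = 0x%08x\n", name,
               fnv1_32(name, strlen(name)));
        return 0;
    }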
-int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
}
-int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
}
-int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
{
struct idmap *idmap = clp->cl_idmap;
return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
}
-int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
{
struct idmap *idmap = clp->cl_idmap;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2245da..bc9376ca86cd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
void nfs_clear_inode(struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct rpc_cred *cred;
-
/*
* The following should never happen...
*/
BUG_ON(nfs_have_writebacks(inode));
- BUG_ON (!list_empty(&nfsi->open_files));
+ BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+ BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
nfs_zap_acl_cache(inode);
- cred = nfsi->cache_access.cred;
- if (cred)
- put_rpccred(cred);
- BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+ nfs_access_zap_cache(inode);
}
/**
@@ -242,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
- inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+ inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
if (S_ISREG(inode->i_mode)) {
inode->i_fop = &nfs_file_operations;
inode->i_data.a_ops = &nfs_file_aops;
inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
} else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+ inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
&& fattr->size <= NFS_LIMIT_READDIRPLUS)
@@ -282,15 +277,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- inode->i_blksize = inode->i_sb->s_blocksize;
} else {
inode->i_blocks = fattr->du.nfs2.blocks;
- inode->i_blksize = fattr->du.nfs2.blocksize;
}
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- nfsi->cache_access.cred = NULL;
+ nfsi->access_cache = RB_ROOT;
unlock_new_inode(inode);
} else
@@ -448,7 +441,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
{
struct nfs_open_context *ctx;
- ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
atomic_set(&ctx->count, 1);
ctx->dentry = dget(dentry);
@@ -722,13 +715,11 @@ void nfs_end_data_update(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
- if (!nfs_have_delegation(inode, FMODE_READ)) {
- /* Directories and symlinks: invalidate page cache */
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
- spin_lock(&inode->i_lock);
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
- spin_unlock(&inode->i_lock);
- }
+ /* Directories: invalidate page cache */
+ if (S_ISDIR(inode->i_mode)) {
+ spin_lock(&inode->i_lock);
+ nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ spin_unlock(&inode->i_lock);
}
nfsi->cache_change_attribute = jiffies;
atomic_dec(&nfsi->data_updates);
@@ -847,6 +838,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
*
* After an operation that has changed the inode metadata, mark the
* attribute cache as being invalid, then try to update it.
+ *
+ * NB: if the server didn't return any post op attributes, this
+ * function will force the retrieval of attributes before the next
+ * NFS request. Thus it should be used only for operations that
+ * are expected to change one or more attributes, to avoid
+ * unnecessary NFS requests and trips through nfs_update_inode().
*/
int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
@@ -970,10 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- inode->i_blksize = inode->i_sb->s_blocksize;
} else {
inode->i_blocks = fattr->du.nfs2.blocks;
- inode->i_blksize = fattr->du.nfs2.blocksize;
}
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
@@ -1025,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
out_fileid:
printk(KERN_ERR "NFS: server %s error: fileid changed\n"
"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
- NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+ NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
(long long)nfsi->fileid, (long long)fattr->fileid);
goto out_err;
}
@@ -1109,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
INIT_LIST_HEAD(&nfsi->dirty);
INIT_LIST_HEAD(&nfsi->commit);
INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
atomic_set(&nfsi->data_updates, 0);
nfsi->ndirty = 0;
@@ -1133,8 +1130,7 @@ static int __init nfs_init_inodecache(void)
static void nfs_destroy_inodecache(void)
{
- if (kmem_cache_destroy(nfs_inode_cachep))
- printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(nfs_inode_cachep);
}
/*
@@ -1144,6 +1140,10 @@ static int __init init_nfs_fs(void)
{
int err;
+ err = nfs_fs_proc_init();
+ if (err)
+ goto out5;
+
err = nfs_init_nfspagecache();
if (err)
goto out4;
@@ -1184,6 +1184,8 @@ out2:
out3:
nfs_destroy_nfspagecache();
out4:
+ nfs_fs_proc_exit();
+out5:
return err;
}
@@ -1198,6 +1200,7 @@ static void __exit exit_nfs_fs(void)
rpc_proc_unregister("nfs");
#endif
unregister_nfs_fs();
+ nfs_fs_proc_exit();
}
/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5def0fc..bea0b016bd70 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
#include <linux/mount.h>
+struct nfs_string;
+struct nfs_mount_data;
+struct nfs4_mount_data;
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ * their needs. People that do NFS over a slow network, might for
+ * instance want to reduce it to something closer to 1 for improved
+ * interactive response.
+ */
+#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
+
struct nfs_clone_mount {
const struct super_block *sb;
const struct dentry *dentry;
@@ -15,7 +27,40 @@ struct nfs_clone_mount {
rpc_authflavor_t authflavor;
};
-/* namespace-nfs4.c */
+/* client.c */
+extern struct rpc_program nfs_program;
+
+extern void nfs_put_client(struct nfs_client *);
+extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+ struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
+ const char *,
+ const struct sockaddr_in *,
+ const char *,
+ const char *,
+ rpc_authflavor_t,
+ struct nfs_fh *);
+extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
+ struct nfs_fh *);
+extern void nfs_free_server(struct nfs_server *server);
+extern struct nfs_server *nfs_clone_server(struct nfs_server *,
+ struct nfs_fh *,
+ struct nfs_fattr *);
+#ifdef CONFIG_PROC_FS
+extern int __init nfs_fs_proc_init(void);
+extern void nfs_fs_proc_exit(void);
+#else
+static inline int nfs_fs_proc_init(void)
+{
+ return 0;
+}
+static inline void nfs_fs_proc_exit(void)
+{
+}
+#endif
+
+/* nfs4namespace.c */
#ifdef CONFIG_NFS_V4
extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
#else
@@ -46,6 +91,7 @@ extern void nfs_destroy_directcache(void);
#endif
/* nfs2xdr.c */
+extern int nfs_stat_to_errno(int);
extern struct rpc_procinfo nfs_procedures[];
extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
@@ -54,8 +100,9 @@ extern struct rpc_procinfo nfs3_procedures[];
extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
/* nfs4xdr.c */
-extern int nfs_stat_to_errno(int);
+#ifdef CONFIG_NFS_V4
extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+#endif
/* nfs4proc.c */
#ifdef CONFIG_NFS_V4
@@ -66,6 +113,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
struct page *page);
#endif
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
/* inode.c */
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_destroy_inode(struct inode *);
@@ -76,10 +126,10 @@ extern void nfs4_clear_inode(struct inode *);
#endif
/* super.c */
-extern struct file_system_type nfs_referral_nfs4_fs_type;
-extern struct file_system_type clone_nfs_fs_type;
+extern struct file_system_type nfs_xdev_fs_type;
#ifdef CONFIG_NFS_V4
-extern struct file_system_type clone_nfs4_fs_type;
+extern struct file_system_type nfs4_xdev_fs_type;
+extern struct file_system_type nfs4_referral_fs_type;
#endif
extern struct rpc_stat nfs_rpcstat;
@@ -88,30 +138,30 @@ extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
/* namespace.c */
-extern char *nfs_path(const char *base, const struct dentry *dentry,
+extern char *nfs_path(const char *base,
+ const struct dentry *droot,
+ const struct dentry *dentry,
char *buffer, ssize_t buflen);
-/*
- * Determine the mount path as a string
- */
-static inline char *
-nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
-{
+/* getroot.c */
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
#ifdef CONFIG_NFS_V4
- return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-#else
- return NULL;
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+
+extern int nfs4_path_walk(struct nfs_server *server,
+ struct nfs_fh *mntfh,
+ const char *path);
#endif
-}
/*
* Determine the device name as a string
*/
static inline char *nfs_devname(const struct vfsmount *mnt_parent,
- const struct dentry *dentry,
- char *buffer, ssize_t buflen)
+ const struct dentry *dentry,
+ char *buffer, ssize_t buflen)
{
- return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+ return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
+ dentry, buffer, buflen);
}
/*
@@ -167,20 +217,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
sb->s_maxbytes = MAX_LFS_FILESIZE;
}
-
-/*
- * Check if the string represents a "valid" IPv4 address
- */
-static inline int valid_ipaddr4(const char *buf)
-{
- int rc, count, in[4];
-
- rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
- if (rc != 4)
- return -EINVAL;
- for (count = 0; count < 4; count++) {
- if (in[count] > 255)
- return -EINVAL;
- }
- return 0;
-}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4d4214..d507b021207f 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
#include <linux/net.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs_fs.h>
@@ -77,22 +76,19 @@ static struct rpc_clnt *
mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
int protocol)
{
- struct rpc_xprt *xprt;
- struct rpc_clnt *clnt;
-
- xprt = xprt_create_proto(protocol, srvaddr, NULL);
- if (IS_ERR(xprt))
- return (struct rpc_clnt *)xprt;
-
- clnt = rpc_create_client(xprt, hostname,
- &mnt_program, version,
- RPC_AUTH_UNIX);
- if (!IS_ERR(clnt)) {
- clnt->cl_softrtry = 1;
- clnt->cl_oneshot = 1;
- clnt->cl_intr = 1;
- }
- return clnt;
+ struct rpc_create_args args = {
+ .protocol = protocol,
+ .address = (struct sockaddr *)srvaddr,
+ .addrsize = sizeof(*srvaddr),
+ .servername = hostname,
+ .program = &mnt_program,
+ .version = version,
+ .authflavor = RPC_AUTH_UNIX,
+ .flags = (RPC_CLNT_CREATE_ONESHOT |
+ RPC_CLNT_CREATE_INTR),
+ };
+
+ return rpc_create(&args);
}
/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169c8cac..60408646176b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
* linux/fs/nfs/namespace.c
*
* Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
*
* NFS namespace
*/
@@ -25,9 +26,15 @@ LIST_HEAD(nfs_automount_list);
static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
+static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+ const struct dentry *dentry,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr);
+
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - arbitrary string to prepend to the path
+ * @droot - pointer to root dentry for mountpoint
* @dentry - pointer to dentry
* @buffer - result buffer
* @buflen - length of buffer
@@ -38,7 +45,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* This is mainly for use in figuring out the path on the
* server side when automounting on top of an existing partition.
*/
-char *nfs_path(const char *base, const struct dentry *dentry,
+char *nfs_path(const char *base,
+ const struct dentry *droot,
+ const struct dentry *dentry,
char *buffer, ssize_t buflen)
{
char *end = buffer+buflen;
@@ -47,7 +56,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
*--end = '\0';
buflen--;
spin_lock(&dcache_lock);
- while (!IS_ROOT(dentry)) {
+ while (!IS_ROOT(dentry) && dentry != droot) {
namelen = dentry->d_name.len;
buflen -= namelen + 1;
if (buflen < 0)
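nfs_path() assembles the server-side path by walking dentries from the leaf up towards the new droot argument, writing each name backwards into the tail of the caller's buffer and returning a pointer into that buffer. The same right-to-left construction can be shown with a plain array of components; build_path() below is an invented stand-in for the dentry walk, not the kernel function.

    #include <stdio.h>
    #include <string.h>

    /* Build "/a/b/c" from components listed leaf-first, filling the buffer
     * from the end the way nfs_path() does.  Returns a pointer into buffer,
     * or NULL if the buffer is too small.  components[] plays the role of
     * the d_parent chain.
     */
    static char *build_path(const char *const *components, int n,
                            char *buffer, size_t buflen)
    {
        char *end = buffer + buflen;
        int i;

        if (buflen < 1)
            return NULL;
        *--end = '\0';
        buflen--;

        for (i = 0; i < n; i++) {           /* leaf towards the root */
            size_t namelen = strlen(components[i]);

            if (buflen < namelen + 1)
                return NULL;
            end -= namelen;
            memcpy(end, components[i], namelen);
            *--end = '/';
            buflen -= namelen + 1;
        }
        return end;
    }

    int main(void)
    {
        /* leaf-first, i.e. the order a d_parent walk would visit them */
        const char *const comps[] = { "alice", "home", "export" };
        char buf[64];
        char *path = build_path(comps, 3, buf, sizeof(buf));

        if (path)
            printf("%s\n", path);           /* /export/home/alice */
        return 0;
    }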
@@ -96,15 +105,18 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
struct nfs_fattr fattr;
int err;
+ dprintk("--> nfs_follow_mountpoint()\n");
+
BUG_ON(IS_ROOT(dentry));
dprintk("%s: enter\n", __FUNCTION__);
dput(nd->dentry);
nd->dentry = dget(dentry);
- if (d_mountpoint(nd->dentry))
- goto out_follow;
+
/* Look it up again */
parent = dget_parent(nd->dentry);
- err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+ err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+ &nd->dentry->d_name,
+ &fh, &fattr);
dput(parent);
if (err != 0)
goto out_err;
@@ -132,6 +144,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
out:
dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+
+ dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
return ERR_PTR(err);
out_err:
path_release(nd);
@@ -172,22 +186,23 @@ void nfs_release_automount_timer(void)
/*
* Clone a mountpoint of the appropriate type
*/
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
+static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
+ const char *devname,
struct nfs_clone_mount *mountdata)
{
#ifdef CONFIG_NFS_V4
struct vfsmount *mnt = NULL;
- switch (server->rpc_ops->version) {
+ switch (server->nfs_client->cl_nfsversion) {
case 2:
case 3:
- mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+ mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
break;
case 4:
- mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
+ mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
}
return mnt;
#else
- return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+ return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
#endif
}
@@ -199,9 +214,10 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devn
* @fattr - attributes for new root inode
*
*/
-struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
- const struct dentry *dentry, struct nfs_fh *fh,
- struct nfs_fattr *fattr)
+static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
+ const struct dentry *dentry,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr)
{
struct nfs_clone_mount mountdata = {
.sb = mnt_parent->mnt_sb,
@@ -213,6 +229,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
char *page = (char *) __get_free_page(GFP_USER);
char *devname;
+ dprintk("--> nfs_do_submount()\n");
+
dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
dentry->d_parent->d_name.name,
dentry->d_name.name);
@@ -227,5 +245,7 @@ free_page:
free_page((unsigned long)page);
out:
dprintk("%s: done\n", __FUNCTION__);
+
+ dprintk("<-- nfs_do_submount() = %p\n", mnt);
return mnt;
}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391eef6b93..b49501fc0a79 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
-#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz)
#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
#define NFS_attrstat_sz (1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
static int
nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
{
+ struct xdr_buf *sndbuf = &req->rq_snd_buf;
+ size_t pad;
+
p = xdr_encode_fhandle(p, args->fromfh);
p = xdr_encode_array(p, args->fromname, args->fromlen);
- p = xdr_encode_array(p, args->topath, args->tolen);
+ *p++ = htonl(args->pathlen);
+ sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+ xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+
+ /*
+ * xdr_encode_pages may have added a few bytes to ensure the
+ * pathname ends on a 4-byte boundary. Start encoding the
+ * attributes after the pad bytes.
+ */
+ pad = sndbuf->tail->iov_len;
+ if (pad > 0)
+ p++;
p = xdr_encode_sattr(p, args->sattr);
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
return 0;
}
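XDR requires opaque data such as the symlink target to be padded to a multiple of four bytes, which is why the code above has to step p past the pad bytes in sndbuf->tail before encoding the sattr. The padding arithmetic itself is simple; a standalone illustration follows, where xdr_pad_len() is an invented helper name.

    #include <stdio.h>

    /* Number of zero bytes XDR appends so that an opaque of 'len' bytes
     * ends on a 4-byte boundary (invented helper; mirrors what
     * xdr_encode_pages arranges for the symlink target).
     */
    static unsigned int xdr_pad_len(unsigned int len)
    {
        return (4 - (len & 3)) & 3;
    }

    int main(void)
    {
        unsigned int len;

        for (len = 1; len <= 8; len++)
            printf("payload %u bytes -> %u pad byte(s), %u on the wire\n",
                   len, xdr_pad_len(len), len + xdr_pad_len(len));
        return 0;
    }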
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f82cea..3b234d4601e7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
}
/*
- * Bare-bones access to getattr: this is for nfs_read_super.
+ * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
*/
static int
nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
int status;
status = do_proc_get_root(server->client, fhandle, info);
- if (status && server->client_sys != server->client)
- status = do_proc_get_root(server->client_sys, fhandle, info);
+ if (status && server->nfs_client->cl_rpcclient != server->client)
+ status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
return status;
}
@@ -449,7 +449,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr
struct nfs_fattr res;
} *ptr;
- ptr = (struct unlinkxdr *)kmalloc(sizeof(*ptr), GFP_KERNEL);
+ ptr = kmalloc(sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return -ENOMEM;
ptr->arg.fh = NFS_FH(dir->d_inode);
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
}
static int
-nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
- struct iattr *sattr, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+ unsigned int len, struct iattr *sattr)
{
- struct nfs_fattr dir_attr;
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr, dir_attr;
struct nfs3_symlinkargs arg = {
.fromfh = NFS_FH(dir),
- .fromname = name->name,
- .fromlen = name->len,
- .topath = path->name,
- .tolen = path->len,
+ .fromname = dentry->d_name.name,
+ .fromlen = dentry->d_name.len,
+ .pages = &page,
+ .pathlen = len,
.sattr = sattr
};
struct nfs3_diropres res = {
.dir_attr = &dir_attr,
- .fh = fhandle,
- .fattr = fattr
+ .fh = &fhandle,
+ .fattr = &fattr
};
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -569,13 +569,19 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
};
int status;
- if (path->len > NFS3_MAXPATHLEN)
+ if (len > NFS3_MAXPATHLEN)
return -ENAMETOOLONG;
- dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
+
+ dprintk("NFS call symlink %s\n", dentry->d_name.name);
+
nfs_fattr_init(&dir_attr);
- nfs_fattr_init(fattr);
+ nfs_fattr_init(&fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_post_op_update_inode(dir, &dir_attr);
+ if (status != 0)
+ goto out;
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+out:
dprintk("NFS reply symlink: %d\n", status);
return status;
}
@@ -785,7 +791,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("NFS call fsinfo\n");
nfs_fattr_init(info->fattr);
- status = rpc_call_sync(server->client_sys, &msg, 0);
+ status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
dprintk("NFS reply fsinfo: %d\n", status);
return status;
}
@@ -886,7 +892,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
}
-struct nfs_rpc_ops nfs_v3_clientops = {
+const struct nfs_rpc_ops nfs_v3_clientops = {
.version = 3, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269e9753..16556fa4effb 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
#define NFS3_writeargs_sz (NFS3_fh_sz+5)
#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
-#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
p = xdr_encode_fhandle(p, args->fromfh);
p = xdr_encode_array(p, args->fromname, args->fromlen);
p = xdr_encode_sattr(p, args->sattr);
- p = xdr_encode_array(p, args->topath, args->tolen);
+ *p++ = htonl(args->pathlen);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+ /* Copy the page */
+ xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
return 0;
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a102860df37..61095fe4b5ca 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@ enum nfs4_client_state {
};
/*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
- struct list_head cl_servers; /* Global list of servers */
- struct in_addr cl_addr; /* Server identifier */
- u64 cl_clientid; /* constant */
- nfs4_verifier cl_confirm;
- unsigned long cl_state;
-
- u32 cl_lockowner_id;
-
- /*
- * The following rwsem ensures exclusive access to the server
- * while we recover the state following a lease expiration.
- */
- struct rw_semaphore cl_sem;
-
- struct list_head cl_delegations;
- struct list_head cl_state_owners;
- struct list_head cl_unused;
- int cl_nunused;
- spinlock_t cl_lock;
- atomic_t cl_count;
-
- struct rpc_clnt * cl_rpcclient;
-
- struct list_head cl_superblocks; /* List of nfs_server structs */
-
- unsigned long cl_lease_time;
- unsigned long cl_last_renewal;
- struct work_struct cl_renewd;
- struct work_struct cl_recoverd;
-
- struct rpc_wait_queue cl_rpcwaitq;
-
- /* used for the setclientid verifier */
- struct timespec cl_boot_time;
-
- /* idmapper */
- struct idmap * cl_idmap;
-
- /* Our own IP address, as a null-terminated string.
- * This is used to generate the clientid, and the callback address.
- */
- char cl_ipaddr[16];
- unsigned char cl_id_uniquifier;
-};
-
-/*
* struct rpc_sequence ensures that RPC calls are sent in the exact
* order that they appear on the list.
*/
@@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
struct nfs4_state_owner {
spinlock_t so_lock;
struct list_head so_list; /* per-clientid list of state_owners */
- struct nfs4_client *so_client;
+ struct nfs_client *so_client;
u32 so_id; /* 32-bit identifier, unique */
atomic_t so_count;
@@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
/* nfs4proc.c */
extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2];
extern const u32 nfs4_fs_locations_bitmap[2];
/* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_schedule_state_renewal(struct nfs_client *);
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(void *);
/* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct nfs4_state *, mode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_schedule_state_recovery(struct nfs_client *);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
#else
-#define init_nfsv4_state(server) do { } while (0)
-#define destroy_nfsv4_state(server) do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
#define nfs4_close_state(a, b) do { } while (0)
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27b74e6..24e47f3bbd17 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
* linux/fs/nfs/nfs4namespace.c
*
* Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
*
* NFSv4 namespace
*/
@@ -23,7 +24,7 @@
/*
* Check if fs_root is valid
*/
-static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
+static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
char *buffer, ssize_t buflen)
{
char *end = buffer + buflen;
@@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
n = pathname->ncomponents;
while (--n >= 0) {
- struct nfs4_string *component = &pathname->components[n];
+ const struct nfs4_string *component = &pathname->components[n];
buflen -= component->len + 1;
if (buflen < 0)
goto Elong;
@@ -47,6 +48,68 @@ Elong:
return ERR_PTR(-ENAMETOOLONG);
}
+/*
+ * Determine the mount path as a string
+ */
+static char *nfs4_path(const struct vfsmount *mnt_parent,
+ const struct dentry *dentry,
+ char *buffer, ssize_t buflen)
+{
+ const char *srvpath;
+
+ srvpath = strchr(mnt_parent->mnt_devname, ':');
+ if (srvpath)
+ srvpath++;
+ else
+ srvpath = mnt_parent->mnt_devname;
+
+ return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
+}
+
+/*
+ * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
+ * believe to be the server path to this dentry
+ */
+static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
+ const struct dentry *dentry,
+ const struct nfs4_fs_locations *locations,
+ char *page, char *page2)
+{
+ const char *path, *fs_path;
+
+ path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
+ if (IS_ERR(fs_path))
+ return PTR_ERR(fs_path);
+
+ if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+ dprintk("%s: path %s does not begin with fsroot %s\n",
+ __FUNCTION__, path, fs_path);
+ return -ENOENT;
+ }
+
+ return 0;
+}
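nfs4_validate_fspath() above checks with strncmp() that the server's advertised fs_root is a leading substring of the path computed for this dentry. A standalone variant of that test, using the invented name is_path_prefix(), which additionally requires the match to end on a component boundary, looks like this:

    #include <stdio.h>
    #include <string.h>

    /* Return 1 if 'prefix' names a leading sequence of path components of
     * 'path'.  Invented helper: like a plain strncmp() prefix test, but it
     * also insists the match ends at a '/' (or end of string) so that
     * "/export" does not count as a prefix of "/exports".
     */
    static int is_path_prefix(const char *path, const char *prefix)
    {
        size_t plen = strlen(prefix);

        if (strncmp(path, prefix, plen) != 0)
            return 0;
        return path[plen] == '\0' || path[plen] == '/' ||
               (plen > 0 && prefix[plen - 1] == '/');
    }

    int main(void)
    {
        printf("%d\n", is_path_prefix("/export/home", "/export"));   /* 1 */
        printf("%d\n", is_path_prefix("/exports/home", "/export"));  /* 0 */
        printf("%d\n", is_path_prefix("/export/home", "/"));         /* 1 */
        return 0;
    }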
+
+/*
+ * Check if the string represents a "valid" IPv4 address
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+ int rc, count, in[4];
+
+ rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+ if (rc != 4)
+ return -EINVAL;
+ for (count = 0; count < 4; count++) {
+ if (in[count] > 255)
+ return -EINVAL;
+ }
+ return 0;
+}
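valid_ipaddr4() above does a quick sanity check on a dotted-quad string using sscanf(). A standalone harness exercising the same shape of check is shown below; the rejection of negative components is an extra guard added for this sketch, not something the function above does.

    #include <stdio.h>

    /* Same shape as valid_ipaddr4(): four dot-separated decimal fields,
     * each required to fit in 0..255.  The negative-value check is an
     * extra guard added for this standalone sketch.
     */
    static int ipaddr4_ok(const char *buf)
    {
        int in[4], count;

        if (sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]) != 4)
            return 0;
        for (count = 0; count < 4; count++)
            if (in[count] < 0 || in[count] > 255)
                return 0;
        return 1;
    }

    int main(void)
    {
        const char *samples[] = { "10.0.0.1", "256.1.1.1", "10.0.0",
                                  "-1.2.3.4" };
        int i;

        for (i = 0; i < 4; i++)
            printf("%-12s -> %s\n", samples[i],
                   ipaddr4_ok(samples[i]) ? "ok" : "rejected");
        return 0;
    }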
/**
* nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -60,7 +123,7 @@ Elong:
*/
static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
const struct dentry *dentry,
- struct nfs4_fs_locations *locations)
+ const struct nfs4_fs_locations *locations)
{
struct vfsmount *mnt = ERR_PTR(-ENOENT);
struct nfs_clone_mount mountdata = {
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
.dentry = dentry,
.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
};
- char *page, *page2;
- char *path, *fs_path;
+ char *page = NULL, *page2 = NULL;
char *devname;
- int loc, s;
+ int loc, s, error;
if (locations == NULL || locations->nlocations <= 0)
goto out;
@@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
dprintk("%s: referral at %s/%s\n", __FUNCTION__,
dentry->d_parent->d_name.name, dentry->d_name.name);
- /* Ensure fs path is a prefix of current dentry path */
page = (char *) __get_free_page(GFP_USER);
- if (page == NULL)
+ if (!page)
goto out;
+
page2 = (char *) __get_free_page(GFP_USER);
- if (page2 == NULL)
+ if (!page2)
goto out;
- path = nfs4_path(dentry, page, PAGE_SIZE);
- if (IS_ERR(path))
- goto out_free;
-
- fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
- if (IS_ERR(fs_path))
- goto out_free;
-
- if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
- dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
- goto out_free;
+ /* Ensure fs path is a prefix of current dentry path */
+ error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+ if (error < 0) {
+ mnt = ERR_PTR(error);
+ goto out;
}
devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
if (IS_ERR(devname)) {
mnt = (struct vfsmount *)devname;
- goto out_free;
+ goto out;
}
loc = 0;
while (loc < locations->nlocations && IS_ERR(mnt)) {
- struct nfs4_fs_location *location = &locations->locations[loc];
+ const struct nfs4_fs_location *location = &locations->locations[loc];
char *mnt_path;
if (location == NULL || location->nservers <= 0 ||
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
addr.sin_port = htons(NFS_PORT);
mountdata.addr = &addr;
- mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
+ mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
if (!IS_ERR(mnt)) {
break;
}
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
loc++;
}
-out_free:
- free_page((unsigned long)page);
- free_page((unsigned long)page2);
out:
+ free_page((unsigned long) page);
+ free_page((unsigned long) page2);
dprintk("%s: done\n", __FUNCTION__);
return mnt;
}
@@ -165,7 +220,7 @@ out:
*/
struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
{
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
struct nfs4_fs_locations *fs_locations = NULL;
struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
goto out_free;
/* Get locations */
+ mnt = ERR_PTR(-ENOENT);
+
parent = dget_parent(dentry);
- dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+ dprintk("%s: getting locations for %s/%s\n",
+ __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+
err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
dput(parent);
- if (err != 0 || fs_locations->nlocations <= 0 ||
+ if (err != 0 ||
+ fs_locations->nlocations <= 0 ||
fs_locations->fs_path.ncomponents <= 0)
goto out_free;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b14145b7b87f..47c7e6e3910d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
#define NFSDBG_FACILITY NFSDBG_PROC
-#define NFS4_POLL_RETRY_MIN (1*HZ)
+#define NFS4_POLL_RETRY_MIN (HZ/10)
#define NFS4_POLL_RETRY_MAX (15*HZ)
struct nfs4_opendata;
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
/* Prevent leaks of NFSv4 errors into userland */
int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
{
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
if (time_before(clp->cl_last_renewal,timestamp))
clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
atomic_inc(&sp->so_count);
p->o_arg.fh = NFS_FH(dir);
p->o_arg.open_flags = flags,
- p->o_arg.clientid = server->nfs4_state->cl_clientid;
+ p->o_arg.clientid = server->nfs_client->cl_clientid;
p->o_arg.id = sp->so_id;
p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs4_state);
+ nfs4_schedule_state_recovery(server->nfs_client);
return err;
}
err = nfs4_handle_exception(server, err, &exception);
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
}
nfs_confirm_seqid(&data->owner->so_seqid, 0);
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
- return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+ return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
return 0;
}
@@ -792,11 +792,18 @@ out:
int nfs4_recover_expired_lease(struct nfs_server *server)
{
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
+ int ret;
- if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+ for (;;) {
+ ret = nfs4_wait_clnt_recover(server->client, clp);
+ if (ret != 0)
+ return ret;
+ if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+ break;
nfs4_schedule_state_recovery(clp);
- return nfs4_wait_clnt_recover(server->client, clp);
+ }
+ return 0;
}
/*
@@ -867,7 +874,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
{
struct nfs_delegation *delegation;
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state_owner *sp = NULL;
struct nfs4_state *state = NULL;
@@ -953,7 +960,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
struct nfs_server *server = NFS_SERVER(dir);
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_opendata *opendata;
int status;
@@ -1133,7 +1140,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(server->nfs4_state);
+ nfs4_schedule_state_recovery(server->nfs_client);
break;
default:
if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1268,7 +1275,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
BUG_ON(nd->intent.open.flags & O_CREAT);
}
- cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred))
return (struct dentry *)cred;
state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1298,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
struct rpc_cred *cred;
struct nfs4_state *state;
- cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1393,70 +1400,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
+/*
+ * get the file handle for the "/" directory on the server
+ */
static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fsinfo *info)
{
- struct nfs_fattr * fattr = info->fattr;
- unsigned char * p;
- struct qstr q;
- struct nfs4_lookup_arg args = {
- .dir_fh = fhandle,
- .name = &q,
- .bitmask = nfs4_fattr_bitmap,
- };
- struct nfs4_lookup_res res = {
- .server = server,
- .fattr = fattr,
- .fh = fhandle,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
- .rpc_argp = &args,
- .rpc_resp = &res,
- };
int status;
- /*
- * Now we do a separate LOOKUP for each component of the mount path.
- * The LOOKUPs are done separately so that we can conveniently
- * catch an ERR_WRONGSEC if it occurs along the way...
- */
status = nfs4_lookup_root(server, fhandle, info);
- if (status)
- goto out;
-
- p = server->mnt_path;
- for (;;) {
- struct nfs4_exception exception = { };
-
- while (*p == '/')
- p++;
- if (!*p)
- break;
- q.name = p;
- while (*p && (*p != '/'))
- p++;
- q.len = p - q.name;
-
- do {
- nfs_fattr_init(fattr);
- status = nfs4_handle_exception(server,
- rpc_call_sync(server->client, &msg, 0),
- &exception);
- } while (exception.retry);
- if (status == 0)
- continue;
- if (status == -ENOENT) {
- printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
- printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
- }
- break;
- }
if (status == 0)
status = nfs4_server_capabilities(server, fhandle);
if (status == 0)
status = nfs4_do_fsinfo(server, fhandle, info);
-out:
return nfs4_map_errors(status);
}
@@ -1565,7 +1521,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
- cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+ cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
if (IS_ERR(cred))
return PTR_ERR(cred);
@@ -1583,6 +1539,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
return status;
}
+static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+ struct qstr *name, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+{
+ int status;
+ struct nfs4_lookup_arg args = {
+ .bitmask = server->attr_bitmask,
+ .dir_fh = dirfh,
+ .name = name,
+ };
+ struct nfs4_lookup_res res = {
+ .server = server,
+ .fattr = fattr,
+ .fh = fhandle,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+
+ nfs_fattr_init(fattr);
+
+ dprintk("NFS call lookupfh %s\n", name->name);
+ status = rpc_call_sync(server->client, &msg, 0);
+ dprintk("NFS reply lookupfh: %d\n", status);
+ if (status == -NFS4ERR_MOVED)
+ status = -EREMOTE;
+ return status;
+}
+
+static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+ struct qstr *name, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = nfs4_handle_exception(server,
+ _nfs4_proc_lookupfh(server, dirfh, name,
+ fhandle, fattr),
+ &exception);
+ } while (exception.retry);
+ return err;
+}
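nfs4_proc_lookupfh() follows the usual NFSv4 pattern in this file: the raw RPC is wrapped in a loop that hands every result to nfs4_handle_exception(), which may back off and ask for a retry (as it does for NFS4ERR_GRACE and NFS4ERR_DELAY further down in this patch) or translate the error and stop. A stripped-down userspace sketch of that retry shape, with invented names throughout:

    #include <stdio.h>

    /* Invented stand-ins for the exception-handling pattern: do_op() is the
     * raw operation, handle_exception() decides whether an error is worth a
     * retry (and would normally back off before saying so).
     */
    struct exception { int retry; };

    static int attempts;

    static int do_op(void)
    {
        /* fail twice with a "server busy" style error, then succeed */
        return ++attempts < 3 ? -11 /* pretend EAGAIN */ : 0;
    }

    static int handle_exception(int err, struct exception *exc)
    {
        exc->retry = 0;
        if (err == -11) {       /* transient: ask the caller to retry */
            exc->retry = 1;
            return 0;
        }
        return err;             /* permanent: pass it through */
    }

    int main(void)
    {
        struct exception exception = { 0 };
        int err;

        do {
            err = handle_exception(do_op(), &exception);
        } while (exception.retry);

        printf("finished after %d attempt(s), err = %d\n", attempts, err);
        return 0;
    }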
+
static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
@@ -1881,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
struct rpc_cred *cred;
int status = 0;
- cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+ cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred)) {
status = PTR_ERR(cred);
goto out;
@@ -2089,24 +2091,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
return err;
}
-static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
- struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+ struct page *page, unsigned int len, struct iattr *sattr)
{
struct nfs_server *server = NFS_SERVER(dir);
- struct nfs_fattr dir_fattr;
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr, dir_fattr;
struct nfs4_create_arg arg = {
.dir_fh = NFS_FH(dir),
.server = server,
- .name = name,
+ .name = &dentry->d_name,
.attrs = sattr,
.ftype = NF4LNK,
.bitmask = server->attr_bitmask,
};
struct nfs4_create_res res = {
.server = server,
- .fh = fhandle,
- .fattr = fattr,
+ .fh = &fhandle,
+ .fattr = &fattr,
.dir_fattr = &dir_fattr,
};
struct rpc_message msg = {
@@ -2116,29 +2118,32 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
};
int status;
- if (path->len > NFS4_MAXPATHLEN)
+ if (len > NFS4_MAXPATHLEN)
return -ENAMETOOLONG;
- arg.u.symlink = path;
- nfs_fattr_init(fattr);
+
+ arg.u.symlink.pages = &page;
+ arg.u.symlink.len = len;
+ nfs_fattr_init(&fattr);
nfs_fattr_init(&dir_fattr);
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
- if (!status)
+ if (!status) {
update_changeattr(dir, &res.dir_cinfo);
- nfs_post_op_update_inode(dir, res.dir_fattr);
+ nfs_post_op_update_inode(dir, res.dir_fattr);
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+ }
return status;
}
-static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
- struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+ struct page *page, unsigned int len, struct iattr *sattr)
{
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_symlink(dir, name, path, sattr,
- fhandle, fattr),
+ _nfs4_proc_symlink(dir, dentry, page,
+ len, sattr),
&exception);
} while (exception.retry);
return err;
@@ -2521,7 +2526,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
*/
static void nfs4_renew_done(struct rpc_task *task, void *data)
{
- struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+ struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
unsigned long timestamp = (unsigned long)data;
if (task->tk_status < 0) {
@@ -2543,7 +2548,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
.rpc_call_done = nfs4_renew_done,
};
-int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2560,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
&nfs4_renew_ops, (void *)jiffies);
}
-int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2770,7 +2775,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
return -EOPNOTSUPP;
nfs_inode_return_delegation(inode);
buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
- ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+ ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (ret == 0)
nfs4_write_cached_acl(inode, buf, buflen);
return ret;
@@ -2791,7 +2796,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
static int
nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
{
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
if (!clp || task->tk_status >= 0)
return 0;
@@ -2828,7 +2833,7 @@ static int nfs4_wait_bit_interruptible(void *word)
return 0;
}
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
{
sigset_t oldset;
int res;
@@ -2871,7 +2876,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*/
int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
{
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
int ret = errorcode;
exception->retry = 0;
@@ -2886,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
if (ret == 0)
exception->retry = 1;
break;
+ case -NFS4ERR_FILE_OPEN:
case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
ret = nfs4_delay(server->client, &exception->timeout);
@@ -2898,7 +2904,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
return nfs4_map_errors(ret);
}
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
{
nfs4_verifier sc_verifier;
struct nfs4_setclientid setclientid = {
@@ -2922,7 +2928,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
for(;;) {
setclientid.sc_name_len = scnprintf(setclientid.sc_name,
sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
- clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+ clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
cred->cr_ops->cr_name,
clp->cl_id_uniquifier);
setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
@@ -2945,7 +2951,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
return status;
}
-static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
{
struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
@@ -2969,7 +2975,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
return status;
}
-int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
{
long timeout;
int err;
@@ -3077,7 +3083,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(server->nfs4_state);
+ nfs4_schedule_state_recovery(server->nfs_client);
case 0:
return 0;
}
@@ -3106,7 +3112,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct inode *inode = state->inode;
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
struct nfs_lockt_args arg = {
.fh = NFS_FH(inode),
.fl = request,
@@ -3231,7 +3237,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(calldata->server->nfs4_state);
+ nfs4_schedule_state_recovery(calldata->server->nfs_client);
break;
default:
if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3349,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
if (p->arg.lock_seqid == NULL)
goto out_free;
p->arg.lock_stateid = &lsp->ls_stateid;
- p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+ p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id;
p->lsp = lsp;
atomic_inc(&lsp->ls_count);
@@ -3513,7 +3519,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs4_client *clp = state->owner->so_client;
+ struct nfs_client *clp = state->owner->so_client;
unsigned char fl_flags = request->fl_flags;
int status;
@@ -3715,7 +3721,7 @@ static struct inode_operations nfs4_file_inode_operations = {
.listxattr = nfs4_listxattr,
};
-struct nfs_rpc_ops nfs_v4_clientops = {
+const struct nfs_rpc_ops nfs_v4_clientops = {
.version = 4, /* protocol version */
.dentry_ops = &nfs4_dentry_operations,
.dir_inode_ops = &nfs4_dir_inode_operations,
@@ -3723,6 +3729,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.getroot = nfs4_proc_get_root,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
+ .lookupfh = nfs4_proc_lookupfh,
.lookup = nfs4_proc_lookup,
.access = nfs4_proc_access,
.readlink = nfs4_proc_readlink,
@@ -3743,6 +3750,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
.statfs = nfs4_proc_statfs,
.fsinfo = nfs4_proc_fsinfo,
.pathconf = nfs4_proc_pathconf,
+ .set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
.read_done = nfs4_read_done,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8e6d8a..7b6df1852e75 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
void
nfs4_renew_state(void *data)
{
- struct nfs4_client *clp = (struct nfs4_client *)data;
+ struct nfs_client *clp = (struct nfs_client *)data;
struct rpc_cred *cred;
long lease, timeout;
unsigned long last, now;
@@ -108,7 +108,7 @@ out:
/* Must be called with clp->cl_sem locked for writes */
void
-nfs4_schedule_state_renewal(struct nfs4_client *clp)
+nfs4_schedule_state_renewal(struct nfs_client *clp)
{
long timeout;
@@ -121,32 +121,20 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
__FUNCTION__, (timeout + HZ - 1) / HZ);
cancel_delayed_work(&clp->cl_renewd);
schedule_delayed_work(&clp->cl_renewd, timeout);
+ set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
spin_unlock(&clp->cl_lock);
}
void
nfs4_renewd_prepare_shutdown(struct nfs_server *server)
{
- struct nfs4_client *clp = server->nfs4_state;
-
- if (!clp)
- return;
flush_scheduled_work();
- down_write(&clp->cl_sem);
- if (!list_empty(&server->nfs4_siblings))
- list_del_init(&server->nfs4_siblings);
- up_write(&clp->cl_sem);
}
-/* Must be called with clp->cl_sem locked for writes */
void
-nfs4_kill_renewd(struct nfs4_client *clp)
+nfs4_kill_renewd(struct nfs_client *clp)
{
down_read(&clp->cl_sem);
- if (!list_empty(&clp->cl_superblocks)) {
- up_read(&clp->cl_sem);
- return;
- }
cancel_delayed_work(&clp->cl_renewd);
up_read(&clp->cl_sem);
flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b07a22..5fffbdfa971f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,149 +50,15 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
+#include "internal.h"
#define OPENOWNER_POOL_SIZE 8
const nfs4_stateid zero_stateid;
-static DEFINE_SPINLOCK(state_spinlock);
static LIST_HEAD(nfs4_clientid_list);
-void
-init_nfsv4_state(struct nfs_server *server)
-{
- server->nfs4_state = NULL;
- INIT_LIST_HEAD(&server->nfs4_siblings);
-}
-
-void
-destroy_nfsv4_state(struct nfs_server *server)
-{
- kfree(server->mnt_path);
- server->mnt_path = NULL;
- if (server->nfs4_state) {
- nfs4_put_client(server->nfs4_state);
- server->nfs4_state = NULL;
- }
-}
-
-/*
- * nfs4_get_client(): returns an empty client structure
- * nfs4_put_client(): drops reference to client structure
- *
- * Since these are allocated/deallocated very rarely, we don't
- * bother putting them in a slab cache...
- */
-static struct nfs4_client *
-nfs4_alloc_client(struct in_addr *addr)
-{
- struct nfs4_client *clp;
-
- if (nfs_callback_up() < 0)
- return NULL;
- if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
- nfs_callback_down();
- return NULL;
- }
- memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
- init_rwsem(&clp->cl_sem);
- INIT_LIST_HEAD(&clp->cl_delegations);
- INIT_LIST_HEAD(&clp->cl_state_owners);
- INIT_LIST_HEAD(&clp->cl_unused);
- spin_lock_init(&clp->cl_lock);
- atomic_set(&clp->cl_count, 1);
- INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
- INIT_LIST_HEAD(&clp->cl_superblocks);
- rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
- clp->cl_rpcclient = ERR_PTR(-EINVAL);
- clp->cl_boot_time = CURRENT_TIME;
- clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
- return clp;
-}
-
-static void
-nfs4_free_client(struct nfs4_client *clp)
-{
- struct nfs4_state_owner *sp;
-
- while (!list_empty(&clp->cl_unused)) {
- sp = list_entry(clp->cl_unused.next,
- struct nfs4_state_owner,
- so_list);
- list_del(&sp->so_list);
- kfree(sp);
- }
- BUG_ON(!list_empty(&clp->cl_state_owners));
- nfs_idmap_delete(clp);
- if (!IS_ERR(clp->cl_rpcclient))
- rpc_shutdown_client(clp->cl_rpcclient);
- kfree(clp);
- nfs_callback_down();
-}
-
-static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
-{
- struct nfs4_client *clp;
- list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
- if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
- atomic_inc(&clp->cl_count);
- return clp;
- }
- }
- return NULL;
-}
-
-struct nfs4_client *nfs4_find_client(struct in_addr *addr)
-{
- struct nfs4_client *clp;
- spin_lock(&state_spinlock);
- clp = __nfs4_find_client(addr);
- spin_unlock(&state_spinlock);
- return clp;
-}
-
-struct nfs4_client *
-nfs4_get_client(struct in_addr *addr)
-{
- struct nfs4_client *clp, *new = NULL;
-
- spin_lock(&state_spinlock);
- for (;;) {
- clp = __nfs4_find_client(addr);
- if (clp != NULL)
- break;
- clp = new;
- if (clp != NULL) {
- list_add(&clp->cl_servers, &nfs4_clientid_list);
- new = NULL;
- break;
- }
- spin_unlock(&state_spinlock);
- new = nfs4_alloc_client(addr);
- spin_lock(&state_spinlock);
- if (new == NULL)
- break;
- }
- spin_unlock(&state_spinlock);
- if (new)
- nfs4_free_client(new);
- return clp;
-}
-
-void
-nfs4_put_client(struct nfs4_client *clp)
-{
- if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
- return;
- list_del(&clp->cl_servers);
- spin_unlock(&state_spinlock);
- BUG_ON(!list_empty(&clp->cl_superblocks));
- rpc_wake_up(&clp->cl_rpcwaitq);
- nfs4_kill_renewd(clp);
- nfs4_free_client(clp);
-}
-
-static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
+static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
{
int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
nfs_callback_tcpport, cred);
@@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
}
u32
-nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+nfs4_alloc_lockowner_id(struct nfs_client *clp)
{
return clp->cl_lockowner_id ++;
}
static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
{
struct nfs4_state_owner *sp = NULL;
@@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
return sp;
}
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
struct rpc_cred *cred = NULL;
@@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
return cred;
}
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
@@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
}
static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
{
struct nfs4_state_owner *sp, *res = NULL;
@@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void)
void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- struct nfs4_client *clp = sp->so_client;
+ struct nfs_client *clp = sp->so_client;
spin_lock(&clp->cl_lock);
list_del_init(&sp->so_list);
spin_unlock(&clp->cl_lock);
@@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
*/
struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
{
- struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_client *clp = server->nfs_client;
struct nfs4_state_owner *sp, *new;
get_rpccred(cred);
@@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
*/
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
- struct nfs4_client *clp = sp->so_client;
+ struct nfs_client *clp = sp->so_client;
struct rpc_cred *cred = sp->so_cred;
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
- struct nfs4_client *clp = state->owner->so_client;
+ struct nfs_client *clp = state->owner->so_client;
lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
if (lsp == NULL)
@@ -752,7 +618,7 @@ out:
static int reclaimer(void *);
-static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
{
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
/*
* State recovery routine
*/
-static void nfs4_recover_state(struct nfs4_client *clp)
+static void nfs4_recover_state(struct nfs_client *clp)
{
struct task_struct *task;
__module_get(THIS_MODULE);
atomic_inc(&clp->cl_count);
task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
- NIPQUAD(clp->cl_addr));
+ NIPQUAD(clp->cl_addr.sin_addr));
if (!IS_ERR(task))
return;
nfs4_clear_recover_bit(clp);
- nfs4_put_client(clp);
+ nfs_put_client(clp);
module_put(THIS_MODULE);
}
/*
* Schedule a state recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -879,7 +745,7 @@ out_err:
return status;
}
-static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+static void nfs4_state_mark_reclaim(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state;
@@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
static int reclaimer(void *ptr)
{
- struct nfs4_client *clp = ptr;
+ struct nfs_client *clp = ptr;
struct nfs4_state_owner *sp;
struct nfs4_state_recovery_ops *ops;
struct rpc_cred *cred;
@@ -970,12 +836,12 @@ out:
if (status == -NFS4ERR_CB_PATH_DOWN)
nfs_handle_cb_pathdown(clp);
nfs4_clear_recover_bit(clp);
- nfs4_put_client(clp);
+ nfs_put_client(clp);
module_put_and_exit(0);
return 0;
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
- NIPQUAD(clp->cl_addr.s_addr), -status);
+ NIPQUAD(clp->cl_addr.sin_addr), -status);
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
goto out;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8fb31c6..3dd413f52da1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
/* Mapping from NFS error code to "errno" error code. */
#define errno_NFSERR_IO EIO
-static int nfs_stat_to_errno(int);
+static int nfs4_stat_to_errno(int);
/* NFSv4 COMPOUND tags are only wanted for debugging purposes */
#ifdef DEBUG
@@ -128,7 +128,7 @@ static int nfs_stat_to_errno(int);
#define decode_link_maxsz (op_decode_hdr_maxsz + 5)
#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
1 + nfs4_name_maxsz + \
- nfs4_path_maxsz + \
+ 1 + \
nfs4_fattr_maxsz)
#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
#define encode_create_maxsz (op_encode_hdr_maxsz + \
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
if (iap->ia_valid & ATTR_MODE)
len += 4;
if (iap->ia_valid & ATTR_UID) {
- owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+ owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
if (owner_namelen < 0) {
printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
}
if (iap->ia_valid & ATTR_GID) {
- owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+ owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
if (owner_grouplen < 0) {
printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
iap->ia_gid);
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
switch (create->ftype) {
case NF4LNK:
- RESERVE_SPACE(4 + create->u.symlink->len);
- WRITE32(create->u.symlink->len);
- WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+ RESERVE_SPACE(4);
+ WRITE32(create->u.symlink.len);
+ xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
break;
case NF4BLK: case NF4CHR:
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
return 0;
}
-static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
{
uint32_t *p;
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
return 0;
}
-static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
{
uint32_t *p;
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
/*
* a RENEW request
*/
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
{
struct xdr_stream xdr;
struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
/*
* a SETCLIENTID_CONFIRM request
*/
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
{
struct xdr_stream xdr;
struct compound_hdr hdr = {
@@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
}
READ32(nfserr);
if (nfserr != NFS_OK)
- return -nfs_stat_to_errno(nfserr);
+ return -nfs4_stat_to_errno(nfserr);
return 0;
}
/* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
{
uint32_t *p;
unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
return 0;
}
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
{
uint32_t len, *p;
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
return 0;
}
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
{
uint32_t len, *p;
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
fattr->mode |= fmode;
if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
goto xdr_error;
- if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+ if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
goto xdr_error;
- if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+ if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
goto xdr_error;
if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
if (decode_space_limit(xdr, &res->maxsize) < 0)
return -EIO;
}
- return decode_ace(xdr, NULL, res->server->nfs4_state);
+ return decode_ace(xdr, NULL, res->server->nfs_client);
}
static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
return 0;
}
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
{
uint32_t *p;
uint32_t opnum;
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
READ_BUF(len);
return -NFSERR_CLID_INUSE;
} else
- return -nfs_stat_to_errno(nfserr);
+ return -nfs4_stat_to_errno(nfserr);
return 0;
}
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
if (!status)
status = decode_fsinfo(&xdr, fsinfo);
if (!status)
- status = -nfs_stat_to_errno(hdr.status);
+ status = -nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
* a SETCLIENTID request
*/
static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
- struct nfs4_client *clp)
+ struct nfs_client *clp)
{
struct xdr_stream xdr;
struct compound_hdr hdr;
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
if (!status)
status = decode_setclientid(&xdr, clp);
if (!status)
- status = -nfs_stat_to_errno(hdr.status);
+ status = -nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
if (!status)
status = decode_fsinfo(&xdr, fsinfo);
if (!status)
- status = -nfs_stat_to_errno(hdr.status);
+ status = -nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4521,7 +4521,7 @@ static struct {
* This one is used jointly by NFSv2 and NFSv3.
*/
static int
-nfs_stat_to_errno(int stat)
+nfs4_stat_to_errno(int stat)
{
int i;
for (i = 0; nfs_errtbl[i].stat != -1; i++) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 36e902a88ca1..829af323f288 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -392,7 +392,6 @@ int __init nfs_init_nfspagecache(void)
void nfs_destroy_nfspagecache(void)
{
- if (kmem_cache_destroy(nfs_page_cachep))
- printk(KERN_INFO "nfs_page: not all structures were freed\n");
+ kmem_cache_destroy(nfs_page_cachep);
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea3229e..4529cc4f3f8f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call getattr\n", __FUNCTION__);
nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client_sys, &msg, 0);
+ status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
msg.rpc_resp = &fsinfo;
- status = rpc_call_sync(server->client_sys, &msg, 0);
+ status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
@@ -352,7 +352,7 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *
{
struct nfs_diropargs *arg;
- arg = (struct nfs_diropargs *)kmalloc(sizeof(*arg), GFP_KERNEL);
+ arg = kmalloc(sizeof(*arg), GFP_KERNEL);
if (!arg)
return -ENOMEM;
arg->fh = NFS_FH(dir->d_inode);
@@ -425,16 +425,17 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
}
static int
-nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
- struct iattr *sattr, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+ unsigned int len, struct iattr *sattr)
{
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
struct nfs_symlinkargs arg = {
.fromfh = NFS_FH(dir),
- .fromname = name->name,
- .fromlen = name->len,
- .topath = path->name,
- .tolen = path->len,
+ .fromname = dentry->d_name.name,
+ .fromlen = dentry->d_name.len,
+ .pages = &page,
+ .pathlen = len,
.sattr = sattr
};
struct rpc_message msg = {
@@ -443,13 +444,25 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
};
int status;
- if (path->len > NFS2_MAXPATHLEN)
+ if (len > NFS2_MAXPATHLEN)
return -ENAMETOOLONG;
- dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
- nfs_fattr_init(fattr);
- fhandle->size = 0;
+
+ dprintk("NFS call symlink %s\n", dentry->d_name.name);
+
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
+
+ /*
+ * V2 SYMLINK requests don't return any attributes. Setting the
+ * filehandle size to zero indicates to nfs_instantiate that it
+ * should fill in the data with a LOOKUP call on the wire.
+ */
+ if (status == 0) {
+ nfs_fattr_init(&fattr);
+ fhandle.size = 0;
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+ }
+
dprintk("NFS reply symlink: %d\n", status);
return status;
}
@@ -671,7 +684,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
}
-struct nfs_rpc_ops nfs_v2_clientops = {
+const struct nfs_rpc_ops nfs_v2_clientops = {
.version = 2, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f0aff824a291..c2e49c397a27 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
rdata->args.offset = page_offset(page) + rdata->args.pgbase;
dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
- NFS_SERVER(inode)->hostname,
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
(unsigned long long)rdata->args.pgbase,
@@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
- /* Is this a short read? */
- if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+ if (task->tk_status < 0) {
+ if (task->tk_status == -ESTALE) {
+ set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+ nfs_mark_for_revalidate(data->inode);
+ }
+ } else if (resp->count < argp->count && !resp->eof) {
+ /* This is a short read! */
nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count != 0) {
@@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
if (error)
goto out_error;
+ error = -ESTALE;
+ if (NFS_STALE(inode))
+ goto out_error;
+
if (file == NULL) {
ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (ctx == NULL)
@@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
};
struct inode *inode = mapping->host;
struct nfs_server *server = NFS_SERVER(inode);
- int ret;
+ int ret = -ESTALE;
dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
inode->i_sb->s_id,
@@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ if (NFS_STALE(inode))
+ goto out;
+
if (filp == NULL) {
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (desc.ctx == NULL)
@@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
ret = err;
}
put_nfs_open_context(desc.ctx);
+out:
return ret;
}
@@ -724,6 +737,5 @@ int __init nfs_init_readpagecache(void)
void nfs_destroy_readpagecache(void)
{
mempool_destroy(nfs_rdata_mempool);
- if (kmem_cache_destroy(nfs_rdata_cachep))
- printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+ kmem_cache_destroy(nfs_rdata_cachep);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee74d9d..e8d40030cab4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
*
* Split from inode.c by David Howells <dhowells@redhat.com>
*
+ * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
+ * particular server are held in the same superblock
+ * - NFS superblocks can have several effective roots to the dentry tree
+ * - directory type roots are spliced into the tree when a path from one root reaches the root
+ * of another (see nfs_lookup())
*/
#include <linux/config.h>
@@ -52,66 +57,12 @@
#define NFSDBG_FACILITY NFSDBG_VFS
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- * their needs. People that do NFS over a slow network, might for
- * instance want to reduce it to something closer to 1 for improved
- * interactive response.
- */
-#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
-
-/*
- * RPC cruft for NFS
- */
-static struct rpc_version * nfs_version[] = {
- NULL,
- NULL,
- &nfs_version2,
-#if defined(CONFIG_NFS_V3)
- &nfs_version3,
-#elif defined(CONFIG_NFS_V4)
- NULL,
-#endif
-#if defined(CONFIG_NFS_V4)
- &nfs_version4,
-#endif
-};
-
-static struct rpc_program nfs_program = {
- .name = "nfs",
- .number = NFS_PROGRAM,
- .nrvers = ARRAY_SIZE(nfs_version),
- .version = nfs_version,
- .stats = &nfs_rpcstat,
- .pipe_dir_name = "/nfs",
-};
-
-struct rpc_stat nfs_rpcstat = {
- .program = &nfs_program
-};
-
-
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version * nfsacl_version[] = {
- [3] = &nfsacl_version3,
-};
-
-struct rpc_program nfsacl_program = {
- .name = "nfsacl",
- .number = NFS_ACL_PROGRAM,
- .nrvers = ARRAY_SIZE(nfsacl_version),
- .version = nfsacl_version,
- .stats = &nfsacl_rpcstat,
-};
-#endif /* CONFIG_NFS_V3_ACL */
-
static void nfs_umount_begin(struct vfsmount *, int);
static int nfs_statfs(struct dentry *, struct kstatfs *);
static int nfs_show_options(struct seq_file *, struct vfsmount *);
static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static void nfs_kill_super(struct super_block *);
@@ -120,15 +71,15 @@ static struct file_system_type nfs_fs_type = {
.name = "nfs",
.get_sb = nfs_get_sb,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-struct file_system_type clone_nfs_fs_type = {
+struct file_system_type nfs_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs",
- .get_sb = nfs_clone_nfs_sb,
+ .get_sb = nfs_xdev_get_sb,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
static struct super_operations nfs_sops = {
@@ -145,10 +96,10 @@ static struct super_operations nfs_sops = {
#ifdef CONFIG_NFS_V4
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
@@ -156,23 +107,23 @@ static struct file_system_type nfs4_fs_type = {
.name = "nfs4",
.get_sb = nfs4_get_sb,
.kill_sb = nfs4_kill_super,
- .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-struct file_system_type clone_nfs4_fs_type = {
+struct file_system_type nfs4_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs_clone_nfs4_sb,
+ .get_sb = nfs4_xdev_get_sb,
.kill_sb = nfs4_kill_super,
- .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-struct file_system_type nfs_referral_nfs4_fs_type = {
+struct file_system_type nfs4_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
- .get_sb = nfs_referral_nfs4_sb,
+ .get_sb = nfs4_referral_get_sb,
.kill_sb = nfs4_kill_super,
- .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
static struct super_operations nfs4_sops = {
@@ -187,39 +138,7 @@ static struct super_operations nfs4_sops = {
};
#endif
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-
-static int param_set_port(const char *val, struct kernel_param *kp)
-{
- char *endp;
- int num = simple_strtol(val, &endp, 0);
- if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
- return -EINVAL;
- *((int *)kp->arg) = num;
- return 0;
-}
-
-module_param_call(callback_tcpport, param_set_port, param_get_int,
- &nfs_callback_set_tcpport, 0644);
-#endif
-
-#ifdef CONFIG_NFS_V4
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
- char *endp;
- int num = simple_strtol(val, &endp, 0);
- int jif = num * HZ;
- if (endp == val || *endp || num < 0 || jif < num)
- return -EINVAL;
- *((int *)kp->arg) = jif;
- return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
- &nfs_idmap_cache_timeout, 0644);
-#endif
+static struct shrinker *acl_shrinker;
/*
* Register the NFS filesystems
@@ -240,6 +159,7 @@ int __init register_nfs_fs(void)
if (ret < 0)
goto error_2;
#endif
+ acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
return 0;
#ifdef CONFIG_NFS_V4
@@ -257,6 +177,8 @@ error_0:
*/
void __exit unregister_nfs_fs(void)
{
+ if (acl_shrinker != NULL)
+ remove_shrinker(acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
nfs_unregister_sysctl();
@@ -269,11 +191,10 @@ void __exit unregister_nfs_fs(void)
*/
static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
- struct super_block *sb = dentry->d_sb;
- struct nfs_server *server = NFS_SB(sb);
+ struct nfs_server *server = NFS_SB(dentry->d_sb);
unsigned char blockbits;
unsigned long blockres;
- struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+ struct nfs_fh *fh = NFS_FH(dentry->d_inode);
struct nfs_fattr fattr;
struct nfs_fsstat res = {
.fattr = &fattr,
@@ -282,7 +203,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
lock_kernel();
- error = server->rpc_ops->statfs(server, rootfh, &res);
+ error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
buf->f_type = NFS_SUPER_MAGIC;
if (error < 0)
goto out_err;
@@ -292,7 +213,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* case where f_frsize != f_bsize. Eventually we want to
* report the value of wtmult in this field.
*/
- buf->f_frsize = sb->s_blocksize;
+ buf->f_frsize = dentry->d_sb->s_blocksize;
/*
* On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -301,8 +222,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* thus historically Linux's sys_statfs reports these
* fields in units of f_bsize.
*/
- buf->f_bsize = sb->s_blocksize;
- blockbits = sb->s_blocksize_bits;
+ buf->f_bsize = dentry->d_sb->s_blocksize;
+ blockbits = dentry->d_sb->s_blocksize_bits;
blockres = (1 << blockbits) - 1;
buf->f_blocks = (res.tbytes + blockres) >> blockbits;
buf->f_bfree = (res.fbytes + blockres) >> blockbits;
@@ -323,9 +244,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
+/*
+ * Map the security flavour number to a name
+ */
static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
{
- static struct {
+ static const struct {
rpc_authflavor_t flavour;
const char *str;
} sec_flavours[] = {
@@ -356,10 +280,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
*/
static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
{
- static struct proc_nfs_info {
+ static const struct proc_nfs_info {
int flag;
- char *str;
- char *nostr;
+ const char *str;
+ const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
{ NFS_MOUNT_INTR, ",intr", "" },
@@ -369,11 +293,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
{ NFS_MOUNT_NOACL, ",noacl", "" },
{ 0, NULL, NULL }
};
- struct proc_nfs_info *nfs_infop;
+ const struct proc_nfs_info *nfs_infop;
+ struct nfs_client *clp = nfss->nfs_client;
char buf[12];
- char *proto;
+ const char *proto;
- seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
+ seq_printf(m, ",vers=%d", clp->rpc_ops->version);
seq_printf(m, ",rsize=%d", nfss->rsize);
seq_printf(m, ",wsize=%d", nfss->wsize);
if (nfss->acregmin != 3*HZ || showdefaults)
@@ -402,8 +327,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
proto = buf;
}
seq_printf(m, ",proto=%s", proto);
- seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
- seq_printf(m, ",retrans=%u", nfss->retrans_count);
+ seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
+ seq_printf(m, ",retrans=%u", clp->retrans_count);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
}
@@ -417,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
nfs_show_mount_options(m, nfss, 0);
seq_puts(m, ",addr=");
- seq_escape(m, nfss->hostname, " \t\n\\");
+ seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
return 0;
}
@@ -454,7 +379,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, ",namelen=%d", nfss->namelen);
#ifdef CONFIG_NFS_V4
- if (nfss->rpc_ops->version == 4) {
+ if (nfss->nfs_client->cl_nfsversion == 4) {
seq_printf(m, "\n\tnfsv4:\t");
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -501,782 +426,353 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
/*
* Begin unmount by attempting to remove all automounted mountpoints we added
- * in response to traversals
+ * in response to xdev traversals and referrals
*/
static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
{
- struct nfs_server *server;
- struct rpc_clnt *rpc;
-
shrink_submounts(vfsmnt, &nfs_automount_list);
- if (!(flags & MNT_FORCE))
- return;
- /* -EIO all pending I/O */
- server = NFS_SB(vfsmnt->mnt_sb);
- rpc = server->client;
- if (!IS_ERR(rpc))
- rpc_killall_tasks(rpc);
- rpc = server->client_acl;
- if (!IS_ERR(rpc))
- rpc_killall_tasks(rpc);
}
/*
- * Obtain the root inode of the file system.
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
*/
-static struct inode *
-nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+static int nfs_validate_mount_data(struct nfs_mount_data *data,
+ struct nfs_fh *mntfh)
{
- struct nfs_server *server = NFS_SB(sb);
- int error;
-
- error = server->rpc_ops->getroot(server, rootfh, fsinfo);
- if (error < 0) {
- dprintk("nfs_get_root: getattr error = %d\n", -error);
- return ERR_PTR(error);
+ if (data == NULL) {
+ dprintk("%s: missing data argument\n", __FUNCTION__);
+ return -EINVAL;
}
- server->fsid = fsinfo->fattr->fsid;
- return nfs_fhget(sb, rootfh, fsinfo->fattr);
-}
-
-/*
- * Do NFS version-independent mount processing, and sanity checking
- */
-static int
-nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
-{
- struct nfs_server *server;
- struct inode *root_inode;
- struct nfs_fattr fattr;
- struct nfs_fsinfo fsinfo = {
- .fattr = &fattr,
- };
- struct nfs_pathconf pathinfo = {
- .fattr = &fattr,
- };
- int no_root_error = 0;
- unsigned long max_rpc_payload;
-
- /* We probably want something more informative here */
- snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
- server = NFS_SB(sb);
+ if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+ dprintk("%s: bad mount version\n", __FUNCTION__);
+ return -EINVAL;
+ }
- sb->s_magic = NFS_SUPER_MAGIC;
+ switch (data->version) {
+ case 1:
+ data->namlen = 0;
+ case 2:
+ data->bsize = 0;
+ case 3:
+ if (data->flags & NFS_MOUNT_VER3) {
+ dprintk("%s: mount structure version %d does not support NFSv3\n",
+ __FUNCTION__,
+ data->version);
+ return -EINVAL;
+ }
+ data->root.size = NFS2_FHSIZE;
+ memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+ case 4:
+ if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+ dprintk("%s: mount structure version %d does not support strong security\n",
+ __FUNCTION__,
+ data->version);
+ return -EINVAL;
+ }
+ case 5:
+ memset(data->context, 0, sizeof(data->context));
+ }
- server->io_stats = nfs_alloc_iostats();
- if (server->io_stats == NULL)
- return -ENOMEM;
+ /* Set the pseudoflavor */
+ if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+ data->pseudoflavor = RPC_AUTH_UNIX;
- root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
- /* Did getting the root inode fail? */
- if (IS_ERR(root_inode)) {
- no_root_error = PTR_ERR(root_inode);
- goto out_no_root;
- }
- sb->s_root = d_alloc_root(root_inode);
- if (!sb->s_root) {
- no_root_error = -ENOMEM;
- goto out_no_root;
+#ifndef CONFIG_NFS_V3
+ /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+ if (data->flags & NFS_MOUNT_VER3) {
+ dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+ return -EPROTONOSUPPORT;
}
- sb->s_root->d_op = server->rpc_ops->dentry_ops;
-
- /* mount time stamp, in seconds */
- server->mount_time = jiffies;
-
- /* Get some general file system info */
- if (server->namelen == 0 &&
- server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
- server->namelen = pathinfo.max_namelen;
- /* Work out a lot of parameters */
- if (server->rsize == 0)
- server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
- if (server->wsize == 0)
- server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-
- if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
- server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
- if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
- server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
-
- max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
- if (server->rsize > max_rpc_payload)
- server->rsize = max_rpc_payload;
- if (server->rsize > NFS_MAX_FILE_IO_SIZE)
- server->rsize = NFS_MAX_FILE_IO_SIZE;
- server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
- if (server->wsize > max_rpc_payload)
- server->wsize = max_rpc_payload;
- if (server->wsize > NFS_MAX_FILE_IO_SIZE)
- server->wsize = NFS_MAX_FILE_IO_SIZE;
- server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+#endif /* CONFIG_NFS_V3 */
- if (sb->s_blocksize == 0)
- sb->s_blocksize = nfs_block_bits(server->wsize,
- &sb->s_blocksize_bits);
- server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
-
- server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
- if (server->dtsize > PAGE_CACHE_SIZE)
- server->dtsize = PAGE_CACHE_SIZE;
- if (server->dtsize > server->rsize)
- server->dtsize = server->rsize;
-
- if (server->flags & NFS_MOUNT_NOAC) {
- server->acregmin = server->acregmax = 0;
- server->acdirmin = server->acdirmax = 0;
- sb->s_flags |= MS_SYNCHRONOUS;
+ /* We now require that the mount process passes the remote address */
+ if (data->addr.sin_addr.s_addr == INADDR_ANY) {
+ dprintk("%s: mount program didn't pass remote address!\n",
+ __FUNCTION__);
+ return -EINVAL;
}
- server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
- nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+ /* Prepare the root filehandle */
+ if (data->flags & NFS_MOUNT_VER3)
+ mntfh->size = data->root.size;
+ else
+ mntfh->size = NFS2_FHSIZE;
+
+ if (mntfh->size > sizeof(mntfh->data)) {
+ dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+ return -EINVAL;
+ }
- server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
- server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+ memcpy(mntfh->data, data->root.data, mntfh->size);
+ if (mntfh->size < sizeof(mntfh->data))
+ memset(mntfh->data + mntfh->size, 0,
+ sizeof(mntfh->data) - mntfh->size);
- /* We're airborne Set socket buffersize */
- rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
return 0;
- /* Yargs. It didn't work out. */
-out_no_root:
- dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
- if (!IS_ERR(root_inode))
- iput(root_inode);
- return no_root_error;
}
/*
- * Initialise the timeout values for a connection
+ * Initialise the common bits of the superblock
*/
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
+static inline void nfs_initialise_sb(struct super_block *sb)
{
- to->to_initval = timeo * HZ / 10;
- to->to_retries = retrans;
- if (!to->to_retries)
- to->to_retries = 2;
-
- switch (proto) {
- case IPPROTO_TCP:
- if (!to->to_initval)
- to->to_initval = 60 * HZ;
- if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
- to->to_initval = NFS_MAX_TCP_TIMEOUT;
- to->to_increment = to->to_initval;
- to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
- to->to_exponential = 0;
- break;
- case IPPROTO_UDP:
- default:
- if (!to->to_initval)
- to->to_initval = 11 * HZ / 10;
- if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
- to->to_initval = NFS_MAX_UDP_TIMEOUT;
- to->to_maxval = NFS_MAX_UDP_TIMEOUT;
- to->to_exponential = 1;
- break;
- }
-}
+ struct nfs_server *server = NFS_SB(sb);
-/*
- * Create an RPC client handle.
- */
-static struct rpc_clnt *
-nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
-{
- struct rpc_timeout timeparms;
- struct rpc_xprt *xprt = NULL;
- struct rpc_clnt *clnt = NULL;
- int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-
- nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
-
- server->retrans_timeo = timeparms.to_initval;
- server->retrans_count = timeparms.to_retries;
-
- /* create transport and client */
- xprt = xprt_create_proto(proto, &server->addr, &timeparms);
- if (IS_ERR(xprt)) {
- dprintk("%s: cannot create RPC transport. Error = %ld\n",
- __FUNCTION__, PTR_ERR(xprt));
- return (struct rpc_clnt *)xprt;
- }
- clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
- server->rpc_ops->version, data->pseudoflavor);
- if (IS_ERR(clnt)) {
- dprintk("%s: cannot create RPC client. Error = %ld\n",
- __FUNCTION__, PTR_ERR(xprt));
- goto out_fail;
- }
+ sb->s_magic = NFS_SUPER_MAGIC;
- clnt->cl_intr = 1;
- clnt->cl_softrtry = 1;
+ /* We probably want something more informative here */
+ snprintf(sb->s_id, sizeof(sb->s_id),
+ "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+ if (sb->s_blocksize == 0)
+ sb->s_blocksize = nfs_block_bits(server->wsize,
+ &sb->s_blocksize_bits);
- return clnt;
+ if (server->flags & NFS_MOUNT_NOAC)
+ sb->s_flags |= MS_SYNCHRONOUS;
-out_fail:
- return clnt;
+ nfs_super_set_maxbytes(sb, server->maxfilesize);
}
/*
- * Clone a server record
+ * Finish setting up an NFS2/3 superblock
*/
-static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
+static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
{
struct nfs_server *server = NFS_SB(sb);
- struct nfs_server *parent = NFS_SB(data->sb);
- struct inode *root_inode;
- struct nfs_fsinfo fsinfo;
- void *err = ERR_PTR(-ENOMEM);
-
- sb->s_op = data->sb->s_op;
- sb->s_blocksize = data->sb->s_blocksize;
- sb->s_blocksize_bits = data->sb->s_blocksize_bits;
- sb->s_maxbytes = data->sb->s_maxbytes;
-
- server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
- server->io_stats = nfs_alloc_iostats();
- if (server->io_stats == NULL)
- goto out;
-
- server->client = rpc_clone_client(parent->client);
- if (IS_ERR((err = server->client)))
- goto out;
-
- if (!IS_ERR(parent->client_sys)) {
- server->client_sys = rpc_clone_client(parent->client_sys);
- if (IS_ERR((err = server->client_sys)))
- goto out;
- }
- if (!IS_ERR(parent->client_acl)) {
- server->client_acl = rpc_clone_client(parent->client_acl);
- if (IS_ERR((err = server->client_acl)))
- goto out;
- }
- root_inode = nfs_fhget(sb, data->fh, data->fattr);
- if (!root_inode)
- goto out;
- sb->s_root = d_alloc_root(root_inode);
- if (!sb->s_root)
- goto out_put_root;
- fsinfo.fattr = data->fattr;
- if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
- nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
- sb->s_root->d_op = server->rpc_ops->dentry_ops;
- sb->s_flags |= MS_ACTIVE;
- return server;
-out_put_root:
- iput(root_inode);
-out:
- return err;
-}
-/*
- * Copy an existing superblock and attach revised data
- */
-static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
- struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
- struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
- struct vfsmount *mnt)
-{
- struct nfs_server *server;
- struct nfs_server *parent = NFS_SB(data->sb);
- struct super_block *sb = ERR_PTR(-EINVAL);
- char *hostname;
- int error = -ENOMEM;
- int len;
-
- server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
- if (server == NULL)
- goto out_err;
- memcpy(server, parent, sizeof(*server));
- hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
- len = strlen(hostname) + 1;
- server->hostname = kmalloc(len, GFP_KERNEL);
- if (server->hostname == NULL)
- goto free_server;
- memcpy(server->hostname, hostname, len);
- error = rpciod_up();
- if (error != 0)
- goto free_hostname;
-
- sb = fill_sb(server, data);
- if (IS_ERR(sb)) {
- error = PTR_ERR(sb);
- goto kill_rpciod;
- }
-
- if (sb->s_root)
- goto out_rpciod_down;
+ sb->s_blocksize_bits = 0;
+ sb->s_blocksize = 0;
+ if (data->bsize)
+ sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
- server = fill_server(sb, data);
- if (IS_ERR(server)) {
- error = PTR_ERR(server);
- goto out_deactivate;
+ if (server->flags & NFS_MOUNT_VER3) {
+ /* The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
+ */
+ sb->s_flags |= MS_POSIXACL;
+ sb->s_time_gran = 1;
}
- return simple_set_mnt(mnt, sb);
-out_deactivate:
- up_write(&sb->s_umount);
- deactivate_super(sb);
- return error;
-out_rpciod_down:
- rpciod_down();
- kfree(server->hostname);
- kfree(server);
- return simple_set_mnt(mnt, sb);
-kill_rpciod:
- rpciod_down();
-free_hostname:
- kfree(server->hostname);
-free_server:
- kfree(server);
-out_err:
- return error;
+
+ sb->s_op = &nfs_sops;
+ nfs_initialise_sb(sb);
}
/*
- * Set up an NFS2/3 superblock
- *
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
+ * Finish setting up a cloned NFS2/3 superblock
*/
-static int
-nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+static void nfs_clone_super(struct super_block *sb,
+ const struct super_block *old_sb)
{
- struct nfs_server *server;
- rpc_authflavor_t authflavor;
+ struct nfs_server *server = NFS_SB(sb);
- server = NFS_SB(sb);
- sb->s_blocksize_bits = 0;
- sb->s_blocksize = 0;
- if (data->bsize)
- sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
- server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-
- server->acregmin = data->acregmin*HZ;
- server->acregmax = data->acregmax*HZ;
- server->acdirmin = data->acdirmin*HZ;
- server->acdirmax = data->acdirmax*HZ;
-
- /* Start lockd here, before we might error out */
- if (!(server->flags & NFS_MOUNT_NONLM))
- lockd_up();
-
- server->namelen = data->namlen;
- server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
- if (!server->hostname)
- return -ENOMEM;
- strcpy(server->hostname, data->hostname);
-
- /* Check NFS protocol revision and initialize RPC op vector
- * and file handle pool. */
-#ifdef CONFIG_NFS_V3
- if (server->flags & NFS_MOUNT_VER3) {
- server->rpc_ops = &nfs_v3_clientops;
- server->caps |= NFS_CAP_READDIRPLUS;
- } else {
- server->rpc_ops = &nfs_v2_clientops;
- }
-#else
- server->rpc_ops = &nfs_v2_clientops;
-#endif
+ sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+ sb->s_blocksize = old_sb->s_blocksize;
+ sb->s_maxbytes = old_sb->s_maxbytes;
- /* Fill in pseudoflavor for mount version < 5 */
- if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
- data->pseudoflavor = RPC_AUTH_UNIX;
- authflavor = data->pseudoflavor; /* save for sb_init() */
- /* XXX maybe we want to add a server->pseudoflavor field */
-
- /* Create RPC client handles */
- server->client = nfs_create_client(server, data);
- if (IS_ERR(server->client))
- return PTR_ERR(server->client);
- /* RFC 2623, sec 2.3.2 */
- if (authflavor != RPC_AUTH_UNIX) {
- struct rpc_auth *auth;
-
- server->client_sys = rpc_clone_client(server->client);
- if (IS_ERR(server->client_sys))
- return PTR_ERR(server->client_sys);
- auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
- if (IS_ERR(auth))
- return PTR_ERR(auth);
- } else {
- atomic_inc(&server->client->cl_count);
- server->client_sys = server->client;
- }
if (server->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3_ACL
- if (!(server->flags & NFS_MOUNT_NOACL)) {
- server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
- /* No errors! Assume that Sun nfsacls are supported */
- if (!IS_ERR(server->client_acl))
- server->caps |= NFS_CAP_ACLS;
- }
-#else
- server->flags &= ~NFS_MOUNT_NOACL;
-#endif /* CONFIG_NFS_V3_ACL */
- /*
- * The VFS shouldn't apply the umask to mode bits. We will
- * do so ourselves when necessary.
+ /* The VFS shouldn't apply the umask to mode bits. We will do
+ * so ourselves when necessary.
*/
sb->s_flags |= MS_POSIXACL;
- if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
- server->namelen = NFS3_MAXNAMLEN;
sb->s_time_gran = 1;
- } else {
- if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
- server->namelen = NFS2_MAXNAMLEN;
}
- sb->s_op = &nfs_sops;
- return nfs_sb_init(sb, authflavor);
+ sb->s_op = old_sb->s_op;
+ nfs_initialise_sb(sb);
}
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, void *_server)
{
- s->s_fs_info = data;
- return set_anon_super(s, data);
+ struct nfs_server *server = _server;
+ int ret;
+
+ s->s_fs_info = server;
+ ret = set_anon_super(s, server);
+ if (ret == 0)
+ server->s_dev = s->s_dev;
+ return ret;
}
static int nfs_compare_super(struct super_block *sb, void *data)
{
- struct nfs_server *server = data;
- struct nfs_server *old = NFS_SB(sb);
+ struct nfs_server *server = data, *old = NFS_SB(sb);
- if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
+ if (old->nfs_client != server->nfs_client)
return 0;
- if (old->addr.sin_port != server->addr.sin_port)
+ if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
return 0;
- return !nfs_compare_fh(&old->fh, &server->fh);
+ return 1;
}
static int nfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
- int error;
struct nfs_server *server = NULL;
struct super_block *s;
- struct nfs_fh *root;
+ struct nfs_fh mntfh;
struct nfs_mount_data *data = raw_data;
+ struct dentry *mntroot;
+ int error;
- error = -EINVAL;
- if (data == NULL) {
- dprintk("%s: missing data argument\n", __FUNCTION__);
- goto out_err_noserver;
- }
- if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
- dprintk("%s: bad mount version\n", __FUNCTION__);
- goto out_err_noserver;
- }
- switch (data->version) {
- case 1:
- data->namlen = 0;
- case 2:
- data->bsize = 0;
- case 3:
- if (data->flags & NFS_MOUNT_VER3) {
- dprintk("%s: mount structure version %d does not support NFSv3\n",
- __FUNCTION__,
- data->version);
- goto out_err_noserver;
- }
- data->root.size = NFS2_FHSIZE;
- memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
- case 4:
- if (data->flags & NFS_MOUNT_SECFLAVOUR) {
- dprintk("%s: mount structure version %d does not support strong security\n",
- __FUNCTION__,
- data->version);
- goto out_err_noserver;
- }
- case 5:
- memset(data->context, 0, sizeof(data->context));
- }
-#ifndef CONFIG_NFS_V3
- /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
- error = -EPROTONOSUPPORT;
- if (data->flags & NFS_MOUNT_VER3) {
- dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
- goto out_err_noserver;
- }
-#endif /* CONFIG_NFS_V3 */
+ /* Validate the mount data */
+ error = nfs_validate_mount_data(data, &mntfh);
+ if (error < 0)
+ return error;
- error = -ENOMEM;
- server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
- if (!server)
+ /* Get a volume representation */
+ server = nfs_create_server(data, &mntfh);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
goto out_err_noserver;
- /* Zero out the NFS state stuff */
- init_nfsv4_state(server);
- server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
-
- root = &server->fh;
- if (data->flags & NFS_MOUNT_VER3)
- root->size = data->root.size;
- else
- root->size = NFS2_FHSIZE;
- error = -EINVAL;
- if (root->size > sizeof(root->data)) {
- dprintk("%s: invalid root filehandle\n", __FUNCTION__);
- goto out_err;
- }
- memcpy(root->data, data->root.data, root->size);
-
- /* We now require that the mount process passes the remote address */
- memcpy(&server->addr, &data->addr, sizeof(server->addr));
- if (server->addr.sin_addr.s_addr == INADDR_ANY) {
- dprintk("%s: mount program didn't pass remote address!\n",
- __FUNCTION__);
- goto out_err;
- }
-
- /* Fire up rpciod if not yet running */
- error = rpciod_up();
- if (error < 0) {
- dprintk("%s: couldn't start rpciod! Error = %d\n",
- __FUNCTION__, error);
- goto out_err;
}
+ /* Get a superblock - note that we may end up sharing one that already exists */
s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
- goto out_err_rpciod;
+ goto out_err_nosb;
}
- if (s->s_root)
- goto out_rpciod_down;
+ if (s->s_fs_info != server) {
+ nfs_free_server(server);
+ server = NULL;
+ }
- s->s_flags = flags;
+ if (!s->s_root) {
+ /* initial superblock/root creation */
+ s->s_flags = flags;
+ nfs_fill_super(s, data);
+ }
- error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- up_write(&s->s_umount);
- deactivate_super(s);
- return error;
+ mntroot = nfs_get_root(s, &mntfh);
+ if (IS_ERR(mntroot)) {
+ error = PTR_ERR(mntroot);
+ goto error_splat_super;
}
- s->s_flags |= MS_ACTIVE;
- return simple_set_mnt(mnt, s);
-out_rpciod_down:
- rpciod_down();
- kfree(server);
- return simple_set_mnt(mnt, s);
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
+ return 0;
-out_err_rpciod:
- rpciod_down();
-out_err:
- kfree(server);
+out_err_nosb:
+ nfs_free_server(server);
out_err_noserver:
return error;
+
+error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ return error;
}
+/*
+ * Destroy an NFS2/3 superblock
+ */
static void nfs_kill_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
kill_anon_super(s);
-
- if (!IS_ERR(server->client))
- rpc_shutdown_client(server->client);
- if (!IS_ERR(server->client_sys))
- rpc_shutdown_client(server->client_sys);
- if (!IS_ERR(server->client_acl))
- rpc_shutdown_client(server->client_acl);
-
- if (!(server->flags & NFS_MOUNT_NONLM))
- lockd_down(); /* release rpc.lockd */
-
- rpciod_down(); /* release rpciod */
-
- nfs_free_iostats(server->io_stats);
- kfree(server->hostname);
- kfree(server);
- nfs_release_automount_timer();
-}
-
-static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
- struct super_block *sb;
-
- server->fsid = data->fattr->fsid;
- nfs_copy_fh(&server->fh, data->fh);
- sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
- if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
- lockd_up();
- return sb;
+ nfs_free_server(server);
}
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
{
struct nfs_clone_mount *data = raw_data;
- return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
-}
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
+ int error;
-#ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
- struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
-{
- struct nfs4_client *clp;
- struct rpc_xprt *xprt = NULL;
- struct rpc_clnt *clnt = NULL;
- int err = -EIO;
-
- clp = nfs4_get_client(&server->addr.sin_addr);
- if (!clp) {
- dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
- return ERR_PTR(err);
- }
+ dprintk("--> nfs_xdev_get_sb()\n");
- /* Now create transport and client */
- down_write(&clp->cl_sem);
- if (IS_ERR(clp->cl_rpcclient)) {
- xprt = xprt_create_proto(proto, &server->addr, timeparms);
- if (IS_ERR(xprt)) {
- up_write(&clp->cl_sem);
- err = PTR_ERR(xprt);
- dprintk("%s: cannot create RPC transport. Error = %d\n",
- __FUNCTION__, err);
- goto out_fail;
- }
- /* Bind to a reserved port! */
- xprt->resvport = 1;
- clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
- server->rpc_ops->version, flavor);
- if (IS_ERR(clnt)) {
- up_write(&clp->cl_sem);
- err = PTR_ERR(clnt);
- dprintk("%s: cannot create RPC client. Error = %d\n",
- __FUNCTION__, err);
- goto out_fail;
- }
- clnt->cl_intr = 1;
- clnt->cl_softrtry = 1;
- clp->cl_rpcclient = clnt;
- memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
- nfs_idmap_new(clp);
- }
- list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
- clnt = rpc_clone_client(clp->cl_rpcclient);
- if (!IS_ERR(clnt))
- server->nfs4_state = clp;
- up_write(&clp->cl_sem);
- clp = NULL;
-
- if (IS_ERR(clnt)) {
- dprintk("%s: cannot create RPC client. Error = %d\n",
- __FUNCTION__, err);
- return clnt;
+ /* create a new volume representation */
+ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+ goto out_err_noserver;
}
- if (server->nfs4_state->cl_idmap == NULL) {
- dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
- return ERR_PTR(-ENOMEM);
+ /* Get a superblock - note that we may end up sharing one that already exists */
+ s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
}
- if (clnt->cl_auth->au_flavor != flavor) {
- struct rpc_auth *auth;
-
- auth = rpcauth_create(flavor, clnt);
- if (IS_ERR(auth)) {
- dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
- return (struct rpc_clnt *)auth;
- }
+ if (s->s_fs_info != server) {
+ nfs_free_server(server);
+ server = NULL;
}
- return clnt;
-
- out_fail:
- if (clp)
- nfs4_put_client(clp);
- return ERR_PTR(err);
-}
-
-/*
- * Set up an NFS4 superblock
- */
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
-{
- struct nfs_server *server;
- struct rpc_timeout timeparms;
- rpc_authflavor_t authflavour;
- int err = -EIO;
- sb->s_blocksize_bits = 0;
- sb->s_blocksize = 0;
- server = NFS_SB(sb);
- if (data->rsize != 0)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize != 0)
- server->wsize = nfs_block_size(data->wsize, NULL);
- server->flags = data->flags & NFS_MOUNT_FLAGMASK;
- server->caps = NFS_CAP_ATOMIC_OPEN;
+ if (!s->s_root) {
+ /* initial superblock/root creation */
+ s->s_flags = flags;
+ nfs_clone_super(s, data->sb);
+ }
- server->acregmin = data->acregmin*HZ;
- server->acregmax = data->acregmax*HZ;
- server->acdirmin = data->acdirmin*HZ;
- server->acdirmax = data->acdirmax*HZ;
+ mntroot = nfs_get_root(s, data->fh);
+ if (IS_ERR(mntroot)) {
+ error = PTR_ERR(mntroot);
+ goto error_splat_super;
+ }
- server->rpc_ops = &nfs_v4_clientops;
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
- nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
+ dprintk("<-- nfs_xdev_get_sb() = 0\n");
+ return 0;
- server->retrans_timeo = timeparms.to_initval;
- server->retrans_count = timeparms.to_retries;
+out_err_nosb:
+ nfs_free_server(server);
+out_err_noserver:
+ dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+ return error;
- /* Now create transport and client */
- authflavour = RPC_AUTH_UNIX;
- if (data->auth_flavourlen != 0) {
- if (data->auth_flavourlen != 1) {
- dprintk("%s: Invalid number of RPC auth flavours %d.\n",
- __FUNCTION__, data->auth_flavourlen);
- err = -EINVAL;
- goto out_fail;
- }
- if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
- err = -EFAULT;
- goto out_fail;
- }
- }
+error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+ return error;
+}
- server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
- if (IS_ERR(server->client)) {
- err = PTR_ERR(server->client);
- dprintk("%s: cannot create RPC client. Error = %d\n",
- __FUNCTION__, err);
- goto out_fail;
- }
+#ifdef CONFIG_NFS_V4
+/*
+ * Finish setting up a cloned NFS4 superblock
+ */
+static void nfs4_clone_super(struct super_block *sb,
+ const struct super_block *old_sb)
+{
+ sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+ sb->s_blocksize = old_sb->s_blocksize;
+ sb->s_maxbytes = old_sb->s_maxbytes;
sb->s_time_gran = 1;
-
- sb->s_op = &nfs4_sops;
- err = nfs_sb_init(sb, authflavour);
-
- out_fail:
- return err;
+ sb->s_op = old_sb->s_op;
+ nfs_initialise_sb(sb);
}
-static int nfs4_compare_super(struct super_block *sb, void *data)
+/*
+ * Set up an NFS4 superblock
+ */
+static void nfs4_fill_super(struct super_block *sb)
{
- struct nfs_server *server = data;
- struct nfs_server *old = NFS_SB(sb);
-
- if (strcmp(server->hostname, old->hostname) != 0)
- return 0;
- if (strcmp(server->mnt_path, old->mnt_path) != 0)
- return 0;
- return 1;
+ sb->s_time_gran = 1;
+ sb->s_op = &nfs4_sops;
+ nfs_initialise_sb(sb);
}
-static void *
-nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
{
void *p = NULL;
@@ -1297,14 +793,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
return dst;
}
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
- int error;
- struct nfs_server *server;
- struct super_block *s;
struct nfs4_mount_data *data = raw_data;
+ struct super_block *s;
+ struct nfs_server *server;
+ struct sockaddr_in addr;
+ rpc_authflavor_t authflavour;
+ struct nfs_fh mntfh;
+ struct dentry *mntroot;
+ char *mntpath = NULL, *hostname = NULL, ip_addr[16];
void *p;
+ int error;
if (data == NULL) {
dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1315,84 +819,112 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
return -EINVAL;
}
- server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
- if (!server)
- return -ENOMEM;
- /* Zero out the NFS state stuff */
- init_nfsv4_state(server);
- server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+ /* We now require that the mount process passes the remote address */
+ if (data->host_addrlen != sizeof(addr))
+ return -EINVAL;
+
+ if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+ return -EFAULT;
+
+ if (addr.sin_family != AF_INET ||
+ addr.sin_addr.s_addr == INADDR_ANY) {
+ dprintk("%s: mount program didn't pass remote IP address!\n",
+ __FUNCTION__);
+ return -EINVAL;
+ }
+ /* RFC3530: The default port for NFS is 2049 */
+ if (addr.sin_port == 0)
+ addr.sin_port = NFS_PORT;
+
+ /* Grab the authentication type */
+ authflavour = RPC_AUTH_UNIX;
+ if (data->auth_flavourlen != 0) {
+ if (data->auth_flavourlen != 1) {
+ dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+ __FUNCTION__, data->auth_flavourlen);
+ error = -EINVAL;
+ goto out_err_noserver;
+ }
+
+ if (copy_from_user(&authflavour, data->auth_flavours,
+ sizeof(authflavour))) {
+ error = -EFAULT;
+ goto out_err_noserver;
+ }
+ }
p = nfs_copy_user_string(NULL, &data->hostname, 256);
if (IS_ERR(p))
goto out_err;
- server->hostname = p;
+ hostname = p;
p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
if (IS_ERR(p))
goto out_err;
- server->mnt_path = p;
+ mntpath = p;
+
+ dprintk("MNTPATH: %s\n", mntpath);
- p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
- sizeof(server->ip_addr) - 1);
+ p = nfs_copy_user_string(ip_addr, &data->client_addr,
+ sizeof(ip_addr) - 1);
if (IS_ERR(p))
goto out_err;
- /* We now require that the mount process passes the remote address */
- if (data->host_addrlen != sizeof(server->addr)) {
- error = -EINVAL;
- goto out_free;
- }
- if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
- error = -EFAULT;
- goto out_free;
- }
- if (server->addr.sin_family != AF_INET ||
- server->addr.sin_addr.s_addr == INADDR_ANY) {
- dprintk("%s: mount program didn't pass remote IP address!\n",
- __FUNCTION__);
- error = -EINVAL;
- goto out_free;
- }
-
- /* Fire up rpciod if not yet running */
- error = rpciod_up();
- if (error < 0) {
- dprintk("%s: couldn't start rpciod! Error = %d\n",
- __FUNCTION__, error);
- goto out_free;
+ /* Get a volume representation */
+ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+ authflavour, &mntfh);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+ goto out_err_noserver;
}
- s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
+ /* Get a superblock - note that we may end up sharing one that already exists */
+ s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_free;
}
- if (s->s_root) {
- kfree(server->mnt_path);
- kfree(server->hostname);
- kfree(server);
- return simple_set_mnt(mnt, s);
+ if (s->s_fs_info != server) {
+ nfs_free_server(server);
+ server = NULL;
}
- s->s_flags = flags;
+ if (!s->s_root) {
+ /* initial superblock/root creation */
+ s->s_flags = flags;
+ nfs4_fill_super(s);
+ }
- error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- up_write(&s->s_umount);
- deactivate_super(s);
- return error;
+ mntroot = nfs4_get_root(s, &mntfh);
+ if (IS_ERR(mntroot)) {
+ error = PTR_ERR(mntroot);
+ goto error_splat_super;
}
+
s->s_flags |= MS_ACTIVE;
- return simple_set_mnt(mnt, s);
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
+ kfree(mntpath);
+ kfree(hostname);
+ return 0;
+
out_err:
error = PTR_ERR(p);
+ goto out_err_noserver;
+
out_free:
- kfree(server->mnt_path);
- kfree(server->hostname);
- kfree(server);
+ nfs_free_server(server);
+out_err_noserver:
+ kfree(mntpath);
+ kfree(hostname);
return error;
+
+error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ goto out_err_noserver;
}
static void nfs4_kill_super(struct super_block *sb)
@@ -1403,135 +935,140 @@ static void nfs4_kill_super(struct super_block *sb)
kill_anon_super(sb);
nfs4_renewd_prepare_shutdown(server);
+ nfs_free_server(server);
+}
+
+/*
+ * Clone an NFS4 server record on xdev traversal (FSID-change)
+ */
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
+{
+ struct nfs_clone_mount *data = raw_data;
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
+ int error;
+
+ dprintk("--> nfs4_xdev_get_sb()\n");
+
+ /* create a new volume representation */
+ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+ goto out_err_noserver;
+ }
+
+ /* Get a superblock - note that we may end up sharing one that already exists */
+ s = sget(&nfs4_fs_type, nfs_compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
+ }
- if (server->client != NULL && !IS_ERR(server->client))
- rpc_shutdown_client(server->client);
+ if (s->s_fs_info != server) {
+ nfs_free_server(server);
+ server = NULL;
+ }
- destroy_nfsv4_state(server);
+ if (!s->s_root) {
+ /* initial superblock/root creation */
+ s->s_flags = flags;
+ nfs4_clone_super(s, data->sb);
+ }
+
+ mntroot = nfs4_get_root(s, data->fh);
+ if (IS_ERR(mntroot)) {
+ error = PTR_ERR(mntroot);
+ goto error_splat_super;
+ }
- rpciod_down();
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
+
+ dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+ return 0;
+
+out_err_nosb:
+ nfs_free_server(server);
+out_err_noserver:
+ dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+ return error;
- nfs_free_iostats(server->io_stats);
- kfree(server->hostname);
- kfree(server);
- nfs_release_automount_timer();
+error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+ return error;
}
/*
- * Constructs the SERVER-side path
+ * Create an NFS4 server record on referral traversal
*/
-static inline char *nfs4_dup_path(const struct dentry *dentry)
+static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
{
- char *page = (char *) __get_free_page(GFP_USER);
- char *path;
+ struct nfs_clone_mount *data = raw_data;
+ struct super_block *s;
+ struct nfs_server *server;
+ struct dentry *mntroot;
+ struct nfs_fh mntfh;
+ int error;
- path = nfs4_path(dentry, page, PAGE_SIZE);
- if (!IS_ERR(path)) {
- int len = PAGE_SIZE + page - path;
- char *tmp = path;
+ dprintk("--> nfs4_referral_get_sb()\n");
- path = kmalloc(len, GFP_KERNEL);
- if (path)
- memcpy(path, tmp, len);
- else
- path = ERR_PTR(-ENOMEM);
+ /* create a new volume representation */
+ server = nfs4_create_referral_server(data, &mntfh);
+ if (IS_ERR(server)) {
+ error = PTR_ERR(server);
+ goto out_err_noserver;
}
- free_page((unsigned long)page);
- return path;
-}
-static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
- const struct dentry *dentry = data->dentry;
- struct nfs4_client *clp = server->nfs4_state;
- struct super_block *sb;
-
- server->fsid = data->fattr->fsid;
- nfs_copy_fh(&server->fh, data->fh);
- server->mnt_path = nfs4_dup_path(dentry);
- if (IS_ERR(server->mnt_path)) {
- sb = (struct super_block *)server->mnt_path;
- goto err;
+ /* Get a superblock - note that we may end up sharing one that already exists */
+ s = sget(&nfs4_fs_type, nfs_compare_super, nfs_set_super, server);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto out_err_nosb;
}
- sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
- if (IS_ERR(sb) || sb->s_root)
- goto free_path;
- nfs4_server_capabilities(server, &server->fh);
-
- down_write(&clp->cl_sem);
- atomic_inc(&clp->cl_count);
- list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
- up_write(&clp->cl_sem);
- return sb;
-free_path:
- kfree(server->mnt_path);
-err:
- server->mnt_path = NULL;
- return sb;
-}
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
- struct nfs_clone_mount *data = raw_data;
- return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
-}
+ if (s->s_fs_info != server) {
+ nfs_free_server(server);
+ server = NULL;
+ }
-static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
- struct super_block *sb = ERR_PTR(-ENOMEM);
- int len;
-
- len = strlen(data->mnt_path) + 1;
- server->mnt_path = kmalloc(len, GFP_KERNEL);
- if (server->mnt_path == NULL)
- goto err;
- memcpy(server->mnt_path, data->mnt_path, len);
- memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
-
- sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
- if (IS_ERR(sb) || sb->s_root)
- goto free_path;
- return sb;
-free_path:
- kfree(server->mnt_path);
-err:
- server->mnt_path = NULL;
- return sb;
-}
+ if (!s->s_root) {
+ /* initial superblock/root creation */
+ s->s_flags = flags;
+ nfs4_fill_super(s);
+ }
-static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
- struct nfs_server *server = NFS_SB(sb);
- struct rpc_timeout timeparms;
- int proto, timeo, retrans;
- void *err;
-
- proto = IPPROTO_TCP;
- /* Since we are following a referral and there may be alternatives,
- set the timeouts and retries to low values */
- timeo = 2;
- retrans = 1;
- nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
-
- server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
- if (IS_ERR((err = server->client)))
- goto out_err;
+ mntroot = nfs4_get_root(s, data->fh);
+ if (IS_ERR(mntroot)) {
+ error = PTR_ERR(mntroot);
+ goto error_splat_super;
+ }
- sb->s_time_gran = 1;
- sb->s_op = &nfs4_sops;
- err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
- if (!IS_ERR(err))
- return server;
-out_err:
- return (struct nfs_server *)err;
-}
+ s->s_flags |= MS_ACTIVE;
+ mnt->mnt_sb = s;
+ mnt->mnt_root = mntroot;
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
- struct nfs_clone_mount *data = raw_data;
- return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
+ dprintk("<-- nfs4_referral_get_sb() = 0\n");
+ return 0;
+
+out_err_nosb:
+ nfs_free_server(server);
+out_err_noserver:
+ dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+ return error;
+
+error_splat_super:
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+ return error;
}
-#endif
+#endif /* CONFIG_NFS_V4 */
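All of the get_sb variants above share one control flow: build (or clone) an nfs_server record, ask sget() for a matching superblock, discard the redundant server if sget() handed back one that already exists, fill the superblock only on first use, then attach the root dentry to the vfsmount. A condensed sketch of that shape, using the helpers introduced by this patch; the xxx_fill_super() and xxx_get_root() names are placeholders for the per-flavour functions, not symbols from the patch:

/* Sketch of the shared get_sb pattern above. */
static int xxx_get_sb_common(struct file_system_type *fs_type, int flags,
			     struct nfs_server *server, struct nfs_fh *mntfh,
			     struct vfsmount *mnt)
{
	struct super_block *s;
	struct dentry *mntroot;
	int error;

	/* Find an existing superblock for this server, or allocate one */
	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
	if (IS_ERR(s)) {
		error = PTR_ERR(s);
		goto out_free;
	}

	/* sget() adopted an existing sb: our server record is redundant */
	if (s->s_fs_info != server) {
		nfs_free_server(server);
		server = NULL;
	}

	if (!s->s_root) {
		/* First user of this superblock: initialise it */
		s->s_flags = flags;
		xxx_fill_super(s);		/* placeholder */
	}

	mntroot = xxx_get_root(s, mntfh);	/* placeholder */
	if (IS_ERR(mntroot)) {
		error = PTR_ERR(mntroot);
		up_write(&s->s_umount);
		deactivate_super(s);
		return error;
	}

	s->s_flags |= MS_ACTIVE;
	mnt->mnt_sb = s;
	mnt->mnt_root = mntroot;
	return 0;

out_free:
	nfs_free_server(server);
	return error;
}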
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7084ac9a6455..b674462793d3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
out:
clear_bit(BDI_write_congested, &bdi->state);
wake_up_all(&nfs_write_congestion);
+ writeback_congestion_end();
return err;
}
@@ -1252,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
- /* Call the NFS version-specific code */
+ /*
+ * ->write_done will attempt to use post-op attributes to detect
+ * conflicting writes by other clients. A strict interpretation
+ * of close-to-open would allow us to continue caching even if
+ * another writer had changed the file, but some applications
+ * depend on tighter cache coherency when writing.
+ */
status = NFS_PROTO(data->inode)->write_done(task, data);
if (status != 0)
return status;
@@ -1273,7 +1280,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
- NFS_SERVER(data->inode)->hostname,
+ NFS_SERVER(data->inode)->nfs_client->cl_hostname,
resp->verf->committed, argp->stable);
complain = jiffies + 300 * HZ;
}
@@ -1558,7 +1565,6 @@ void nfs_destroy_writepagecache(void)
{
mempool_destroy(nfs_commit_mempool);
mempool_destroy(nfs_wdata_mempool);
- if (kmem_cache_destroy(nfs_wdata_cachep))
- printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
+ kmem_cache_destroy(nfs_wdata_cachep);
}
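The dropped check above, like the matching hunks in nfsd, dlmfs and ntfs later in this diff, reflects kmem_cache_destroy() becoming a void function in this series: there is no status left for callers to report, so cache teardown reduces to unconditional calls. A typical exit routine now looks like the following sketch (the cache and mempool names are placeholders):

/* Neither mempool_destroy() nor kmem_cache_destroy() returns a
 * status the caller needs to inspect any more. */
static kmem_cache_t *example_cachep;
static mempool_t *example_mempool;

static void example_destroy_caches(void)
{
	mempool_destroy(example_mempool);
	kmem_cache_destroy(example_cachep);
}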
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1d2e3a..8583d99ee740 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
{
struct sockaddr_in addr;
struct nfs4_callback *cb = &clp->cl_callback;
- struct rpc_timeout timeparms;
- struct rpc_xprt * xprt;
+ struct rpc_timeout timeparms = {
+ .to_initval = (NFSD_LEASE_TIME/4) * HZ,
+ .to_retries = 5,
+ .to_maxval = (NFSD_LEASE_TIME/2) * HZ,
+ .to_exponential = 1,
+ };
struct rpc_program * program = &cb->cb_program;
- struct rpc_stat * stat = &cb->cb_stat;
- struct rpc_clnt * clnt;
+ struct rpc_create_args args = {
+ .protocol = IPPROTO_TCP,
+ .address = (struct sockaddr *)&addr,
+ .addrsize = sizeof(addr),
+ .timeout = &timeparms,
+ .servername = clp->cl_name.data,
+ .program = program,
+ .version = nfs_cb_version[1]->number,
+ .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
+ .flags = (RPC_CLNT_CREATE_NOPING),
+ };
struct rpc_message msg = {
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
- char hostname[32];
int status;
if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
addr.sin_port = htons(cb->cb_port);
addr.sin_addr.s_addr = htonl(cb->cb_addr);
- /* Initialize timeout */
- timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
- timeparms.to_retries = 0;
- timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
- timeparms.to_exponential = 1;
-
- /* Create RPC transport */
- xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
- if (IS_ERR(xprt)) {
- dprintk("NFSD: couldn't create callback transport!\n");
- goto out_err;
- }
-
/* Initialize rpc_program */
program->name = "nfs4_cb";
program->number = cb->cb_prog;
program->nrvers = ARRAY_SIZE(nfs_cb_version);
program->version = nfs_cb_version;
- program->stats = stat;
+ program->stats = &cb->cb_stat;
/* Initialize rpc_stat */
- memset(stat, 0, sizeof(struct rpc_stat));
- stat->program = program;
-
- /* Create RPC client
- *
- * XXX AUTH_UNIX only - need AUTH_GSS....
- */
- sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
- clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
- if (IS_ERR(clnt)) {
+ memset(program->stats, 0, sizeof(cb->cb_stat));
+ program->stats->program = program;
+
+ /* Create RPC client */
+ cb->cb_client = rpc_create(&args);
+ if (IS_ERR(cb->cb_client)) {
dprintk("NFSD: couldn't create callback client\n");
goto out_err;
}
- clnt->cl_intr = 0;
- clnt->cl_softrtry = 1;
/* Kick rpciod, put the call on the wire. */
-
- if (rpciod_up() != 0) {
- dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+ if (rpciod_up() != 0)
goto out_clnt;
- }
-
- cb->cb_client = clnt;
/* the task holds a reference to the nfs4_client struct */
atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
msg.rpc_cred = nfsd4_lookupcred(clp,0);
if (IS_ERR(msg.rpc_cred))
goto out_rpciod;
- status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+ status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
put_rpccred(msg.rpc_cred);
if (status != 0) {
@@ -462,7 +450,7 @@ out_rpciod:
rpciod_down();
cb->cb_client = NULL;
out_clnt:
- rpc_shutdown_client(clnt);
+ rpc_shutdown_client(cb->cb_client);
out_err:
dprintk("NFSD: warning: no callback path to client %.*s\n",
(int)clp->cl_name.len, clp->cl_name.data);
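The callback-client conversion above is the recurring rpc_create() pattern in this diff: rather than creating a transport with xprt_create_proto() and wrapping it with rpc_new_client(), the caller fills a struct rpc_create_args and gets a ready client back, with failure reported as an ERR_PTR() value. A minimal sketch using only the fields shown in this hunk; the timeout values and server name are placeholders:

/* Sketch: one-step RPC client creation with rpc_create(). */
static struct rpc_clnt *make_callback_client(struct sockaddr_in *addr,
					     struct rpc_program *prog,
					     u32 vers)
{
	struct rpc_timeout timeparms = {
		.to_initval	= 10 * HZ,	/* placeholder values */
		.to_retries	= 0,
		.to_maxval	= 60 * HZ,
		.to_exponential	= 1,
	};
	struct rpc_create_args args = {
		.protocol	= IPPROTO_TCP,
		.address	= (struct sockaddr *)addr,
		.addrsize	= sizeof(*addr),
		.timeout	= &timeparms,
		.servername	= "callback-peer",	/* placeholder */
		.program	= prog,
		.version	= vers,
		.authflavor	= RPC_AUTH_UNIX,
		.flags		= RPC_CLNT_CREATE_NOPING,
	};

	/* Failure comes back as an ERR_PTR(), not as NULL */
	return rpc_create(&args);
}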
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index bea6b9478114..b1902ebaab41 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -573,10 +573,9 @@ idmap_lookup(struct svc_rqst *rqstp,
struct idmap_defer_req *mdr;
int ret;
- mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+ mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
if (!mdr)
return -ENOMEM;
- memset(mdr, 0, sizeof(*mdr));
atomic_set(&mdr->count, 1);
init_waitqueue_head(&mdr->waitq);
mdr->req.defer = idmap_defer;
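The idmap change above is the standard kzalloc() conversion seen throughout this diff: one call that allocates and zeroes with the same gfp flags, replacing the kmalloc()-plus-memset() pair and removing the chance of forgetting the memset:

/* Before: allocate, then remember to zero */
mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
if (!mdr)
	return -ENOMEM;
memset(mdr, 0, sizeof(*mdr));

/* After: zeroed allocation in one step */
mdr = kzalloc(sizeof(*mdr), GFP_KERNEL);
if (!mdr)
	return -ENOMEM;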
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9daa0b9feb8d..ebcf226a9e4a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -339,8 +339,7 @@ alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
- if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
- memset(clp, 0, sizeof(*clp));
+ if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) {
if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) {
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
@@ -1006,13 +1005,10 @@ alloc_init_file(struct inode *ino)
static void
nfsd4_free_slab(kmem_cache_t **slab)
{
- int status;
-
if (*slab == NULL)
return;
- status = kmem_cache_destroy(*slab);
+ kmem_cache_destroy(*slab);
*slab = NULL;
- WARN_ON(status);
}
static void
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index d1e2c6f9f05e..85c36b8ca452 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1149,8 +1149,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
* Allocate a buffer to store the current name being processed
* converted to format determined by current NLS.
*/
- name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1,
- GFP_NOFS);
+ name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
if (unlikely(!name)) {
err = -ENOMEM;
goto err_out;
@@ -1191,7 +1190,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
* map the mft record without deadlocking.
*/
rc = le32_to_cpu(ctx->attr->data.resident.value_length);
- ir = (INDEX_ROOT*)kmalloc(rc, GFP_NOFS);
+ ir = kmalloc(rc, GFP_NOFS);
if (unlikely(!ir)) {
err = -ENOMEM;
goto err_out;
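The NTFS hunks in this diff are cast cleanups: kmalloc() returns void *, which C converts implicitly to any object pointer type, so the casts add noise without adding safety. The transformation is purely mechanical, as in the allocation above:

/* Redundant: the cast repeats what the compiler already knows */
name = (u8 *)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);

/* Preferred form */
name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);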
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d313f356e66a..933dbd89c2a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -137,7 +137,7 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
BUG_ON(!na->name);
i = na->name_len * sizeof(ntfschar);
- ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
+ ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
if (!ni->name)
return -ENOMEM;
memcpy(ni->name, na->name, i);
@@ -556,8 +556,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
/* Setup the generic vfs inode parts now. */
- /* This is the optimal IO size (for stat), not the fs block size. */
- vi->i_blksize = PAGE_CACHE_SIZE;
/*
* This is for checking whether an inode has changed w.r.t. a file so
* that the file can be updated if necessary (compare with f_version).
@@ -1234,7 +1232,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
base_ni = NTFS_I(base_vi);
/* Just mirror the values from the base inode. */
- vi->i_blksize = base_vi->i_blksize;
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
@@ -1504,7 +1501,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
ni = NTFS_I(vi);
base_ni = NTFS_I(base_vi);
/* Just mirror the values from the base inode. */
- vi->i_blksize = base_vi->i_blksize;
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2438c00ec0ce..584260fd6848 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -331,7 +331,7 @@ map_err_out:
ntfs_inode **tmp;
int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
- tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS);
+ tmp = kmalloc(new_size, GFP_NOFS);
if (unlikely(!tmp)) {
ntfs_error(base_ni->vol->sb, "Failed to allocate "
"internal buffer.");
@@ -2638,11 +2638,6 @@ mft_rec_already_initialized:
}
vi->i_ino = bit;
/*
- * This is the optimal IO size (for stat), not the fs block
- * size.
- */
- vi->i_blksize = PAGE_CACHE_SIZE;
- /*
* This is for checking whether an inode has changed w.r.t. a
* file so that the file can be updated if necessary (compare
* with f_version).
@@ -2893,7 +2888,7 @@ rollback:
if (!(base_ni->nr_extents & 3)) {
int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
- extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS);
+ extent_nis = kmalloc(new_size, GFP_NOFS);
if (unlikely(!extent_nis)) {
ntfs_error(vol->sb, "Failed to allocate internal "
"buffer during rollback.%s", es);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 74e0ee8fce72..6b2712f10dd2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3248,32 +3248,14 @@ ictx_err_out:
static void __exit exit_ntfs_fs(void)
{
- int err = 0;
-
ntfs_debug("Unregistering NTFS driver.");
unregister_filesystem(&ntfs_fs_type);
-
- if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_big_inode_cache_name);
- if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_inode_cache_name);
- if (kmem_cache_destroy(ntfs_name_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_name_cache_name);
- if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_attr_ctx_cache_name);
- if (kmem_cache_destroy(ntfs_index_ctx_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_index_ctx_cache_name);
- if (err)
- printk(KERN_CRIT "NTFS: This causes memory to leak! There is "
- "probably a BUG in the driver! Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists.sourceforge.net\n");
+ kmem_cache_destroy(ntfs_big_inode_cache);
+ kmem_cache_destroy(ntfs_inode_cache);
+ kmem_cache_destroy(ntfs_name_cache);
+ kmem_cache_destroy(ntfs_attr_ctx_cache);
+ kmem_cache_destroy(ntfs_index_ctx_cache);
/* Unregister the ntfs sysctls. */
ntfs_sysctl(0);
}
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index b123c0fa6bf6..a1b572196fe4 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -350,7 +350,7 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
}
if (!ns) {
ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
- ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS);
+ ns = kmalloc(ns_len + 1, GFP_NOFS);
if (!ns)
goto mem_err_out;
}
@@ -365,7 +365,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
else if (wc == -ENAMETOOLONG && ns != *outs) {
unsigned char *tc;
/* Grow in multiples of 64 bytes. */
- tc = (unsigned char*)kmalloc((ns_len + 64) &
+ tc = kmalloc((ns_len + 64) &
~63, GFP_NOFS);
if (tc) {
memcpy(tc, ns, ns_len);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index ff9e2e2104c2..4b46aac7d243 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,17 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
+ * New in version 4:
+ * - Remove i_generation from lock names for better stat performance.
+ *
+ * New in version 3:
+ * - Replace dentry votes with a cluster lock
+ *
* New in version 2:
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 2ULL
+#define O2NET_PROTOCOL_VERSION 4ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1a01380e3878..014e73978dac 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
#include "alloc.h"
#include "dcache.h"
+#include "dlmglue.h"
#include "file.h"
#include "inode.h"
+
static int ocfs2_dentry_revalidate(struct dentry *dentry,
struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
int ret = 0; /* if all else fails, just return false */
- struct ocfs2_super *osb;
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
goto bail;
}
- osb = OCFS2_SB(inode->i_sb);
-
BUG_ON(!osb);
- if (inode != osb->root_inode) {
- spin_lock(&OCFS2_I(inode)->ip_lock);
- /* did we or someone else delete this inode? */
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- mlog(0, "inode (%llu) deleted, returning false\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- goto bail;
- }
+ if (inode == osb->root_inode || is_bad_inode(inode))
+ goto bail;
+
+ spin_lock(&OCFS2_I(inode)->ip_lock);
+ /* did we or someone else delete this inode? */
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
spin_unlock(&OCFS2_I(inode)->ip_lock);
+ mlog(0, "inode (%llu) deleted, returning false\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ goto bail;
+ }
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
- if (!inode->i_nlink) {
- mlog(0, "Inode %llu orphaned, returning false "
- "dir = %d\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- S_ISDIR(inode->i_mode));
- goto bail;
- }
+ /*
+ * We don't need a cluster lock to test this because once an
+ * inode nlink hits zero, it never goes back.
+ */
+ if (inode->i_nlink == 0) {
+ mlog(0, "Inode %llu orphaned, returning false "
+ "dir = %d\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ S_ISDIR(inode->i_mode));
+ goto bail;
}
ret = 1;
@@ -87,6 +92,322 @@ bail:
return ret;
}
+static int ocfs2_match_dentry(struct dentry *dentry,
+ u64 parent_blkno,
+ int skip_unhashed)
+{
+ struct inode *parent;
+
+ /*
+ * ocfs2_lookup() does a d_splice_alias() _before_ attaching
+ * to the lock data, so we skip those here, otherwise
+ * ocfs2_dentry_attach_lock() will get its original dentry
+ * back.
+ */
+ if (!dentry->d_fsdata)
+ return 0;
+
+ if (!dentry->d_parent)
+ return 0;
+
+ if (skip_unhashed && d_unhashed(dentry))
+ return 0;
+
+ parent = dentry->d_parent->d_inode;
+ /* Negative parent dentry? */
+ if (!parent)
+ return 0;
+
+ /* Name is in a different directory. */
+ if (OCFS2_I(parent)->ip_blkno != parent_blkno)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Walk the inode alias list, and find a dentry which has a given
+ * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
+ * is looking for a dentry_lock reference. The vote thread is looking
+ * to unhash aliases, so we allow it to skip any that already have
+ * that property.
+ */
+struct dentry *ocfs2_find_local_alias(struct inode *inode,
+ u64 parent_blkno,
+ int skip_unhashed)
+{
+ struct list_head *p;
+ struct dentry *dentry = NULL;
+
+ spin_lock(&dcache_lock);
+
+ list_for_each(p, &inode->i_dentry) {
+ dentry = list_entry(p, struct dentry, d_alias);
+
+ if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
+ mlog(0, "dentry found: %.*s\n",
+ dentry->d_name.len, dentry->d_name.name);
+
+ dget_locked(dentry);
+ break;
+ }
+
+ dentry = NULL;
+ }
+
+ spin_unlock(&dcache_lock);
+
+ return dentry;
+}
+
+DEFINE_SPINLOCK(dentry_attach_lock);
+
+/*
+ * Attach this dentry to a cluster lock.
+ *
+ * Dentry locks cover all links in a given directory to a particular
+ * inode. We do this so that ocfs2 can build a lock name which all
+ * nodes in the cluster can agree on at all times. Shoving full names
+ * in the cluster lock won't work due to size restrictions. Covering
+ * links inside of a directory is a good compromise because it still
+ * allows us to use the parent directory lock to synchronize
+ * operations.
+ *
+ * Call this function with the parent dir semaphore and the parent dir
+ * cluster lock held.
+ *
+ * The dir semaphore will protect us from having to worry about
+ * concurrent processes on our node trying to attach a lock at the
+ * same time.
+ *
+ * The dir cluster lock (held at either PR or EX mode) protects us
+ * from unlink and rename on other nodes.
+ *
+ * A dput() can happen asynchronously due to pruning, so we cover
+ * attaching and detaching the dentry lock with a
+ * dentry_attach_lock.
+ *
+ * A node which has done lookup on a name retains a protected read
+ * lock until final dput. If the user requests an unlink or rename,
+ * the protected read is upgraded to an exclusive lock. Other nodes
+ * who have seen the dentry will then be informed that they need to
+ * downgrade their lock, which will involve d_delete on the
+ * dentry. This happens in ocfs2_dentry_convert_worker().
+ */
+int ocfs2_dentry_attach_lock(struct dentry *dentry,
+ struct inode *inode,
+ u64 parent_blkno)
+{
+ int ret;
+ struct dentry *alias;
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+ mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
+ dentry->d_name.len, dentry->d_name.name,
+ (unsigned long long)parent_blkno, dl);
+
+ /*
+ * Negative dentry. We ignore these for now.
+ *
+ * XXX: Could we improve ocfs2_dentry_revalidate() by
+ * tracking these?
+ */
+ if (!inode)
+ return 0;
+
+ if (dl) {
+ mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+ " \"%.*s\": old parent: %llu, new: %llu\n",
+ dentry->d_name.len, dentry->d_name.name,
+ (unsigned long long)parent_blkno,
+ (unsigned long long)dl->dl_parent_blkno);
+ return 0;
+ }
+
+ alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
+ if (alias) {
+ /*
+ * Great, an alias exists, which means we must have a
+ * dentry lock already. We can just grab the lock off
+ * the alias and add it to the list.
+ *
+ * We're depending here on the fact that this dentry
+ * was found and exists in the dcache and so must have
+ * a reference to the dentry_lock because we can't
+ * race creates. Final dput() cannot happen on it
+ * since we have it pinned, so our reference is safe.
+ */
+ dl = alias->d_fsdata;
+ mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
+ (unsigned long long)parent_blkno,
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+
+ mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+ " \"%.*s\": old parent: %llu, new: %llu\n",
+ dentry->d_name.len, dentry->d_name.name,
+ (unsigned long long)parent_blkno,
+ (unsigned long long)dl->dl_parent_blkno);
+
+ mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
+
+ goto out_attach;
+ }
+
+ /*
+ * There are no other aliases
+ */
+ dl = kmalloc(sizeof(*dl), GFP_NOFS);
+ if (!dl) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ return ret;
+ }
+
+ dl->dl_count = 0;
+ /*
+ * Does this have to happen below, for all attaches, in case
+ * the struct inode gets blown away by votes?
+ */
+ dl->dl_inode = igrab(inode);
+ dl->dl_parent_blkno = parent_blkno;
+ ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
+
+out_attach:
+ spin_lock(&dentry_attach_lock);
+ dentry->d_fsdata = dl;
+ dl->dl_count++;
+ spin_unlock(&dentry_attach_lock);
+
+ /*
+ * This actually gets us our PRMODE level lock. From now on,
+ * we'll have a notification if one of these names is
+ * destroyed on another node.
+ */
+ ret = ocfs2_dentry_lock(dentry, 0);
+ if (!ret)
+ ocfs2_dentry_unlock(dentry, 0);
+ else
+ mlog_errno(ret);
+
+ dput(alias);
+
+ return ret;
+}
+
+/*
+ * ocfs2_dentry_iput() and friends.
+ *
+ * At this point, our particular dentry is detached from the inodes
+ * alias list, so there's no way that the locking code can find it.
+ *
+ * The interesting stuff happens when we determine that our lock needs
+ * to go away because this is the last subdir alias in the
+ * system. This function needs to handle a couple things:
+ *
+ * 1) Synchronizing lock shutdown with the downconvert threads. This
+ * is already handled for us via the lockres release drop function
+ * called in ocfs2_release_dentry_lock()
+ *
+ * 2) A race may occur when we're doing our lock shutdown and
+ * another process wants to create a new dentry lock. Right now we
+ * let them race, which means that for a very short while, this
+ * node might have two locks on a lock resource. This should not be
+ * a problem though because one of them is in the process of being
+ * thrown out.
+ */
+static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
+ struct ocfs2_dentry_lock *dl)
+{
+ ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+ ocfs2_lock_res_free(&dl->dl_lockres);
+ iput(dl->dl_inode);
+ kfree(dl);
+}
+
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+ struct ocfs2_dentry_lock *dl)
+{
+ int unlock = 0;
+
+ BUG_ON(dl->dl_count == 0);
+
+ spin_lock(&dentry_attach_lock);
+ dl->dl_count--;
+ unlock = !dl->dl_count;
+ spin_unlock(&dentry_attach_lock);
+
+ if (unlock)
+ ocfs2_drop_dentry_lock(osb, dl);
+}
+
+static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+ mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
+ "dentry: %.*s\n", dentry->d_name.len,
+ dentry->d_name.name);
+
+ if (!dl)
+ goto out;
+
+ mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
+ dentry->d_name.len, dentry->d_name.name,
+ dl->dl_count);
+
+ ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
+
+out:
+ iput(inode);
+}
+
+/*
+ * d_move(), but keep the locks in sync.
+ *
+ * When we are done, "dentry" will have the parent dir and name of
+ * "target", which will be thrown away.
+ *
+ * We manually update the lock of "dentry" if need be.
+ *
+ * "target" doesn't have its dentry lock touched - we allow the later
+ * dput() to handle this for us.
+ *
+ * This is called during ocfs2_rename(), while holding parent
+ * directory locks. The dentries have already been deleted on other
+ * nodes via ocfs2_remote_dentry_delete().
+ *
+ * Normally, the VFS handles the d_move() for the file system, after
+ * the ->rename() callback. OCFS2 wants to handle this internally, so
+ * the new lock can be created atomically with respect to the cluster.
+ */
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+ struct inode *old_dir, struct inode *new_dir)
+{
+ int ret;
+ struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
+ struct inode *inode = dentry->d_inode;
+
+ /*
+ * Move within the same directory, so the actual lock info won't
+ * change.
+ *
+ * XXX: Is there any advantage to dropping the lock here?
+ */
+ if (old_dir == new_dir)
+ goto out_move;
+
+ ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
+
+ dentry->d_fsdata = NULL;
+ ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
+ if (ret)
+ mlog_errno(ret);
+
+out_move:
+ d_move(dentry, target);
+}
+
struct dentry_operations ocfs2_dentry_ops = {
.d_revalidate = ocfs2_dentry_revalidate,
+ .d_iput = ocfs2_dentry_iput,
};
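The lifetime of the shared ocfs2_dentry_lock above is a plain reference count guarded by the dentry_attach_lock spinlock: attaching a dentry bumps dl_count, ocfs2_dentry_lock_put() decrements it, and only the final put tears down the cluster lock. Reduced to its essentials (drop_lock() stands in for ocfs2_drop_dentry_lock()):

/* Minimal model of the dl_count handling used by the dentry lock code. */
static void dl_get(struct ocfs2_dentry_lock *dl, struct dentry *dentry)
{
	spin_lock(&dentry_attach_lock);
	dentry->d_fsdata = dl;
	dl->dl_count++;
	spin_unlock(&dentry_attach_lock);
}

static void dl_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl)
{
	int last;

	spin_lock(&dentry_attach_lock);
	dl->dl_count--;
	last = (dl->dl_count == 0);
	spin_unlock(&dentry_attach_lock);

	if (last)
		drop_lock(osb, dl);	/* stand-in for ocfs2_drop_dentry_lock() */
}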
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index 90072771114b..c091c34d9883 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
extern struct dentry_operations ocfs2_dentry_ops;
+struct ocfs2_dentry_lock {
+ unsigned int dl_count;
+ u64 dl_parent_blkno;
+
+ /*
+ * The ocfs2_dentry_lock keeps an inode reference until
+ * dl_lockres has been destroyed. This is usually done in
+ * ->d_iput() anyway, so there should be minimal impact.
+ */
+ struct inode *dl_inode;
+ struct ocfs2_lock_res dl_lockres;
+};
+
+int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
+ u64 parent_blkno);
+
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+ struct ocfs2_dentry_lock *dl);
+
+struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
+ int skip_unhashed);
+
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+ struct inode *old_dir, struct inode *new_dir);
+
+extern spinlock_t dentry_attach_lock;
+
#endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index 53652f51c0e1..cfd5cb65cab0 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
struct dlm_lockstatus *lksb,
int flags,
const char *name,
+ int namelen,
dlm_astlockfunc_t *ast,
void *data,
dlm_bastlockfunc_t *bast);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f13a4bac41f0..681046d51393 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
res = dlm_lookup_lockres(dlm, name, locklen);
if (!res) {
- mlog(ML_ERROR, "got %sast for unknown lockres! "
- "cookie=%u:%llu, name=%.*s, namelen=%u\n",
+ mlog(0, "got %sast for unknown lockres! "
+ "cookie=%u:%llu, name=%.*s, namelen=%u\n",
past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
mlog(ML_ERROR, "sent AST to node %u, it returned "
"DLM_MIGRATING!\n", lock->ml.node);
BUG();
- } else if (status != DLM_NORMAL) {
+ } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
mlog(ML_ERROR, "AST to node %u returned %d!\n",
lock->ml.node, status);
/* ignore it */
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 14530ee7e11d..fa968180b072 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
u8 owner);
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
const char *lockid,
+ int namelen,
int flags);
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
const char *name,
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 033ad1701232..0368c6402182 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -335,7 +335,6 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -362,7 +361,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -629,9 +627,7 @@ static void __exit exit_dlmfs_fs(void)
flush_workqueue(user_dlm_worker);
destroy_workqueue(user_dlm_worker);
- if (kmem_cache_destroy(dlmfs_inode_cache))
- printk(KERN_INFO "dlmfs_inode_cache: not all structures "
- "were freed\n");
+ kmem_cache_destroy(dlmfs_inode_cache);
}
MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5ca57ec650c7..42a1b91979b5 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
struct dlm_lockstatus *lksb, int flags,
- const char *name, dlm_astlockfunc_t *ast, void *data,
- dlm_bastlockfunc_t *bast)
+ const char *name, int namelen, dlm_astlockfunc_t *ast,
+ void *data, dlm_bastlockfunc_t *bast)
{
enum dlm_status status;
struct dlm_lock_resource *res = NULL;
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
recovery = (flags & LKM_RECOVERY);
if (recovery &&
- (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) {
+ (!dlm_is_recovery_lock(name, namelen) || convert) ) {
dlm_error(status);
goto error;
}
@@ -643,7 +643,7 @@ retry_convert:
}
status = DLM_IVBUFLEN;
- if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) {
+ if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
dlm_error(status);
goto error;
}
@@ -659,7 +659,7 @@ retry_convert:
dlm_wait_for_recovery(dlm);
/* find or create the lock resource */
- res = dlm_get_lock_resource(dlm, name, flags);
+ res = dlm_get_lock_resource(dlm, name, namelen, flags);
if (!res) {
status = DLM_IVLOCKID;
dlm_error(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9503240ef0e5..f784177b6241 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
*/
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
const char *lockid,
+ int namelen,
int flags)
{
struct dlm_lock_resource *tmpres=NULL, *res=NULL;
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
int blocked = 0;
int ret, nodenum;
struct dlm_node_iter iter;
- unsigned int namelen, hash;
+ unsigned int hash;
int tries = 0;
int bit, wait_on_recovery = 0;
BUG_ON(!lockid);
- namelen = strlen(lockid);
hash = dlm_lockid_hash(lockid, namelen);
mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 594745fab0b5..9d950d7cea38 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
memset(&lksb, 0, sizeof(lksb));
ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
- DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast);
+ DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
+ dlm_reco_ast, dlm, dlm_reco_bast);
mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
dlm->name, ret, lksb.status);
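Threading an explicit namelen through dlmlock() and dlm_get_lock_resource(), as the hunks above and below do, removes the strlen() calls on lock names. That matters because some lock names are not NUL-terminated strings; the user_dlm code, for instance, memcpy()s a dentry name into l_name and now records l_namelen alongside it. The call sites change shape like this, mirroring the userdlm.c hunks that follow:

/* Before: dlmlock() ran strlen() on the name, assuming a terminator
 * that user_dlm lock names do not guarantee. */
status = dlmlock(dlm, level, &lockres->l_lksb, flags,
		 lockres->l_name,
		 user_ast, lockres, user_bast);

/* After: the caller supplies the length it already knows. */
status = dlmlock(dlm, level, &lockres->l_lksb, flags,
		 lockres->l_name, lockres->l_namelen,
		 user_ast, lockres, user_bast);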
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e641b084b343..eead48bbfac6 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
spin_unlock(&lockres->l_lock);
}
-#define user_log_dlm_error(_func, _stat, _lockres) do { \
- mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
- "resource %s: %s\n", dlm_errname(_stat), _func, \
- _lockres->l_name, dlm_errmsg(_stat)); \
+#define user_log_dlm_error(_func, _stat, _lockres) do { \
+ mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
+ "resource %.*s: %s\n", dlm_errname(_stat), _func, \
+ _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
} while (0)
/* WARNING: This function lives in a world where the only three lock
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
struct user_lock_res *lockres = opaque;
struct dlm_lockstatus *lksb;
- mlog(0, "AST fired for lockres %s\n", lockres->l_name);
+ mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
+ lockres->l_name);
spin_lock(&lockres->l_lock);
lksb = &(lockres->l_lksb);
if (lksb->status != DLM_NORMAL) {
- mlog(ML_ERROR, "lksb status value of %u on lockres %s\n",
- lksb->status, lockres->l_name);
+ mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
+ lksb->status, lockres->l_namelen, lockres->l_name);
spin_unlock(&lockres->l_lock);
return;
}
mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
- "Lockres %s, requested ivmode. flags 0x%x\n",
- lockres->l_name, lockres->l_flags);
+ "Lockres %.*s, requested ivmode. flags 0x%x\n",
+ lockres->l_namelen, lockres->l_name, lockres->l_flags);
/* we're downconverting. */
if (lockres->l_requested < lockres->l_level) {
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
{
struct user_lock_res *lockres = opaque;
- mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n",
- lockres->l_name, level);
+ mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
+ lockres->l_namelen, lockres->l_name, level);
spin_lock(&lockres->l_lock);
lockres->l_flags |= USER_LOCK_BLOCKED;
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
{
struct user_lock_res *lockres = opaque;
- mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
+ mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
+ lockres->l_name);
if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
mlog(ML_ERROR, "Dlm returns status %d\n", status);
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
&& !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
lockres->l_level = LKM_IVMODE;
} else if (status == DLM_CANCELGRANT) {
- mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
- lockres->l_name, lockres->l_flags);
/* We tried to cancel a convert request, but it was
* already granted. Don't clear the busy flag - the
* ast should've done this already. */
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
} else {
BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
/* Cancel succeeded, we want to re-queue */
- mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
- lockres->l_name, lockres->l_flags);
lockres->l_requested = LKM_IVMODE; /* cancel an
* upconvert
* request. */
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
struct user_lock_res *lockres = (struct user_lock_res *) opaque;
struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
- mlog(0, "processing lockres %s\n", lockres->l_name);
+ mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
+ lockres->l_name);
spin_lock(&lockres->l_lock);
mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
- "Lockres %s, flags 0x%x\n",
- lockres->l_name, lockres->l_flags);
+ "Lockres %.*s, flags 0x%x\n",
+ lockres->l_namelen, lockres->l_name, lockres->l_flags);
/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
* set, we want user_ast clear it. */
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
* flag, and finally we might get another bast which re-queues
* us before our ast for the downconvert is called. */
if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
- mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
- lockres->l_name, lockres->l_flags);
spin_unlock(&lockres->l_lock);
goto drop_ref;
}
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
- mlog(0, "lock is in teardown so we do nothing\n");
spin_unlock(&lockres->l_lock);
goto drop_ref;
}
if (lockres->l_flags & USER_LOCK_BUSY) {
- mlog(0, "Cancel lock %s, flags 0x%x\n",
- lockres->l_name, lockres->l_flags);
-
if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
spin_unlock(&lockres->l_lock);
goto drop_ref;
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
&lockres->l_lksb,
LKM_CONVERT|LKM_VALBLK,
lockres->l_name,
+ lockres->l_namelen,
user_ast,
lockres,
user_bast);
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
if (level != LKM_EXMODE &&
level != LKM_PRMODE) {
- mlog(ML_ERROR, "lockres %s: invalid request!\n",
- lockres->l_name);
+ mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+ lockres->l_namelen, lockres->l_name);
status = -EINVAL;
goto bail;
}
- mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n",
- lockres->l_name,
- (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
- lkm_flags);
+ mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
+ lockres->l_namelen, lockres->l_name,
+ (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
+ lkm_flags);
again:
if (signal_pending(current)) {
@@ -474,15 +468,13 @@ again:
BUG_ON(level == LKM_IVMODE);
BUG_ON(level == LKM_NLMODE);
- mlog(0, "lock %s, get lock from %d to level = %d\n",
- lockres->l_name, lockres->l_level, level);
-
/* call dlm_lock to upgrade lock now */
status = dlmlock(dlm,
level,
&lockres->l_lksb,
local_flags,
lockres->l_name,
+ lockres->l_namelen,
user_ast,
lockres,
user_bast);
@@ -498,9 +490,6 @@ again:
goto bail;
}
- mlog(0, "lock %s, successfull return from dlmlock\n",
- lockres->l_name);
-
user_wait_on_busy_lock(lockres);
goto again;
}
@@ -508,9 +497,6 @@ again:
user_dlm_inc_holders(lockres, level);
spin_unlock(&lockres->l_lock);
- mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
- (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
-
status = 0;
bail:
return status;
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
{
if (level != LKM_EXMODE &&
level != LKM_PRMODE) {
- mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name);
+ mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+ lockres->l_namelen, lockres->l_name);
return;
}
- mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
- (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
-
spin_lock(&lockres->l_lock);
user_dlm_dec_holders(lockres, level);
__user_dlm_cond_queue_lockres(lockres);
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
memcpy(lockres->l_name,
dentry->d_name.name,
dentry->d_name.len);
+ lockres->l_namelen = dentry->d_name.len;
}
int user_dlm_destroy_lock(struct user_lock_res *lockres)
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
int status = -EBUSY;
struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
- mlog(0, "asked to destroy %s\n", lockres->l_name);
+ mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
spin_lock(&lockres->l_lock);
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
- mlog(0, "Lock is already torn down\n");
spin_unlock(&lockres->l_lock);
return 0;
}
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
while (lockres->l_flags & USER_LOCK_BUSY) {
spin_unlock(&lockres->l_lock);
- mlog(0, "lock %s is busy\n", lockres->l_name);
-
user_wait_on_busy_lock(lockres);
spin_lock(&lockres->l_lock);
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
if (lockres->l_ro_holders || lockres->l_ex_holders) {
spin_unlock(&lockres->l_lock);
- mlog(0, "lock %s has holders\n", lockres->l_name);
goto bail;
}
status = 0;
if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
spin_unlock(&lockres->l_lock);
- mlog(0, "lock %s is not attached\n", lockres->l_name);
goto bail;
}
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
lockres->l_flags |= USER_LOCK_BUSY;
spin_unlock(&lockres->l_lock);
- mlog(0, "unlocking lockres %s\n", lockres->l_name);
status = dlmunlock(dlm,
&lockres->l_lksb,
LKM_VALBLK,
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 04178bc40b76..c400e93bbf79 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
#define USER_DLM_LOCK_ID_MAX_LEN 32
char l_name[USER_DLM_LOCK_ID_MAX_LEN];
+ int l_namelen;
int l_level;
unsigned int l_ro_holders;
unsigned int l_ex_holders;
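With l_namelen stored next to l_name, the logging macros and mlog() calls in the userdlm.c hunk above can switch from "%s" to the precision form "%.*s", which prints at most the given number of bytes and is therefore safe for names without a terminating NUL:

/* "%.*s" takes the maximum length as an int argument before the
 * pointer; output stops after l_namelen bytes even if no NUL follows. */
mlog(0, "processing lockres %.*s\n",
     lockres->l_namelen, lockres->l_name);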
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 151b41781eab..8801e41afe80 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -46,6 +46,7 @@
#include "ocfs2.h"
#include "alloc.h"
+#include "dcache.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
@@ -66,78 +67,161 @@ struct ocfs2_mask_waiter {
unsigned long mw_goal;
};
-static void ocfs2_inode_ast_func(void *opaque);
-static void ocfs2_inode_bast_func(void *opaque,
- int level);
-static void ocfs2_super_ast_func(void *opaque);
-static void ocfs2_super_bast_func(void *opaque,
- int level);
-static void ocfs2_rename_ast_func(void *opaque);
-static void ocfs2_rename_bast_func(void *opaque,
- int level);
-
-/* so far, all locks have gotten along with the same unlock ast */
-static void ocfs2_unlock_ast_func(void *opaque,
- enum dlm_status status);
-static int ocfs2_do_unblock_meta(struct inode *inode,
- int *requeue);
-static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
- int *requeue);
-static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
- int *requeue);
-static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
- int *requeue);
-static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
- int *requeue);
-typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
-static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
- struct ocfs2_lock_res *lockres,
- int *requeue,
- ocfs2_convert_worker_t *worker);
+static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
+static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
+/*
+ * Return value from ->downconvert_worker functions.
+ *
+ * These control the precise actions of ocfs2_unblock_lock()
+ * and ocfs2_process_blocked_lock()
+ *
+ */
+enum ocfs2_unblock_action {
+ UNBLOCK_CONTINUE = 0, /* Continue downconvert */
+ UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
+ * ->post_unlock callback */
+ UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
+ * ->post_unlock() callback. */
+};
+
+struct ocfs2_unblock_ctl {
+ int requeue;
+ enum ocfs2_unblock_action unblock_action;
+};
+
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+ int new_level);
+static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
+
+static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
+ int blocking);
+
+static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
+ int blocking);
+
+static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres);
+
+/*
+ * OCFS2 Lock Resource Operations
+ *
+ * These fine-tune the behavior of the generic dlmglue locking infrastructure.
+ *
+ * The most basic of lock types can point ->l_priv to their respective
+ * struct ocfs2_super and allow the default actions to manage things.
+ *
+ * Right now, each lock type also needs to implement an init function,
+ * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
+ * should be called when the lock is no longer needed (i.e., object
+ * destruction time).
+ */
struct ocfs2_lock_res_ops {
- void (*ast)(void *);
- void (*bast)(void *, int);
- void (*unlock_ast)(void *, enum dlm_status);
- int (*unblock)(struct ocfs2_lock_res *, int *);
+ /*
+ * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
+ * this callback if ->l_priv is not an ocfs2_super pointer
+ */
+ struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
+
+ /*
+ * Optionally called in the downconvert (or "vote") thread
+ * after a successful downconvert. The lockres will not be
+ * referenced after this callback is called, so it is safe to
+ * free memory, etc.
+ *
+ * The exact semantics of when this is called are controlled
+ * by ->downconvert_worker()
+ */
+ void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
+
+ /*
+ * Allow a lock type to add checks to determine whether it is
+ * safe to downconvert a lock. Return 0 to re-queue the
+ * downconvert at a later time, nonzero to continue.
+ *
+ * For most locks, the default checks that there are no
+ * incompatible holders are sufficient.
+ *
+ * Called with the lockres spinlock held.
+ */
+ int (*check_downconvert)(struct ocfs2_lock_res *, int);
+
+ /*
+ * Allows a lock type to populate the lock value block. This
+ * is called on downconvert, and when we drop a lock.
+ *
+ * Locks that want to use this should set LOCK_TYPE_USES_LVB
+ * in the flags field.
+ *
+ * Called with the lockres spinlock held.
+ */
+ void (*set_lvb)(struct ocfs2_lock_res *);
+
+ /*
+ * Called from the downconvert thread when it is determined
+ * that a lock will be downconverted. This is called without
+ * any locks held so the function can do work that might
+ * schedule (syncing out data, etc).
+ *
+ * This should return any one of the ocfs2_unblock_action
+ * values, depending on what it wants the thread to do.
+ */
+ int (*downconvert_worker)(struct ocfs2_lock_res *, int);
+
+ /*
+ * LOCK_TYPE_* flags which describe the specific requirements
+ * of a lock type. Descriptions of each individual flag follow.
+ */
+ int flags;
};
+/*
+ * Some locks want to "refresh" potentially stale data when a
+ * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
+ * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
+ * individual lockres l_flags member from the ast function. It is
+ * expected that the locking wrapper will clear the
+ * OCFS2_LOCK_NEEDS_REFRESH flag when done.
+ */
+#define LOCK_TYPE_REQUIRES_REFRESH 0x1
+
+/*
+ * Indicate that a lock type makes use of the lock value block. The
+ * ->set_lvb lock type callback must be defined.
+ */
+#define LOCK_TYPE_USES_LVB 0x2
+
static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
- .ast = ocfs2_inode_ast_func,
- .bast = ocfs2_inode_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_inode_lock,
+ .get_osb = ocfs2_get_inode_osb,
+ .flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
- .ast = ocfs2_inode_ast_func,
- .bast = ocfs2_inode_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_meta,
+ .get_osb = ocfs2_get_inode_osb,
+ .check_downconvert = ocfs2_check_meta_downconvert,
+ .set_lvb = ocfs2_set_meta_lvb,
+ .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
- int blocking);
-
static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
- .ast = ocfs2_inode_ast_func,
- .bast = ocfs2_inode_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_data,
+ .get_osb = ocfs2_get_inode_osb,
+ .downconvert_worker = ocfs2_data_convert_worker,
+ .flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_super_lops = {
- .ast = ocfs2_super_ast_func,
- .bast = ocfs2_super_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_osb_lock,
+ .flags = LOCK_TYPE_REQUIRES_REFRESH,
};
static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
- .ast = ocfs2_rename_ast_func,
- .bast = ocfs2_rename_bast_func,
- .unlock_ast = ocfs2_unlock_ast_func,
- .unblock = ocfs2_unblock_osb_lock,
+ .flags = 0,
+};
+
+static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+ .get_osb = ocfs2_get_dentry_osb,
+ .post_unlock = ocfs2_dentry_post_unlock,
+ .downconvert_worker = ocfs2_dentry_convert_worker,
+ .flags = 0,
};
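Taken together, the ops tables above show the pattern this rework establishes: a lock type supplies only the callbacks it needs and describes the rest through LOCK_TYPE_* flags. A hedged sketch of what a hypothetical new lock type would add (struct example_priv, ocfs2_example_lops and its helpers are illustrations only, not part of this patch):

/* Hypothetical private object and lock type, for illustration only. */
struct example_priv {
	struct ocfs2_super	*ex_osb;
};

static struct ocfs2_super *ocfs2_get_example_osb(struct ocfs2_lock_res *lockres)
{
	/* Needed because ->l_priv here is not an ocfs2_super pointer. */
	struct example_priv *priv = lockres->l_priv;

	return priv->ex_osb;
}

static void ocfs2_set_example_lvb(struct ocfs2_lock_res *lockres)
{
	/* Pack whatever this lock type caches into lockres->l_lksb.lvb here. */
}

static struct ocfs2_lock_res_ops ocfs2_example_lops = {
	.get_osb	= ocfs2_get_example_osb,
	.set_lvb	= ocfs2_set_example_lvb,
	.flags		= LOCK_TYPE_USES_LVB,
};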
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
@@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
lockres->l_type == OCFS2_LOCK_TYPE_RW;
}
-static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres)
+static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
- return lockres->l_type == OCFS2_LOCK_TYPE_SUPER;
-}
+ BUG_ON(!ocfs2_is_inode_lock(lockres));
-static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres)
-{
- return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
+ return (struct inode *) lockres->l_priv;
}
-static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres)
+static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
- BUG_ON(!ocfs2_is_super_lock(lockres)
- && !ocfs2_is_rename_lock(lockres));
+ BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
- return (struct ocfs2_super *) lockres->l_priv;
+ return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
-static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
+static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
- BUG_ON(!ocfs2_is_inode_lock(lockres));
+ if (lockres->l_ops->get_osb)
+ return lockres->l_ops->get_osb(lockres);
- return (struct inode *) lockres->l_priv;
+ return (struct ocfs2_super *)lockres->l_priv;
}
static int ocfs2_lock_create(struct ocfs2_super *osb,
@@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode,
struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
-static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
- struct ocfs2_lock_res *lockres,
- int new_level);
-
-static char *ocfs2_lock_type_strings[] = {
- [OCFS2_LOCK_TYPE_META] = "Meta",
- [OCFS2_LOCK_TYPE_DATA] = "Data",
- [OCFS2_LOCK_TYPE_SUPER] = "Super",
- [OCFS2_LOCK_TYPE_RENAME] = "Rename",
- /* Need to differntiate from [R]ename.. serializing writes is the
- * important job it does, anyway. */
- [OCFS2_LOCK_TYPE_RW] = "Write/Read",
-};
-
-static char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
-{
- mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
- return ocfs2_lock_type_strings[type];
-}
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
u64 blkno,
@@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
- u64 blkno,
- u32 generation,
struct ocfs2_lock_res_ops *ops,
void *priv)
{
- ocfs2_build_lock_name(type, blkno, generation, res->l_name);
-
res->l_type = type;
res->l_ops = ops;
res->l_priv = priv;
@@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
+ unsigned int generation,
struct inode *inode)
{
struct ocfs2_lock_res_ops *ops;
@@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
break;
};
- ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type,
- OCFS2_I(inode)->ip_blkno,
- inode->i_generation, ops, inode);
+ ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
+ generation, res->l_name);
+ ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
+}
+
+static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
+{
+ struct inode *inode = ocfs2_lock_res_inode(lockres);
+
+ return OCFS2_SB(inode->i_sb);
+}
+
+static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
+{
+ __be64 inode_blkno_be;
+
+ memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
+ sizeof(__be64));
+
+ return be64_to_cpu(inode_blkno_be);
+}
+
+static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_dentry_lock *dl = lockres->l_priv;
+
+ return OCFS2_SB(dl->dl_inode->i_sb);
+}
+
+void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
+ u64 parent, struct inode *inode)
+{
+ int len;
+ u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
+ __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
+ struct ocfs2_lock_res *lockres = &dl->dl_lockres;
+
+ ocfs2_lock_res_init_once(lockres);
+
+ /*
+ * Unfortunately, the standard lock naming scheme won't work
+ * here because we have two 16 byte values to use. Instead,
+ * we'll stuff the inode number as a binary value. We still
+ * want error prints to show something without garbling the
+ * display, so drop a null byte in there before the inode
+ * number. A future version of OCFS2 will likely use all
+ * binary lock names. The stringified names have been a
+ * tremendous aid in debugging, but now that the debugfs
+ * interface exists, we can mangle things there if need be.
+ *
+ * NOTE: We also drop the standard "pad" value (the total lock
+ * name size stays the same though - the last part is all
+ * zeros due to the memset in ocfs2_lock_res_init_once()).
+ */
+ len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
+ "%c%016llx",
+ ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
+ (long long)parent);
+
+ BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
+
+ memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
+ sizeof(__be64));
+
+ ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
+ OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
+ dl);
}
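The naming scheme described in the comment above gives the dentry lock a mostly printable prefix and a binary suffix. Assuming OCFS2_DENTRY_LOCK_INO_START works out to 18 (one type character, sixteen hex digits, one NUL), the layout and a hypothetical decoder for the parent block number look like this; only ocfs2_get_dentry_lock_ino() above actually exists in the patch:

/*
 * Sketch of the dentry lock name layout built above:
 *
 *   l_name[0]                                lock type character
 *   l_name[1..16]                            parent blkno, "%016llx"
 *   l_name[OCFS2_DENTRY_LOCK_INO_START - 1]  '\0', keeps error prints readable
 *   l_name[OCFS2_DENTRY_LOCK_INO_START..]    child inode blkno as a raw __be64
 */
static u64 example_dentry_lock_parent_blkno(struct ocfs2_lock_res *lockres)
{
	/* Hypothetical helper: the hex run is NUL-terminated, so parsing stops there. */
	return simple_strtoull(&lockres->l_name[1], NULL, 16);
}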
static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
@@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
/* Superblock lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
+ ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
+ 0, res->l_name);
ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
- OCFS2_SUPER_BLOCK_BLKNO, 0,
&ocfs2_super_lops, osb);
}
@@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
/* Rename lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
- ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0,
+ ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
+ ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
&ocfs2_rename_lops, osb);
}
@@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
* information is already up to date. Convert from NL to
* *anything* however should mark ourselves as needing an
* update */
- if (lockres->l_level == LKM_NLMODE)
+ if (lockres->l_level == LKM_NLMODE &&
+ lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
@@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
if (lockres->l_requested > LKM_NLMODE &&
- !(lockres->l_flags & OCFS2_LOCK_LOCAL))
+ !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
+ lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
@@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
mlog_exit_void();
}
-static void ocfs2_inode_ast_func(void *opaque)
-{
- struct ocfs2_lock_res *lockres = opaque;
- struct inode *inode;
- struct dlm_lockstatus *lksb;
- unsigned long flags;
-
- mlog_entry_void();
-
- inode = ocfs2_lock_res_inode(lockres);
-
- mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
- ocfs2_lock_type_string(lockres->l_type));
-
- BUG_ON(!ocfs2_is_inode_lock(lockres));
-
- spin_lock_irqsave(&lockres->l_lock, flags);
-
- lksb = &(lockres->l_lksb);
- if (lksb->status != DLM_NORMAL) {
- mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
- "on inode %llu\n", lksb->status,
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- mlog_exit_void();
- return;
- }
-
- switch(lockres->l_action) {
- case OCFS2_AST_ATTACH:
- ocfs2_generic_handle_attach_action(lockres);
- lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
- break;
- case OCFS2_AST_CONVERT:
- ocfs2_generic_handle_convert_action(lockres);
- break;
- case OCFS2_AST_DOWNCONVERT:
- ocfs2_generic_handle_downconvert_action(lockres);
- break;
- default:
- mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
- "lockres flags = 0x%lx, unlock action: %u\n",
- lockres->l_name, lockres->l_action, lockres->l_flags,
- lockres->l_unlock_action);
-
- BUG();
- }
-
- /* data and rw locking ignores refresh flag for now. */
- if (lockres->l_type != OCFS2_LOCK_TYPE_META)
- lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
-
- /* set it to something invalid so if we get called again we
- * can catch it. */
- lockres->l_action = OCFS2_AST_INVALID;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- wake_up(&lockres->l_event);
-
- mlog_exit_void();
-}
-
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
int level)
{
@@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
return needs_downconvert;
}
-static void ocfs2_generic_bast_func(struct ocfs2_super *osb,
- struct ocfs2_lock_res *lockres,
- int level)
+static void ocfs2_blocking_ast(void *opaque, int level)
{
+ struct ocfs2_lock_res *lockres = opaque;
+ struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
int needs_downconvert;
unsigned long flags;
- mlog_entry_void();
-
BUG_ON(level <= LKM_NLMODE);
+ mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
+ lockres->l_name, level, lockres->l_level,
+ ocfs2_lock_type_string(lockres->l_type));
+
spin_lock_irqsave(&lockres->l_lock, flags);
needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
if (needs_downconvert)
ocfs2_schedule_blocked_lock(osb, lockres);
spin_unlock_irqrestore(&lockres->l_lock, flags);
- ocfs2_kick_vote_thread(osb);
-
wake_up(&lockres->l_event);
- mlog_exit_void();
-}
-
-static void ocfs2_inode_bast_func(void *opaque, int level)
-{
- struct ocfs2_lock_res *lockres = opaque;
- struct inode *inode;
- struct ocfs2_super *osb;
- mlog_entry_void();
-
- BUG_ON(!ocfs2_is_inode_lock(lockres));
-
- inode = ocfs2_lock_res_inode(lockres);
- osb = OCFS2_SB(inode->i_sb);
-
- mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
- lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
-
- ocfs2_generic_bast_func(osb, lockres, level);
-
- mlog_exit_void();
+ ocfs2_kick_vote_thread(osb);
}
-static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
- int ignore_refresh)
+static void ocfs2_locking_ast(void *opaque)
{
+ struct ocfs2_lock_res *lockres = opaque;
struct dlm_lockstatus *lksb = &lockres->l_lksb;
unsigned long flags;
@@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
switch(lockres->l_action) {
case OCFS2_AST_ATTACH:
ocfs2_generic_handle_attach_action(lockres);
+ lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
break;
case OCFS2_AST_CONVERT:
ocfs2_generic_handle_convert_action(lockres);
@@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
ocfs2_generic_handle_downconvert_action(lockres);
break;
default:
+ mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
+ "lockres flags = 0x%lx, unlock action: %u\n",
+ lockres->l_name, lockres->l_action, lockres->l_flags,
+ lockres->l_unlock_action);
BUG();
}
- if (ignore_refresh)
- lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
-
/* set it to something invalid so if we get called again we
* can catch it. */
lockres->l_action = OCFS2_AST_INVALID;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
wake_up(&lockres->l_event);
-}
-
-static void ocfs2_super_ast_func(void *opaque)
-{
- struct ocfs2_lock_res *lockres = opaque;
-
- mlog_entry_void();
- mlog(0, "Superblock AST fired\n");
-
- BUG_ON(!ocfs2_is_super_lock(lockres));
- ocfs2_generic_ast_func(lockres, 0);
-
- mlog_exit_void();
-}
-
-static void ocfs2_super_bast_func(void *opaque,
- int level)
-{
- struct ocfs2_lock_res *lockres = opaque;
- struct ocfs2_super *osb;
-
- mlog_entry_void();
- mlog(0, "Superblock BAST fired\n");
-
- BUG_ON(!ocfs2_is_super_lock(lockres));
- osb = ocfs2_lock_res_super(lockres);
- ocfs2_generic_bast_func(osb, lockres, level);
-
- mlog_exit_void();
-}
-
-static void ocfs2_rename_ast_func(void *opaque)
-{
- struct ocfs2_lock_res *lockres = opaque;
-
- mlog_entry_void();
-
- mlog(0, "Rename AST fired\n");
-
- BUG_ON(!ocfs2_is_rename_lock(lockres));
-
- ocfs2_generic_ast_func(lockres, 1);
-
- mlog_exit_void();
-}
-
-static void ocfs2_rename_bast_func(void *opaque,
- int level)
-{
- struct ocfs2_lock_res *lockres = opaque;
- struct ocfs2_super *osb;
-
- mlog_entry_void();
-
- mlog(0, "Rename BAST fired\n");
-
- BUG_ON(!ocfs2_is_rename_lock(lockres));
-
- osb = ocfs2_lock_res_super(lockres);
- ocfs2_generic_bast_func(osb, lockres, level);
-
- mlog_exit_void();
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
}
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
@@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
&lockres->l_lksb,
dlm_flags,
lockres->l_name,
- lockres->l_ops->ast,
+ OCFS2_LOCK_ID_MAX_LEN - 1,
+ ocfs2_locking_ast,
lockres,
- lockres->l_ops->bast);
+ ocfs2_blocking_ast);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmlock", status, lockres);
ret = -EINVAL;
@@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb,
ocfs2_init_mask_waiter(&mw);
+ if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+ lkm_flags |= LKM_VALBLK;
+
again:
wait = 0;
@@ -997,11 +985,12 @@ again:
status = dlmlock(osb->dlm,
level,
&lockres->l_lksb,
- lkm_flags|LKM_CONVERT|LKM_VALBLK,
+ lkm_flags|LKM_CONVERT,
lockres->l_name,
- lockres->l_ops->ast,
+ OCFS2_LOCK_ID_MAX_LEN - 1,
+ ocfs2_locking_ast,
lockres,
- lockres->l_ops->bast);
+ ocfs2_blocking_ast);
if (status != DLM_NORMAL) {
if ((lkm_flags & LKM_NOQUEUE) &&
(status == DLM_NOTQUEUED))
@@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
mlog_exit_void();
}
-static int ocfs2_create_new_inode_lock(struct inode *inode,
- struct ocfs2_lock_res *lockres)
+int ocfs2_create_new_lock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres,
+ int ex,
+ int local)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int level = ex ? LKM_EXMODE : LKM_PRMODE;
unsigned long flags;
+ int lkm_flags = local ? LKM_LOCAL : 0;
spin_lock_irqsave(&lockres->l_lock, flags);
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
spin_unlock_irqrestore(&lockres->l_lock, flags);
- return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL);
+ return ocfs2_lock_create(osb, lockres, level, lkm_flags);
}
/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
int ocfs2_create_new_inode_locks(struct inode *inode)
{
int ret;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
BUG_ON(!inode);
BUG_ON(!ocfs2_inode_is_new(inode));
@@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
* on a resource which has an invalid one -- we'll set it
* valid when we release the EX. */
- ret = ocfs2_create_new_inode_lock(inode,
- &OCFS2_I(inode)->ip_rw_lockres);
+ ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
if (ret) {
mlog_errno(ret);
goto bail;
}
- ret = ocfs2_create_new_inode_lock(inode,
- &OCFS2_I(inode)->ip_meta_lockres);
+ /*
+ * We don't want to use LKM_LOCAL on a meta data lock as they
+ * don't use a generation in their lock names.
+ */
+ ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
if (ret) {
mlog_errno(ret);
goto bail;
}
- ret = ocfs2_create_new_inode_lock(inode,
- &OCFS2_I(inode)->ip_data_lockres);
+ ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
if (ret) {
mlog_errno(ret);
goto bail;
@@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
- lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION);
+ /*
+ * Invalidate the LVB of a deleted inode - this way other
+ * nodes are forced to go to disk and discover the new inode
+ * status.
+ */
+ if (oi->ip_flags & OCFS2_INODE_DELETED) {
+ lvb->lvb_version = 0;
+ goto out;
+ }
+
+ lvb->lvb_version = OCFS2_LVB_VERSION;
lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
@@ -1331,7 +1335,9 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
+ lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
+out:
mlog_meta_lvb(0, lockres);
mlog_exit_void();
@@ -1386,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
mlog_exit_void();
}
-static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres)
+static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
+ struct ocfs2_lock_res *lockres)
{
struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
- if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION)
+ if (lvb->lvb_version == OCFS2_LVB_VERSION
+ && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
return 1;
return 0;
}
@@ -1487,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
* map (directories, bitmap files, etc) */
ocfs2_extent_map_trunc(inode, 0);
- if (ocfs2_meta_lvb_is_trustable(lockres)) {
+ if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
mlog(0, "Trusting LVB on inode %llu\n",
(unsigned long long)oi->ip_blkno);
ocfs2_refresh_inode_from_lvb(inode);
@@ -1628,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
wait_event(osb->recovery_event,
ocfs2_node_map_is_empty(osb, &osb->recovery_map));
+ /*
+ * We only see this flag if we're being called from
+ * ocfs2_read_locked_inode(). It means we're locking an inode
+ * which hasn't been populated yet, so clear the refresh flag
+ * and let the caller handle it.
+ */
+ if (inode->i_state & I_NEW) {
+ status = 0;
+ ocfs2_complete_lock_res_refresh(lockres, 0);
+ goto bail;
+ }
+
/* This is fun. The caller may want a bh back, or it may
* not. ocfs2_meta_lock_update definitely wants one in, but
* may or may not read one, depending on what's in the
@@ -1807,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
}
+int ocfs2_dentry_lock(struct dentry *dentry, int ex)
+{
+ int ret;
+ int level = ex ? LKM_EXMODE : LKM_PRMODE;
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+
+ BUG_ON(!dl);
+
+ if (ocfs2_is_hard_readonly(osb))
+ return -EROFS;
+
+ ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
+ if (ret < 0)
+ mlog_errno(ret);
+
+ return ret;
+}
+
+void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
+{
+ int level = ex ? LKM_EXMODE : LKM_PRMODE;
+ struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+
+ ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
+}
+
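ocfs2_dentry_lock() and ocfs2_dentry_unlock() follow the same ex convention as the other wrappers above (nonzero for EX, zero for PR). A hedged usage sketch, with the surrounding function purely illustrative:

/* Illustrative caller: hold a shared (PR) dentry cluster lock around read-side work. */
static int example_read_under_dentry_lock(struct dentry *dentry)
{
	int ret;

	ret = ocfs2_dentry_lock(dentry, 0);
	if (ret < 0)
		return ret;

	/* ... work that must be serialized against remote unlink/rename ... */

	ocfs2_dentry_unlock(dentry, 0);
	return 0;
}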
/* Reference counting of the dlm debug structure. We want this because
* open references on the debug inodes can live on after a mount, so
* we can't rely on the ocfs2_super to always exist. */
@@ -1937,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
if (!lockres)
return -EINVAL;
- seq_printf(m, "0x%x\t"
- "%.*s\t"
- "%d\t"
+ seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
+
+ if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
+ seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
+ lockres->l_name,
+ (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
+ else
+ seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
+
+ seq_printf(m, "%d\t"
"0x%lx\t"
"0x%x\t"
"0x%x\t"
@@ -1947,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
"%u\t"
"%d\t"
"%d\t",
- OCFS2_DLM_DEBUG_STR_VERSION,
- OCFS2_LOCK_ID_MAX_LEN, lockres->l_name,
lockres->l_level,
lockres->l_flags,
lockres->l_action,
@@ -1999,7 +2052,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
mlog_errno(ret);
goto out;
}
- osb = (struct ocfs2_super *) inode->u.generic_ip;
+ osb = inode->i_private;
ocfs2_get_dlm_debug(osb->osb_dlm_debug);
priv->p_dlm_debug = osb->osb_dlm_debug;
INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
@@ -2138,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
mlog_exit_void();
}
-static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status)
+static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
{
struct ocfs2_lock_res *lockres = opaque;
unsigned long flags;
@@ -2194,24 +2247,20 @@ complete_unlock:
mlog_exit_void();
}
-typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);
-
-struct drop_lock_cb {
- ocfs2_pre_drop_cb_t *drop_func;
- void *drop_data;
-};
-
static int ocfs2_drop_lock(struct ocfs2_super *osb,
- struct ocfs2_lock_res *lockres,
- struct drop_lock_cb *dcb)
+ struct ocfs2_lock_res *lockres)
{
enum dlm_status status;
unsigned long flags;
+ int lkm_flags = 0;
/* We didn't get anywhere near actually using this lockres. */
if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
goto out;
+ if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+ lkm_flags |= LKM_VALBLK;
+
spin_lock_irqsave(&lockres->l_lock, flags);
mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
@@ -2234,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
spin_lock_irqsave(&lockres->l_lock, flags);
}
- if (dcb)
- dcb->drop_func(lockres, dcb->drop_data);
+ if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
+ if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
+ lockres->l_level == LKM_EXMODE &&
+ !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
+ lockres->l_ops->set_lvb(lockres);
+ }
if (lockres->l_flags & OCFS2_LOCK_BUSY)
mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
@@ -2261,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
mlog(0, "lock %s\n", lockres->l_name);
- status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK,
- lockres->l_ops->unlock_ast, lockres);
+ status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags,
+ ocfs2_unlock_ast, lockres);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmunlock", status, lockres);
mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
@@ -2309,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
spin_unlock_irqrestore(&lockres->l_lock, flags);
}
-static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
+void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres)
{
- int status;
-
- mlog_entry_void();
-
- ocfs2_mark_lockres_freeing(&osb->osb_super_lockres);
-
- status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
- if (status < 0)
- mlog_errno(status);
-
- ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres);
-
- status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
- if (status < 0)
- mlog_errno(status);
+ int ret;
- mlog_exit(status);
+ ocfs2_mark_lockres_freeing(lockres);
+ ret = ocfs2_drop_lock(osb, lockres);
+ if (ret)
+ mlog_errno(ret);
}
-static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data)
+static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{
- struct inode *inode = data;
-
- /* the metadata lock requires a bit more work as we have an
- * LVB to worry about. */
- if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
- lockres->l_level == LKM_EXMODE &&
- !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
- __ocfs2_stuff_meta_lvb(inode);
+ ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
+ ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
}
int ocfs2_drop_inode_locks(struct inode *inode)
{
int status, err;
- struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
mlog_entry_void();
@@ -2353,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode)
* ocfs2_clear_inode has done it for us. */
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
- &OCFS2_I(inode)->ip_data_lockres,
- NULL);
+ &OCFS2_I(inode)->ip_data_lockres);
if (err < 0)
mlog_errno(err);
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
- &OCFS2_I(inode)->ip_meta_lockres,
- &meta_dcb);
+ &OCFS2_I(inode)->ip_meta_lockres);
if (err < 0)
mlog_errno(err);
if (err < 0 && !status)
status = err;
err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
- &OCFS2_I(inode)->ip_rw_lockres,
- NULL);
+ &OCFS2_I(inode)->ip_rw_lockres);
if (err < 0)
mlog_errno(err);
if (err < 0 && !status)
@@ -2419,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
&lockres->l_lksb,
dlm_flags,
lockres->l_name,
- lockres->l_ops->ast,
+ OCFS2_LOCK_ID_MAX_LEN - 1,
+ ocfs2_locking_ast,
lockres,
- lockres->l_ops->bast);
+ ocfs2_blocking_ast);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmlock", status, lockres);
ret = -EINVAL;
@@ -2480,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
status = dlmunlock(osb->dlm,
&lockres->l_lksb,
LKM_CANCEL,
- lockres->l_ops->unlock_ast,
+ ocfs2_unlock_ast,
lockres);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmunlock", status, lockres);
@@ -2494,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
return ret;
}
-static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
- struct ocfs2_lock_res *lockres,
- int new_level)
-{
- int ret;
-
- mlog_entry_void();
-
- BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
-
- if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
- ret = 0;
- mlog(0, "lockres %s currently being refreshed -- backing "
- "off!\n", lockres->l_name);
- } else if (new_level == LKM_PRMODE)
- ret = !lockres->l_ex_holders &&
- ocfs2_inode_fully_checkpointed(inode);
- else /* Must be NLMODE we're converting to. */
- ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
- ocfs2_inode_fully_checkpointed(inode);
-
- mlog_exit(ret);
- return ret;
-}
-
-static int ocfs2_do_unblock_meta(struct inode *inode,
- int *requeue)
-{
- int new_level;
- int set_lvb = 0;
- int ret = 0;
- struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
- unsigned long flags;
-
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
- mlog_entry_void();
-
- spin_lock_irqsave(&lockres->l_lock, flags);
-
- BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
-
- mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
- lockres->l_blocking);
-
- BUG_ON(lockres->l_level != LKM_EXMODE &&
- lockres->l_level != LKM_PRMODE);
-
- if (lockres->l_flags & OCFS2_LOCK_BUSY) {
- *requeue = 1;
- ret = ocfs2_prepare_cancel_convert(osb, lockres);
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- if (ret) {
- ret = ocfs2_cancel_convert(osb, lockres);
- if (ret < 0)
- mlog_errno(ret);
- }
- goto leave;
- }
-
- new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
-
- mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
- lockres->l_level, lockres->l_blocking, new_level);
-
- if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
- if (lockres->l_level == LKM_EXMODE)
- set_lvb = 1;
-
- /* If the lock hasn't been refreshed yet (rare), then
- * our memory inode values are old and we skip
- * stuffing the lvb. There's no need to actually clear
- * out the lvb here as it's value is still valid. */
- if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
- if (set_lvb)
- __ocfs2_stuff_meta_lvb(inode);
- } else
- mlog(0, "lockres %s: downconverting stale lock!\n",
- lockres->l_name);
-
- mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
- "l_blocking=%d, new_level=%d\n",
- lockres->l_level, lockres->l_blocking, new_level);
-
- ocfs2_prepare_downconvert(lockres, new_level);
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
- goto leave;
- }
- if (!ocfs2_inode_fully_checkpointed(inode))
- ocfs2_start_checkpoint(osb);
-
- *requeue = 1;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- ret = 0;
-leave:
- mlog_exit(ret);
- return ret;
-}
-
-static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
- struct ocfs2_lock_res *lockres,
- int *requeue,
- ocfs2_convert_worker_t *worker)
+static int ocfs2_unblock_lock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres,
+ struct ocfs2_unblock_ctl *ctl)
{
unsigned long flags;
int blocking;
int new_level;
int ret = 0;
+ int set_lvb = 0;
mlog_entry_void();
@@ -2612,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
recheck:
if (lockres->l_flags & OCFS2_LOCK_BUSY) {
- *requeue = 1;
+ ctl->requeue = 1;
ret = ocfs2_prepare_cancel_convert(osb, lockres);
spin_unlock_irqrestore(&lockres->l_lock, flags);
if (ret) {
@@ -2626,27 +2560,33 @@ recheck:
/* if we're blocking an exclusive and we have *any* holders,
* then requeue. */
if ((lockres->l_blocking == LKM_EXMODE)
- && (lockres->l_ex_holders || lockres->l_ro_holders)) {
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- *requeue = 1;
- ret = 0;
- goto leave;
- }
+ && (lockres->l_ex_holders || lockres->l_ro_holders))
+ goto leave_requeue;
/* If it's a PR we're blocking, then only
* requeue if we've got any EX holders */
if (lockres->l_blocking == LKM_PRMODE &&
- lockres->l_ex_holders) {
- spin_unlock_irqrestore(&lockres->l_lock, flags);
- *requeue = 1;
- ret = 0;
- goto leave;
- }
+ lockres->l_ex_holders)
+ goto leave_requeue;
+
+ /*
+ * Can we get a lock in this state if the holder counts are
+ * zero? The meta data unblock code used to check this.
+ */
+ if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
+ && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
+ goto leave_requeue;
+
+ new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
+
+ if (lockres->l_ops->check_downconvert
+ && !lockres->l_ops->check_downconvert(lockres, new_level))
+ goto leave_requeue;
/* If we get here, then we know that there are no more
* incompatible holders (and anyone asking for an incompatible
* lock is blocked). We can now downconvert the lock */
- if (!worker)
+ if (!lockres->l_ops->downconvert_worker)
goto downconvert;
/* Some lockres types want to do a bit of work before
@@ -2656,7 +2596,10 @@ recheck:
blocking = lockres->l_blocking;
spin_unlock_irqrestore(&lockres->l_lock, flags);
- worker(lockres, blocking);
+ ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
+
+ if (ctl->unblock_action == UNBLOCK_STOP_POST)
+ goto leave;
spin_lock_irqsave(&lockres->l_lock, flags);
if (blocking != lockres->l_blocking) {
@@ -2666,25 +2609,43 @@ recheck:
}
downconvert:
- *requeue = 0;
- new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
+ ctl->requeue = 0;
+
+ if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
+ if (lockres->l_level == LKM_EXMODE)
+ set_lvb = 1;
+
+ /*
+ * We only set the lvb if the lock has been fully
+ * refreshed - otherwise we risk setting stale
+ * data. Otherwise, there's no need to actually clear
+ * out the lvb here as it's value is still valid.
+ */
+ if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
+ lockres->l_ops->set_lvb(lockres);
+ }
ocfs2_prepare_downconvert(lockres, new_level);
spin_unlock_irqrestore(&lockres->l_lock, flags);
- ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0);
+ ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
leave:
mlog_exit(ret);
return ret;
+
+leave_requeue:
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
+ ctl->requeue = 1;
+
+ mlog_exit(0);
+ return 0;
}
-static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
- int blocking)
+static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
+ int blocking)
{
struct inode *inode;
struct address_space *mapping;
- mlog_entry_void();
-
inode = ocfs2_lock_res_inode(lockres);
mapping = inode->i_mapping;
@@ -2705,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
filemap_fdatawait(mapping);
}
- mlog_exit_void();
+ return UNBLOCK_CONTINUE;
}
-int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
- int *requeue)
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+ int new_level)
{
- int status;
- struct inode *inode;
- struct ocfs2_super *osb;
-
- mlog_entry_void();
-
- inode = ocfs2_lock_res_inode(lockres);
- osb = OCFS2_SB(inode->i_sb);
-
- mlog(0, "unblock inode %llu\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ struct inode *inode = ocfs2_lock_res_inode(lockres);
+ int checkpointed = ocfs2_inode_fully_checkpointed(inode);
- status = ocfs2_generic_unblock_lock(osb,
- lockres,
- requeue,
- ocfs2_data_convert_worker);
- if (status < 0)
- mlog_errno(status);
+ BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
+ BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed);
- mlog(0, "inode %llu, requeue = %d\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
+ if (checkpointed)
+ return 1;
- mlog_exit(status);
- return status;
+ ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
+ return 0;
}
-static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
- int *requeue)
+static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
- int status;
- struct inode *inode;
-
- mlog_entry_void();
-
- mlog(0, "Unblock lockres %s\n", lockres->l_name);
-
- inode = ocfs2_lock_res_inode(lockres);
+ struct inode *inode = ocfs2_lock_res_inode(lockres);
- status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
- lockres,
- requeue,
- NULL);
- if (status < 0)
- mlog_errno(status);
-
- mlog_exit(status);
- return status;
+ __ocfs2_stuff_meta_lvb(inode);
}
-
-int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
- int *requeue)
+/*
+ * Does the final reference drop on our dentry lock. Right now this
+ * happens in the vote thread, but we could choose to simplify the
+ * dlmglue API and push these off to the ocfs2_wq in the future.
+ */
+static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres)
{
- int status;
- struct inode *inode;
-
- mlog_entry_void();
+ struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
+ ocfs2_dentry_lock_put(osb, dl);
+}
- inode = ocfs2_lock_res_inode(lockres);
+/*
+ * d_delete() matching dentries before the lock downconvert.
+ *
+ * At this point, any process waiting to destroy the
+ * dentry_lock due to last ref count is stopped by the
+ * OCFS2_LOCK_QUEUED flag.
+ *
+ * We have two potential problems
+ *
+ * 1) If we do the last reference drop on our dentry_lock (via dput)
+ * we'll wind up in ocfs2_release_dentry_lock(), waiting on
+ * the downconvert to finish. Instead we take an elevated
+ * reference and push the drop until after we've completed our
+ * unblock processing.
+ *
+ * 2) There might be another process with a final reference,
+ * waiting on us to finish processing. If this is the case, we
+ * detect it and exit out - there are no more dentries anyway.
+ */
+static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
+ int blocking)
+{
+ struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
+ struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
+ struct dentry *dentry;
+ unsigned long flags;
+ int extra_ref = 0;
- mlog(0, "unblock inode %llu\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ /*
+ * This node is blocking another node from getting a read
+ * lock. This happens when we've renamed within a
+ * directory. We've forced the other nodes to d_delete(), but
+ * we never actually dropped our lock because it's still
+ * valid. The downconvert code will retain a PR for this node,
+ * so there's no further work to do.
+ */
+ if (blocking == LKM_PRMODE)
+ return UNBLOCK_CONTINUE;
- status = ocfs2_do_unblock_meta(inode, requeue);
- if (status < 0)
- mlog_errno(status);
+ /*
+ * Mark this inode as potentially orphaned. The code in
+ * ocfs2_delete_inode() will figure out whether it actually
+ * needs to be freed or not.
+ */
+ spin_lock(&oi->ip_lock);
+ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+ spin_unlock(&oi->ip_lock);
- mlog(0, "inode %llu, requeue = %d\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
+ /*
+ * Yuck. We need to make sure however that the check of
+ * OCFS2_LOCK_FREEING and the extra reference are atomic with
+ * respect to a reference decrement or the setting of that
+ * flag.
+ */
+ spin_lock_irqsave(&lockres->l_lock, flags);
+ spin_lock(&dentry_attach_lock);
+ if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
+ && dl->dl_count) {
+ dl->dl_count++;
+ extra_ref = 1;
+ }
+ spin_unlock(&dentry_attach_lock);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
- mlog_exit(status);
- return status;
-}
+ mlog(0, "extra_ref = %d\n", extra_ref);
-/* Generic unblock function for any lockres whose private data is an
- * ocfs2_super pointer. */
-static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
- int *requeue)
-{
- int status;
- struct ocfs2_super *osb;
+ /*
+ * We have a process waiting on us in ocfs2_dentry_iput(),
+ * which means we can't have any more outstanding
+ * aliases. There's no need to do any more work.
+ */
+ if (!extra_ref)
+ return UNBLOCK_CONTINUE;
+
+ spin_lock(&dentry_attach_lock);
+ while (1) {
+ dentry = ocfs2_find_local_alias(dl->dl_inode,
+ dl->dl_parent_blkno, 1);
+ if (!dentry)
+ break;
+ spin_unlock(&dentry_attach_lock);
- mlog_entry_void();
+ mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
+ dentry->d_name.name);
- mlog(0, "Unblock lockres %s\n", lockres->l_name);
+ /*
+ * The following dcache calls may do an
+ * iput(). Normally we don't want that from the
+ * downconverting thread, but in this case it's ok
+ * because the requesting node already has an
+ * exclusive lock on the inode, so it can't be queued
+ * for a downconvert.
+ */
+ d_delete(dentry);
+ dput(dentry);
- osb = ocfs2_lock_res_super(lockres);
+ spin_lock(&dentry_attach_lock);
+ }
+ spin_unlock(&dentry_attach_lock);
- status = ocfs2_generic_unblock_lock(osb,
- lockres,
- requeue,
- NULL);
- if (status < 0)
- mlog_errno(status);
+ /*
+ * If we are the last holder of this dentry lock, there is no
+ * reason to downconvert so skip straight to the unlock.
+ */
+ if (dl->dl_count == 1)
+ return UNBLOCK_STOP_POST;
- mlog_exit(status);
- return status;
+ return UNBLOCK_CONTINUE_POST;
}
void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres)
{
int status;
- int requeue = 0;
+ struct ocfs2_unblock_ctl ctl = {0, 0,};
unsigned long flags;
/* Our reference to the lockres in this function can be
@@ -2825,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
BUG_ON(!lockres);
BUG_ON(!lockres->l_ops);
- BUG_ON(!lockres->l_ops->unblock);
mlog(0, "lockres %s blocked.\n", lockres->l_name);
@@ -2839,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
goto unqueue;
spin_unlock_irqrestore(&lockres->l_lock, flags);
- status = lockres->l_ops->unblock(lockres, &requeue);
+ status = ocfs2_unblock_lock(osb, lockres, &ctl);
if (status < 0)
mlog_errno(status);
spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
- if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) {
+ if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
} else
ocfs2_schedule_blocked_lock(osb, lockres);
mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
- requeue ? "yes" : "no");
+ ctl.requeue ? "yes" : "no");
spin_unlock_irqrestore(&lockres->l_lock, flags);
+ if (ctl.unblock_action != UNBLOCK_CONTINUE
+ && lockres->l_ops->post_unlock)
+ lockres->l_ops->post_unlock(osb, lockres);
+
mlog_exit_void();
}
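This is where the downconvert_worker return codes take effect: ctl.requeue decides whether the lock goes back on the queue, and any value other than UNBLOCK_CONTINUE causes ->post_unlock() to run once the spinlock has been dropped. A minimal hedged sketch of a worker driving that logic (illustration only, not part of this patch):

/* Hypothetical downconvert worker, for illustration only. */
static int example_convert_worker(struct ocfs2_lock_res *lockres, int blocking)
{
	/* A PR request can be satisfied without giving everything up;
	 * nothing extra to do before the downconvert. */
	if (blocking == LKM_PRMODE)
		return UNBLOCK_CONTINUE;

	/* Flush whatever this lock protects, then downconvert and let
	 * ->post_unlock() do the final cleanup afterwards. */
	return UNBLOCK_CONTINUE_POST;
}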
@@ -2896,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
mlog(level, "LVB information for %s (called from %s:%u):\n",
lockres->l_name, function, line);
- mlog(level, "version: %u, clusters: %u\n",
- be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters));
+ mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
+ lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
+ be32_to_cpu(lvb->lvb_igeneration));
mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
(unsigned long long)be64_to_cpu(lvb->lvb_isize),
be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 243ae862ece5..4a2769387229 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,10 +27,14 @@
#ifndef DLMGLUE_H
#define DLMGLUE_H
-#define OCFS2_LVB_VERSION 3
+#include "dcache.h"
+
+#define OCFS2_LVB_VERSION 4
struct ocfs2_meta_lvb {
- __be32 lvb_version;
+ __u8 lvb_version;
+ __u8 lvb_reserved0;
+ __be16 lvb_reserved1;
__be32 lvb_iclusters;
__be32 lvb_iuid;
__be32 lvb_igid;
@@ -41,7 +45,8 @@ struct ocfs2_meta_lvb {
__be16 lvb_imode;
__be16 lvb_inlink;
__be32 lvb_iattr;
- __be32 lvb_reserved[2];
+ __be32 lvb_igeneration;
+ __be32 lvb_reserved2;
};
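With the new layout, lvb_version is a single byte (so it never needs byte swapping) and lvb_igeneration ties the block to a specific incarnation of the inode; ocfs2_meta_lvb_is_trustable() in dlmglue.c only trusts the LVB when both match, and a deleted inode zeroes lvb_version to force other nodes back to disk. A hedged restatement of that check:

/* Sketch of the trust test, mirroring ocfs2_meta_lvb_is_trustable() in dlmglue.c. */
static int example_meta_lvb_usable(struct inode *inode, struct ocfs2_meta_lvb *lvb)
{
	/* lvb_version == 0 marks a deleted inode; a generation mismatch means
	 * the LVB was stuffed by a previous inode that reused this block. */
	return lvb->lvb_version == OCFS2_LVB_VERSION &&
	       be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation;
}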
/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
@@ -57,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
+ unsigned int generation,
struct inode *inode);
+void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
+ u64 parent, struct inode *inode);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
+int ocfs2_create_new_lock(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres, int ex, int local);
int ocfs2_drop_inode_locks(struct inode *inode);
int ocfs2_data_lock_full(struct inode *inode,
int write,
@@ -93,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
int ex);
int ocfs2_rename_lock(struct ocfs2_super *osb);
void ocfs2_rename_unlock(struct ocfs2_super *osb);
+int ocfs2_dentry_lock(struct dentry *dentry, int ex);
+void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
+
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
+ struct ocfs2_lock_res *lockres);
/* for the vote thread */
void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ec55ab3c1214..fb91089a60a7 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
#include "dir.h"
#include "dlmglue.h"
+#include "dcache.h"
#include "export.h"
#include "inode.h"
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
return ERR_PTR(-ESTALE);
}
- inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
+ inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
mlog_errno(-ENOMEM);
return ERR_PTR(-ENOMEM);
}
+ result->d_op = &ocfs2_dentry_ops;
mlog_exit_ptr(result);
return result;
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail_unlock;
}
- inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
+ inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
if (IS_ERR(inode)) {
mlog(ML_ERROR, "Unable to create inode %llu\n",
(unsigned long long)blkno);
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
parent = ERR_PTR(-ENOMEM);
}
+ parent->d_op = &ocfs2_dentry_ops;
+
bail_unlock:
ocfs2_meta_unlock(dir, 0);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7bcf69154592..16e8e74dc966 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
#include "buffer_head_io.h"
-#define OCFS2_FI_FLAG_NOWAIT 0x1
-#define OCFS2_FI_FLAG_DELETE 0x2
struct ocfs2_find_inode_args
{
u64 fi_blkno;
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
}
-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
{
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
}
args.fi_blkno = blkno;
- args.fi_flags = 0;
+ args.fi_flags = flags;
args.fi_ino = ino_from_blkno(sb, blkno);
inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -271,7 +269,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_mode = le16_to_cpu(fe->i_mode);
inode->i_uid = le32_to_cpu(fe->i_uid);
inode->i_gid = le32_to_cpu(fe->i_gid);
- inode->i_blksize = (u32)osb->s_clustersize;
/* Fast symlinks will have i_size but no allocated clusters. */
if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
@@ -297,15 +294,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
- if (create_ino)
- inode->i_ino = ino_from_blkno(inode->i_sb,
- le64_to_cpu(fe->i_blkno));
-
- mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
- (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
-
inode->i_nlink = le16_to_cpu(fe->i_links_count);
+ if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
+
if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -343,12 +336,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
break;
}
+ if (create_ino) {
+ inode->i_ino = ino_from_blkno(inode->i_sb,
+ le64_to_cpu(fe->i_blkno));
+
+ /*
+ * If we ever want to create system files from kernel,
+ * the generation argument to
+ * ocfs2_inode_lock_res_init() will have to change.
+ */
+ BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
+
+ ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
+ OCFS2_LOCK_TYPE_META, 0, inode);
+ }
+
ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
- OCFS2_LOCK_TYPE_RW, inode);
- ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
- OCFS2_LOCK_TYPE_META, inode);
+ OCFS2_LOCK_TYPE_RW, inode->i_generation,
+ inode);
+
ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
- OCFS2_LOCK_TYPE_DATA, inode);
+ OCFS2_LOCK_TYPE_DATA, inode->i_generation,
+ inode);
ocfs2_set_inode_flags(inode);
inode->i_flags |= S_NOATIME;
@@ -366,15 +375,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
struct ocfs2_super *osb;
struct ocfs2_dinode *fe;
struct buffer_head *bh = NULL;
- int status;
- int sysfile = 0;
+ int status, can_lock;
+ u32 generation = 0;
mlog_entry("(0x%p, 0x%p)\n", inode, args);
status = -EINVAL;
if (inode == NULL || inode->i_sb == NULL) {
mlog(ML_ERROR, "bad inode\n");
- goto bail;
+ return status;
}
sb = inode->i_sb;
osb = OCFS2_SB(sb);
@@ -382,50 +391,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
if (!args) {
mlog(ML_ERROR, "bad inode args\n");
make_bad_inode(inode);
- goto bail;
+ return status;
+ }
+
+ /*
+ * To improve performance of cold-cache inode stats, we take
+ * the cluster lock here if possible.
+ *
+ * Generally, OCFS2 never trusts the contents of an inode
+ * unless it's holding a cluster lock, so taking it here isn't
+ * a correctness issue as much as it is a performance
+ * improvement.
+ *
+ * There are three times when taking the lock is not a good idea:
+ *
+ * 1) During startup, before we have initialized the DLM.
+ *
+ * 2) If we are reading certain system files which never get
+ * cluster locks (local alloc, truncate log).
+ *
+ * 3) If the process doing the iget() is responsible for
+ * orphan dir recovery. We're holding the orphan dir lock and
+ * can get into a deadlock with another process on another
+ * node in ->delete_inode().
+ *
+ * #1 and #2 can be simply solved by never taking the lock
+ * here for system files (which are the only type we read
+ * during mount). It's a heavier approach, but our main
+ * concern is user-accessible files anyway.
+ *
+ * #3 works itself out because we'll eventually take the
+ * cluster lock before trusting anything anyway.
+ */
+ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
+ && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
+
+ /*
+ * To maintain backwards compatibility with older versions of
+ * ocfs2-tools, we still store the generation value for system
+ * files. The only ones that actually matter to userspace are
+ * the journals, but it's easier and inexpensive to just flag
+ * all system files similarly.
+ */
+ if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
+ generation = osb->fs_generation;
+
+ ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
+ OCFS2_LOCK_TYPE_META,
+ generation, inode);
+
+ if (can_lock) {
+ status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ if (status) {
+ make_bad_inode(inode);
+ mlog_errno(status);
+ return status;
+ }
}
- /* Read the FE off disk. This is safe because the kernel only
- * does one read_inode2 for a new inode, and if it doesn't
- * exist yet then nobody can be working on it! */
- status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
+ status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
+ can_lock ? inode : NULL);
if (status < 0) {
mlog_errno(status);
- make_bad_inode(inode);
goto bail;
}
+ status = -EINVAL;
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
(unsigned long long)fe->i_blkno, 7, fe->i_signature);
- make_bad_inode(inode);
goto bail;
}
- if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
- sysfile = 1;
+ /*
+ * This is a code bug. Right now the caller needs to
+ * understand whether it is asking for a system file inode or
+ * not so the proper lock names can be built.
+ */
+ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
+ !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
+ "Inode %llu: system file state is ambigous\n",
+ (unsigned long long)args->fi_blkno);
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
S_ISBLK(le16_to_cpu(fe->i_mode)))
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- status = -EINVAL;
if (ocfs2_populate_inode(inode, fe, 0) < 0) {
mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
(unsigned long long)fe->i_blkno, inode->i_ino);
- make_bad_inode(inode);
goto bail;
}
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
- if (sysfile)
- OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
-
status = 0;
bail:
+ if (can_lock)
+ ocfs2_meta_unlock(inode, 0);
+
+ if (status < 0)
+ make_bad_inode(inode);
+
if (args && bh)
brelse(bh);
@@ -898,9 +967,15 @@ void ocfs2_delete_inode(struct inode *inode)
goto bail_unlock_inode;
}
- /* Mark the inode as successfully deleted. This is important
- * for ocfs2_clear_inode as it will check this flag and skip
- * any checkpointing work */
+ /*
+ * Mark the inode as successfully deleted.
+ *
+ * This is important for ocfs2_clear_inode() as it will check
+ * this flag and skip any checkpointing work
+ *
+ * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
+ * the LVB for other nodes.
+ */
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
bail_unlock_inode:
@@ -1025,12 +1100,10 @@ void ocfs2_drop_inode(struct inode *inode)
/* Testing ip_orphaned_slot here wouldn't work because we may
* not have gotten a delete_inode vote from any other nodes
* yet. */
- if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
- mlog(0, "Inode was orphaned on another node, clearing nlink.\n");
- inode->i_nlink = 0;
- }
-
- generic_drop_inode(inode);
+ if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
+ generic_delete_inode(inode);
+ else
+ generic_drop_inode(inode);
mlog_exit_void();
}
@@ -1184,8 +1257,6 @@ leave:
void ocfs2_refresh_inode(struct inode *inode,
struct ocfs2_dinode *fe)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -1196,7 +1267,6 @@ void ocfs2_refresh_inode(struct inode *inode,
inode->i_uid = le32_to_cpu(fe->i_uid);
inode->i_gid = le32_to_cpu(fe->i_gid);
inode->i_mode = le16_to_cpu(fe->i_mode);
- inode->i_blksize = (u32) osb->s_clustersize;
if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
inode->i_blocks = 0;
else
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 4d1e53992566..9957810fdf85 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
void ocfs2_clear_inode(struct inode *inode);
void ocfs2_delete_inode(struct inode *inode);
void ocfs2_drop_inode(struct inode *inode);
-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff);
+
+/* Flags for ocfs2_iget() */
+#define OCFS2_FI_FLAG_NOWAIT 0x1
+#define OCFS2_FI_FLAG_DELETE 0x2
+#define OCFS2_FI_FLAG_SYSFILE 0x4
+#define OCFS2_FI_FLAG_NOLOCK 0x8
+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
u64 blkno,
int delete_vote);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1dd379a..fd9734def551 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
if (de->name_len == 2 && !strncmp("..", de->name, 2))
continue;
- iter = ocfs2_iget(osb, le64_to_cpu(de->inode));
+ iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
+ OCFS2_FI_FLAG_NOLOCK);
if (IS_ERR(iter))
continue;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0d3e939b1f56..849c3b4bb94a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
if (status < 0)
goto bail_add;
- inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
+ inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
if (IS_ERR(inode)) {
mlog(ML_ERROR, "Unable to create inode %llu\n",
(unsigned long long)blkno);
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
spin_unlock(&oi->ip_lock);
bail_add:
-
dentry->d_op = &ocfs2_dentry_ops;
ret = d_splice_alias(inode, dentry);
+ if (inode) {
+ /*
+ * If d_splice_alias() finds a DCACHE_DISCONNECTED
+ * dentry, it will d_move() it on top of ours. The
+ * return value will indicate this, however, so in
+ * those cases, we switch them around for the locking
+ * code.
+ *
+ * NOTE: This dentry already has ->d_op set from
+ * ocfs2_get_parent() and ocfs2_get_dentry()
+ */
+ if (ret)
+ dentry = ret;
+
+ status = ocfs2_dentry_attach_lock(dentry, inode,
+ OCFS2_I(dir)->ip_blkno);
+ if (status) {
+ mlog_errno(status);
+ ret = ERR_PTR(status);
+ goto bail_unlock;
+ }
+ }
+
bail_unlock:
/* Don't drop the cluster lock until *after* the d_add --
* unlink on another node will message us to remove that
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ status = ocfs2_dentry_attach_lock(dentry, inode,
+ OCFS2_I(dir)->ip_blkno);
+ if (status) {
+ mlog_errno(status);
+ goto leave;
+ }
+
insert_inode_hash(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
@@ -725,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry,
goto bail;
}
+ err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+ if (err) {
+ mlog_errno(err);
+ goto bail;
+ }
+
atomic_inc(&inode->i_count);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
@@ -743,6 +778,23 @@ bail:
return err;
}
+/*
+ * Takes and drops an exclusive lock on the given dentry. This will
+ * force other nodes to drop their cached dentries for the name.
+ */
+static int ocfs2_remote_dentry_delete(struct dentry *dentry)
+{
+ int ret;
+
+ ret = ocfs2_dentry_lock(dentry, 1);
+ if (ret)
+ mlog_errno(ret);
+ else
+ ocfs2_dentry_unlock(dentry, 1);
+
+ return ret;
+}
+
static int ocfs2_unlink(struct inode *dir,
struct dentry *dentry)
{
@@ -832,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
else
inode->i_nlink--;
- status = ocfs2_request_unlink_vote(inode, dentry,
- (unsigned int) inode->i_nlink);
+ status = ocfs2_remote_dentry_delete(dentry);
if (status < 0) {
/* This vote should succeed under all normal
* circumstances. */
@@ -1019,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir,
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
- unsigned int links_count;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1093,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
- if (S_ISDIR(old_inode->i_mode)) {
- /* Directories actually require metadata updates to
- * the directory info so we can't get away with not
- * doing node locking on it. */
- status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
- if (status < 0) {
- if (status != -ENOENT)
- mlog_errno(status);
- goto bail;
- }
-
- status = ocfs2_request_rename_vote(old_inode, old_dentry);
- if (status < 0) {
+ /*
+ * Though we don't require an inode metadata update if
+ * old_inode is not a directory, we lock anyway here to ensure
+ * the vote thread on other nodes won't have to concurrently
+ * downconvert the inode and the dentry locks.
+ */
+ status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
+ if (status < 0) {
+ if (status != -ENOENT)
mlog_errno(status);
- goto bail;
- }
+ goto bail;
+ }
+
+ status = ocfs2_remote_dentry_delete(old_dentry);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ if (S_ISDIR(old_inode->i_mode)) {
status = -EIO;
old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
if (!old_inode_de_bh)
@@ -1123,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir,
if (!new_inode && new_dir!=old_dir &&
new_dir->i_nlink >= OCFS2_LINK_MAX)
goto bail;
- } else {
- /* Ah, the simple case - we're a file so just send a
- * message. */
- status = ocfs2_request_rename_vote(old_inode, old_dentry);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
}
status = -ENOENT;
@@ -1202,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
- if (S_ISDIR(new_inode->i_mode))
- links_count = 0;
- else
- links_count = (unsigned int) (new_inode->i_nlink - 1);
-
- status = ocfs2_request_unlink_vote(new_inode, new_dentry,
- links_count);
+ status = ocfs2_remote_dentry_delete(new_dentry);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1387,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
+ ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
status = 0;
bail:
if (rename_lock)
@@ -1675,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
+ status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+
insert_inode_hash(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 7dd9e1e705b0..4d5d5655c185 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
#define OCFS2_LOCK_ID_MAX_LEN 32
#define OCFS2_LOCK_ID_PAD "000000"
+#define OCFS2_DENTRY_LOCK_INO_START 18
+
enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_META = 0,
OCFS2_LOCK_TYPE_DATA,
OCFS2_LOCK_TYPE_SUPER,
OCFS2_LOCK_TYPE_RENAME,
OCFS2_LOCK_TYPE_RW,
+ OCFS2_LOCK_TYPE_DENTRY,
OCFS2_NUM_LOCK_TYPES
};
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_RW:
c = 'W';
break;
+ case OCFS2_LOCK_TYPE_DENTRY:
+ c = 'N';
+ break;
default:
c = '\0';
}
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
return c;
}
+static char *ocfs2_lock_type_strings[] = {
+ [OCFS2_LOCK_TYPE_META] = "Meta",
+ [OCFS2_LOCK_TYPE_DATA] = "Data",
+ [OCFS2_LOCK_TYPE_SUPER] = "Super",
+ [OCFS2_LOCK_TYPE_RENAME] = "Rename",
+ /* Need to differentiate from [R]ename... serializing writes is the
+ * important job it does, anyway. */
+ [OCFS2_LOCK_TYPE_RW] = "Write/Read",
+ [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
+};
+
+static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
+{
+#ifdef __KERNEL__
+ mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
+#endif
+ return ocfs2_lock_type_strings[type];
+}
+
#endif /* OCFS2_LOCKID_H */
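For illustration only, a minimal sketch of how the new lock-name table might be consumed when formatting debugging output; the helper below is hypothetical and not part of the patch, though ocfs2_lock_type_char() and ocfs2_lock_type_string() are the functions defined above.

/* Hypothetical debug helper: print both the single-letter lock name and the
 * new human-readable string for a given lock type. */
static void example_report_lock_type(enum ocfs2_lock_type type)
{
	printk(KERN_DEBUG "ocfs2 lock type '%c' (%s)\n",
	       ocfs2_lock_type_char(type),
	       ocfs2_lock_type_string(type));
}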
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d17e33e66a1e..4c29cd7cc8e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
mlog_entry_void();
- new = ocfs2_iget(osb, osb->root_blkno);
+ new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
if (IS_ERR(new)) {
status = PTR_ERR(new);
mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
}
osb->root_inode = new;
- new = ocfs2_iget(osb, osb->system_dir_blkno);
+ new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
if (IS_ERR(new)) {
status = PTR_ERR(new);
mlog_errno(status);
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
.kill_sb = kill_block_super, /* set to the generic one
* right now, but do we
* need to change that? */
- .fs_flags = FS_REQUIRES_DEV,
+ .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
.next = NULL
};
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..5df6e35d09b1 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
#include <linux/slab.h>
#include <linux/highmem.h>
-#include "ocfs2.h"
-
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
+#include "ocfs2.h"
+
#include "alloc.h"
#include "dir.h"
#include "inode.h"
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
goto bail;
}
- inode = ocfs2_iget(osb, blkno);
+ inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
inode = NULL;
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index cf70fe2075b8..5b4dca79990b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
__be32 v_orphaned_slot; /* Used during delete votes */
__be32 v_nlink; /* Used during unlink votes */
} md1; /* Message type dependent 1 */
- __be32 v_unlink_namelen;
- __be64 v_unlink_parent;
- u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
};
/* Responses are given these values to maintain backwards
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
enum ocfs2_vote_request {
OCFS2_VOTE_REQ_INVALID = 0,
OCFS2_VOTE_REQ_DELETE,
- OCFS2_VOTE_REQ_UNLINK,
- OCFS2_VOTE_REQ_RENAME,
OCFS2_VOTE_REQ_MOUNT,
OCFS2_VOTE_REQ_UMOUNT,
OCFS2_VOTE_REQ_LAST
@@ -261,103 +256,13 @@ done:
return response;
}
-static int ocfs2_match_dentry(struct dentry *dentry,
- u64 parent_blkno,
- unsigned int namelen,
- const char *name)
-{
- struct inode *parent;
-
- if (!dentry->d_parent) {
- mlog(0, "Detached from parent.\n");
- return 0;
- }
-
- parent = dentry->d_parent->d_inode;
- /* Negative parent dentry? */
- if (!parent)
- return 0;
-
- /* Name is in a different directory. */
- if (OCFS2_I(parent)->ip_blkno != parent_blkno)
- return 0;
-
- if (dentry->d_name.len != namelen)
- return 0;
-
- /* comparison above guarantees this is safe. */
- if (memcmp(dentry->d_name.name, name, namelen))
- return 0;
-
- return 1;
-}
-
-static void ocfs2_process_dentry_request(struct inode *inode,
- int rename,
- unsigned int new_nlink,
- u64 parent_blkno,
- unsigned int namelen,
- const char *name)
-{
- struct dentry *dentry = NULL;
- struct list_head *p;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
-
- mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
- (unsigned long long)parent_blkno, namelen, namelen, name);
-
- spin_lock(&dcache_lock);
-
- /* Another node is removing this name from the system. It is
- * up to us to find the corresponding dentry and if it exists,
- * unhash it from the dcache. */
- list_for_each(p, &inode->i_dentry) {
- dentry = list_entry(p, struct dentry, d_alias);
-
- if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
- mlog(0, "dentry found: %.*s\n",
- dentry->d_name.len, dentry->d_name.name);
-
- dget_locked(dentry);
- break;
- }
-
- dentry = NULL;
- }
-
- spin_unlock(&dcache_lock);
-
- if (dentry) {
- d_delete(dentry);
- dput(dentry);
- }
-
- /* rename votes don't send link counts */
- if (!rename) {
- mlog(0, "new_nlink = %u\n", new_nlink);
-
- /* We don't have the proper locks here to directly
- * change i_nlink and besides, the vote is sent
- * *before* the operation so it may have failed on the
- * other node. This passes a hint to ocfs2_drop_inode
- * to force ocfs2_delete_inode, who will take the
- * proper cluster locks to sort things out. */
- if (new_nlink == 0) {
- spin_lock(&oi->ip_lock);
- oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
- spin_unlock(&OCFS2_I(inode)->ip_lock);
- }
- }
-}
-
static void ocfs2_process_vote(struct ocfs2_super *osb,
struct ocfs2_vote_msg *msg)
{
int net_status, vote_response;
int orphaned_slot = 0;
- int rename = 0;
- unsigned int node_num, generation, new_nlink, namelen;
- u64 blkno, parent_blkno;
+ unsigned int node_num, generation;
+ u64 blkno;
enum ocfs2_vote_request request;
struct inode *inode = NULL;
struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
vote_response = ocfs2_process_delete_request(inode,
&orphaned_slot);
break;
- case OCFS2_VOTE_REQ_RENAME:
- rename = 1;
- /* fall through */
- case OCFS2_VOTE_REQ_UNLINK:
- parent_blkno = be64_to_cpu(msg->v_unlink_parent);
- namelen = be32_to_cpu(msg->v_unlink_namelen);
- /* new_nlink will be ignored in case of a rename vote */
- new_nlink = be32_to_cpu(msg->md1.v_nlink);
- ocfs2_process_dentry_request(inode, rename, new_nlink,
- parent_blkno, namelen,
- msg->v_unlink_dirent);
- break;
default:
mlog(ML_ERROR, "node %u, invalid request: %u\n",
node_num, request);
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
return status;
}
-static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
- struct dentry *dentry)
-{
- struct inode *parent = dentry->d_parent->d_inode;
-
- /* We need some values which will uniquely identify a dentry
- * on the other nodes so that they can find it and run
- * d_delete against it. Parent directory block and full name
- * should suffice. */
-
- mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
- (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
- dentry->d_name.name);
-
- request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
- request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
- memcpy(request->v_unlink_dirent, dentry->d_name.name,
- dentry->d_name.len);
-}
-
-int ocfs2_request_unlink_vote(struct inode *inode,
- struct dentry *dentry,
- unsigned int nlink)
-{
- int status;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_vote_msg *request;
-
- if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
- return -ENAMETOOLONG;
-
- status = -ENOMEM;
- request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
- inode->i_generation,
- OCFS2_VOTE_REQ_UNLINK, nlink);
- if (request) {
- ocfs2_setup_unlink_vote(request, dentry);
-
- status = ocfs2_request_vote(inode, request, NULL);
-
- kfree(request);
- }
- return status;
-}
-
-int ocfs2_request_rename_vote(struct inode *inode,
- struct dentry *dentry)
-{
- int status;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_vote_msg *request;
-
- if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
- return -ENAMETOOLONG;
-
- status = -ENOMEM;
- request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
- inode->i_generation,
- OCFS2_VOTE_REQ_RENAME, 0);
- if (request) {
- ocfs2_setup_unlink_vote(request, dentry);
-
- status = ocfs2_request_vote(inode, request, NULL);
-
- kfree(request);
- }
- return status;
-}
-
int ocfs2_request_mount_vote(struct ocfs2_super *osb)
{
int status;
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h
index 9cce60703466..53ebc1c69e56 100644
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
}
int ocfs2_request_delete_vote(struct inode *inode);
-int ocfs2_request_unlink_vote(struct inode *inode,
- struct dentry *dentry,
- unsigned int nlink);
-int ocfs2_request_rename_vote(struct inode *inode,
- struct dentry *dentry);
int ocfs2_request_mount_vote(struct ocfs2_super *osb);
int ocfs2_request_umount_vote(struct ocfs2_super *osb);
int ocfs2_register_net_handlers(struct ocfs2_super *osb);
diff --git a/fs/open.c b/fs/open.c
index 303f06d2a7b9..304c1c7814cb 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename)
struct nameidata nd;
int error;
- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+ error = __user_walk(filename,
+ LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
if (error)
goto out;
@@ -1172,6 +1173,7 @@ asmlinkage long sys_close(unsigned int fd)
struct file * filp;
struct files_struct *files = current->files;
struct fdtable *fdt;
+ int retval;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
@@ -1184,7 +1186,16 @@ asmlinkage long sys_close(unsigned int fd)
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
- return filp_close(filp, files);
+ retval = filp_close(filp, files);
+
+ /* can't restart close syscall because file table entry was cleared */
+ if (unlikely(retval == -ERESTARTSYS ||
+ retval == -ERESTARTNOINTR ||
+ retval == -ERESTARTNOHAND ||
+ retval == -ERESTART_RESTARTBLOCK))
+ retval = -EINTR;
+
+ return retval;
out_unlock:
spin_unlock(&files->file_lock);
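A userspace sketch, for illustration only, of why the restart codes are mapped to -EINTR above: once close() returns, the descriptor slot has already been released, so a caller must never retry the call on EINTR (the helper name close_once below is made up).

#include <errno.h>
#include <unistd.h>

/* The fd is released even when close() reports an error, so retrying could
 * close an unrelated descriptor another thread has since been handed. */
void close_once(int fd)
{
	if (close(fd) == -1 && errno == EINTR) {
		/* Treat the descriptor as closed; do not call close(fd) again. */
	}
}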
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 93a56bd4a2b7..592a6402e851 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -8,10 +8,10 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/openprom_fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/magic.h>
#include <asm/openprom.h>
#include <asm/oplib.h>
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 63730282ad81..1bea610078b3 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -238,10 +238,9 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt)
le32_to_cpu(gpt->sizeof_partition_entry);
if (!count)
return NULL;
- pte = kmalloc(count, GFP_KERNEL);
+ pte = kzalloc(count, GFP_KERNEL);
if (!pte)
return NULL;
- memset(pte, 0, count);
if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba),
(u8 *) pte,
@@ -269,10 +268,9 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba)
if (!bdev)
return NULL;
- gpt = kmalloc(sizeof (gpt_header), GFP_KERNEL);
+ gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL);
if (!gpt)
return NULL;
- memset(gpt, 0, sizeof (gpt_header));
if (read_lba(bdev, lba, (u8 *) gpt,
sizeof (gpt_header)) < sizeof (gpt_header)) {
@@ -526,9 +524,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes)
lastlba = last_lba(bdev);
if (!force_gpt) {
/* This will be added to the EFI Spec. per Intel after v1.02. */
- legacymbr = kmalloc(sizeof (*legacymbr), GFP_KERNEL);
+ legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
if (legacymbr) {
- memset(legacymbr, 0, sizeof (*legacymbr));
read_lba(bdev, 0, (u8 *) legacymbr,
sizeof (*legacymbr));
good_pmbr = is_pmbr_valid(legacymbr, lastlba);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 8f12587c3129..4f8df71e49d3 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -58,6 +58,31 @@ msdos_magic_present(unsigned char *p)
return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2);
}
+/* Value is EBCDIC 'IBMA' */
+#define AIX_LABEL_MAGIC1 0xC9
+#define AIX_LABEL_MAGIC2 0xC2
+#define AIX_LABEL_MAGIC3 0xD4
+#define AIX_LABEL_MAGIC4 0xC1
+static int aix_magic_present(unsigned char *p, struct block_device *bdev)
+{
+ Sector sect;
+ unsigned char *d;
+ int ret = 0;
+
+ if (p[0] != AIX_LABEL_MAGIC1 ||
+ p[1] != AIX_LABEL_MAGIC2 ||
+ p[2] != AIX_LABEL_MAGIC3 ||
+ p[3] != AIX_LABEL_MAGIC4)
+ return 0;
+ d = read_dev_sector(bdev, 7, &sect);
+ if (d) {
+ if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
+ ret = 1;
+ put_dev_sector(sect);
+ }
+ return ret;
+}
+
/*
* Create devices for each logical partition in an extended partition.
* The logical partitions form a linked list, with each entry being
@@ -393,6 +418,12 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
return 0;
}
+ if (aix_magic_present(data, bdev)) {
+ put_dev_sector(sect);
+ printk( " [AIX]");
+ return 0;
+ }
+
/*
* Now that the 55aa signature is present, this is probably
* either the boot sector of a FAT filesystem or a DOS-type
diff --git a/fs/pipe.c b/fs/pipe.c
index 20352573e025..f3b6f71e9d0b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -879,7 +879,6 @@ static struct inode * get_pipe_inode(void)
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- inode->i_blksize = PAGE_SIZE;
return inode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0b615d62a159..c0e554971df0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -347,6 +347,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
sigemptyset(&sigign);
sigemptyset(&sigcatch);
cutime = cstime = utime = stime = cputime_zero;
+
+ mutex_lock(&tty_mutex);
read_lock(&tasklist_lock);
if (task->sighand) {
spin_lock_irq(&task->sighand->siglock);
@@ -388,6 +390,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
}
ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
read_unlock(&tasklist_lock);
+ mutex_unlock(&tty_mutex);
if (!whole || num_threads<2)
wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fe8d55fb17cc..89c20d9d50bf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -797,7 +797,7 @@ out_no_task:
static ssize_t mem_write(struct file * file, const char * buf,
size_t count, loff_t *ppos)
{
- int copied = 0;
+ int copied;
char *page;
struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
unsigned long dst = *ppos;
@@ -814,6 +814,7 @@ static ssize_t mem_write(struct file * file, const char * buf,
if (!page)
goto out;
+ copied = 0;
while (count > 0) {
int this_len, retval;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 146a434ba944..987c773dbb20 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -28,6 +28,7 @@ do { \
(vmi)->largest_chunk = 0; \
} while(0)
+extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
#endif
extern void create_seq_entry(char *name, mode_t mode, const struct file_operations *f);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6a984f64edd7..1294eda4acae 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -100,7 +100,7 @@ static int notesize(struct memelfnote *en)
int sz;
sz = sizeof(struct elf_note);
- sz += roundup(strlen(en->name), 4);
+ sz += roundup((strlen(en->name) + 1), 4);
sz += roundup(en->datasz, 4);
return sz;
@@ -116,7 +116,7 @@ static char *storenote(struct memelfnote *men, char *bufp)
#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
- en.n_namesz = strlen(men->name);
+ en.n_namesz = strlen(men->name) + 1;
en.n_descsz = men->datasz;
en.n_type = men->type;
@@ -279,12 +279,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
tsz = elf_buflen - *fpos;
if (buflen < tsz)
tsz = buflen;
- elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
+ elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
if (!elf_buf) {
read_unlock(&kclist_lock);
return -ENOMEM;
}
- memset(elf_buf, 0, elf_buflen);
elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
read_unlock(&kclist_lock);
if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
@@ -330,10 +329,9 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
unsigned long curstart = start;
unsigned long cursize = tsz;
- elf_buf = kmalloc(tsz, GFP_KERNEL);
+ elf_buf = kzalloc(tsz, GFP_KERNEL);
if (!elf_buf)
return -ENOMEM;
- memset(elf_buf, 0, tsz);
read_lock(&vmlist_lock);
for (m=vmlist; m && cursize; m=m->next) {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index cff10ab1af63..d7dbdf9e0f49 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,19 +33,15 @@
#include "internal.h"
/*
- * display a list of all the VMAs the kernel knows about
- * - nommu kernals have a single flat list
+ * display a single VMA to a sequenced file
*/
-static int nommu_vma_list_show(struct seq_file *m, void *v)
+int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
- struct vm_area_struct *vma;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags, len;
- vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
-
flags = vma->vm_flags;
file = vma->vm_file;
@@ -78,6 +74,18 @@ static int nommu_vma_list_show(struct seq_file *m, void *v)
return 0;
}
+/*
+ * display a list of all the VMAs the kernel knows about
+ * - nommu kernels have a single flat list
+ */
+static int nommu_vma_list_show(struct seq_file *m, void *v)
+{
+ struct vm_area_struct *vma;
+
+ vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
+ return nommu_vma_show(m, vma);
+}
+
static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
{
struct rb_node *_rb;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 942156225447..5bbd60896050 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -157,10 +157,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"SwapCached: %8lu kB\n"
"Active: %8lu kB\n"
"Inactive: %8lu kB\n"
+#ifdef CONFIG_HIGHMEM
"HighTotal: %8lu kB\n"
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
+#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
@@ -168,6 +170,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"AnonPages: %8lu kB\n"
"Mapped: %8lu kB\n"
"Slab: %8lu kB\n"
+ "SReclaimable: %8lu kB\n"
+ "SUnreclaim: %8lu kB\n"
"PageTables: %8lu kB\n"
"NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
@@ -183,17 +187,22 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(total_swapcache_pages),
K(active),
K(inactive),
+#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
+#endif
K(i.totalswap),
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
- K(global_page_state(NR_SLAB)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE)),
+ K(global_page_state(NR_SLAB_RECLAIMABLE)),
+ K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_PAGETABLE)),
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0a163a4f7764..6b769afac55a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -122,11 +122,6 @@ struct mem_size_stats
unsigned long private_dirty;
};
-__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
-{
- return NULL;
-}
-
static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
{
struct proc_maps_private *priv = m->private;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4616ed50ffcd..091aa8e48e02 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -138,25 +138,63 @@ out:
}
/*
- * Albert D. Cahalan suggested to fake entries for the traditional
- * sections here. This might be worth investigating.
+ * display mapping lines for a particular process's /proc/pid/maps
*/
-static int show_map(struct seq_file *m, void *v)
+static int show_map(struct seq_file *m, void *_vml)
{
- return 0;
+ struct vm_list_struct *vml = _vml;
+ return nommu_vma_show(m, vml->vma);
}
+
static void *m_start(struct seq_file *m, loff_t *pos)
{
+ struct proc_maps_private *priv = m->private;
+ struct vm_list_struct *vml;
+ struct mm_struct *mm;
+ loff_t n = *pos;
+
+ /* pin the task and mm whilst we play with them */
+ priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+ if (!priv->task)
+ return NULL;
+
+ mm = get_task_mm(priv->task);
+ if (!mm) {
+ put_task_struct(priv->task);
+ priv->task = NULL;
+ return NULL;
+ }
+
+ down_read(&mm->mmap_sem);
+
+ /* start from the Nth VMA */
+ for (vml = mm->context.vmlist; vml; vml = vml->next)
+ if (n-- == 0)
+ return vml;
return NULL;
}
-static void m_stop(struct seq_file *m, void *v)
+
+static void m_stop(struct seq_file *m, void *_vml)
{
+ struct proc_maps_private *priv = m->private;
+
+ if (priv->task) {
+ struct mm_struct *mm = priv->task->mm;
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ put_task_struct(priv->task);
+ }
}
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+
+static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
{
- return NULL;
+ struct vm_list_struct *vml = _vml;
+
+ (*pos)++;
+ return vml ? vml->next : NULL;
}
-static struct seq_operations proc_pid_maps_op = {
+
+static struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
@@ -165,11 +203,19 @@ static struct seq_operations proc_pid_maps_op = {
static int maps_open(struct inode *inode, struct file *file)
{
- int ret;
- ret = seq_open(file, &proc_pid_maps_op);
- if (!ret) {
- struct seq_file *m = file->private_data;
- m->private = NULL;
+ struct proc_maps_private *priv;
+ int ret = -ENOMEM;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv) {
+ priv->pid = proc_pid(inode);
+ ret = seq_open(file, &proc_pid_maps_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = priv;
+ } else {
+ kfree(priv);
+ }
}
return ret;
}
@@ -178,6 +224,6 @@ struct file_operations proc_maps_operations = {
.open = maps_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
};
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5a903491e697..5a41db2a218d 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -358,11 +358,10 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
const char *errmsg;
struct qnx4_sb_info *qs;
- qs = kmalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL);
+ qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL);
if (!qs)
return -ENOMEM;
s->s_fs_info = qs;
- memset(qs, 0, sizeof(struct qnx4_sb_info));
sb_set_blocksize(s, QNX4_BLOCK_SIZE);
@@ -497,7 +496,6 @@ static void qnx4_read_inode(struct inode *inode)
inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime);
inode->i_ctime.tv_nsec = 0;
inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
- inode->i_blksize = QNX4_DIR_ENTRY_SIZE;
memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
if (S_ISREG(inode->i_mode)) {
@@ -557,9 +555,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(qnx4_inode_cachep))
- printk(KERN_INFO
- "qnx4_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(qnx4_inode_cachep);
}
static int qnx4_get_sb(struct file_system_type *fs_type,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b9677335cc8d..bc0e51662424 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,7 +58,6 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 3a59309f3ca9..0eb7ac080484 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -28,7 +28,7 @@ endif
# will work around it. If any other architecture displays this behavior,
# add it here.
ifeq ($(CONFIG_PPC32),y)
-EXTRA_CFLAGS := -O1
+EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1)
endif
TAGS:
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1627edd50810..1cfbe857ba27 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -130,7 +130,7 @@ static int reiserfs_sync_file(struct file *p_s_filp,
reiserfs_write_lock(p_s_inode->i_sb);
barrier_done = reiserfs_commit_for_inode(p_s_inode);
reiserfs_write_unlock(p_s_inode->i_sb);
- if (barrier_done != 1)
+ if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb))
blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
if (barrier_done < 0)
return barrier_done;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 52f1e2136546..7e5a2f5ebeb0 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -17,8 +17,6 @@
#include <linux/writeback.h>
#include <linux/quotaops.h>
-extern int reiserfs_default_io_size; /* default io size devuned in super.c */
-
static int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
static int reiserfs_prepare_write(struct file *f, struct page *page,
@@ -1122,7 +1120,6 @@ static void init_inode(struct inode *inode, struct path *path)
ih = PATH_PITEM_HEAD(path);
copy_key(INODE_PKEY(inode), &(ih->ih_key));
- inode->i_blksize = reiserfs_default_io_size;
INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
REISERFS_I(inode)->i_flags = 0;
@@ -1130,9 +1127,9 @@ static void init_inode(struct inode *inode, struct path *path)
REISERFS_I(inode)->i_prealloc_count = 0;
REISERFS_I(inode)->i_trans_id = 0;
REISERFS_I(inode)->i_jl = NULL;
- REISERFS_I(inode)->i_acl_access = NULL;
- REISERFS_I(inode)->i_acl_default = NULL;
- init_rwsem(&REISERFS_I(inode)->xattr_sem);
+ reiserfs_init_acl_access(inode);
+ reiserfs_init_acl_default(inode);
+ reiserfs_init_xattr_rwsem(inode);
if (stat_data_v1(ih)) {
struct stat_data_v1 *sd =
@@ -1837,9 +1834,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
REISERFS_I(inode)->i_attrs =
REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
- REISERFS_I(inode)->i_acl_access = NULL;
- REISERFS_I(inode)->i_acl_default = NULL;
- init_rwsem(&REISERFS_I(inode)->xattr_sem);
+ reiserfs_init_acl_access(inode);
+ reiserfs_init_acl_default(inode);
+ reiserfs_init_xattr_rwsem(inode);
if (old_format_only(sb))
make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
@@ -1877,7 +1874,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
}
// these do not go to on-disk stat data
inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
- inode->i_blksize = reiserfs_default_io_size;
// store in in-core inode the key of stat data and version all
// object items will have (directory items will have old offset
@@ -1978,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
* iput doesn't deadlock in reiserfs_delete_xattrs. The locking
* code really needs to be reworked, but this will take care of it
* for now. -jeffm */
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
reiserfs_write_unlock_xattrs(dir->i_sb);
iput(inode);
reiserfs_write_lock_xattrs(dir->i_sb);
} else
+#endif
iput(inode);
return err;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9b3672d69367..e6b5ccf23f15 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
return NULL;
}
+static int newer_jl_done(struct reiserfs_journal_cnode *cn)
+{
+ struct super_block *sb = cn->sb;
+ b_blocknr_t blocknr = cn->blocknr;
+
+ cn = cn->hprev;
+ while (cn) {
+ if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
+ atomic_read(&cn->jlist->j_commit_left) != 0)
+ return 0;
+ cn = cn->hprev;
+ }
+ return 1;
+}
+
static void remove_journal_hash(struct super_block *,
struct reiserfs_journal_cnode **,
struct reiserfs_journal_list *, unsigned long,
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s,
return err;
}
+static int test_transaction(struct super_block *s,
+ struct reiserfs_journal_list *jl)
+{
+ struct reiserfs_journal_cnode *cn;
+
+ if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
+ return 1;
+
+ cn = jl->j_realblock;
+ while (cn) {
+ /* if the blocknr == 0, this has been cleared from the hash,
+ ** skip it
+ */
+ if (cn->blocknr == 0) {
+ goto next;
+ }
+ if (cn->bh && !newer_jl_done(cn))
+ return 0;
+ next:
+ cn = cn->next;
+ cond_resched();
+ }
+ return 0;
+}
+
static int write_one_transaction(struct super_block *s,
struct reiserfs_journal_list *jl,
struct buffer_chunk *chunk)
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p)
flush_commit_list(p_s_sb, jl, 1);
}
unlock_kernel();
- /*
- * this is a little racey, but there's no harm in missing
- * the filemap_fdata_write
- */
- if (!atomic_read(&journal->j_async_throttle)
- && !reiserfs_is_journal_aborted(journal)) {
- atomic_inc(&journal->j_async_throttle);
- filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
- atomic_dec(&journal->j_async_throttle);
- }
}
/*
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s)
entry = journal->j_journal_list.next;
jl = JOURNAL_LIST_ENTRY(entry);
/* this check should always be run, to send old lists to disk */
- if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
+ if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
+ atomic_read(&jl->j_commit_left) == 0 &&
+ test_transaction(s, jl)) {
flush_used_journal_lists(s, jl);
} else {
break;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5567328f1041..80fc3b32802f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&ei->i_prealloc_list);
inode_init_once(&ei->vfs_inode);
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
ei->i_acl_access = NULL;
ei->i_acl_default = NULL;
+#endif
}
}
@@ -530,9 +532,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(reiserfs_inode_cachep))
- reiserfs_warning(NULL,
- "reiserfs_inode_cache: not all structures were freed");
+ kmem_cache_destroy(reiserfs_inode_cachep);
}
/* we don't mark inodes dirty, we just log them */
@@ -562,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode)
reiserfs_write_unlock(inode->i_sb);
}
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
static void reiserfs_clear_inode(struct inode *inode)
{
struct posix_acl *acl;
@@ -576,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode)
posix_acl_release(acl);
REISERFS_I(inode)->i_acl_default = NULL;
}
+#else
+#define reiserfs_clear_inode NULL
+#endif
#ifdef CONFIG_QUOTA
static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
@@ -725,12 +729,6 @@ static const arg_desc_t error_actions[] = {
{NULL, 0, 0},
};
-int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k.
- There might be broken applications that are
- confused by this. Use nolargeio mount option
- to get usual i/o size = PAGE_SIZE.
- */
-
/* process only one option from a list *cur - string containing the mount options
opts - array of options which are accepted
opt_arg - if option is found and requires an argument and if it is specified
@@ -959,19 +957,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
}
if (c == 'w') {
- char *p = NULL;
- int val = simple_strtoul(arg, &p, 0);
-
- if (*p != '\0') {
- reiserfs_warning(s,
- "reiserfs_parse_options: non-numeric value %s for nolargeio option",
- arg);
- return 0;
- }
- if (val)
- reiserfs_default_io_size = PAGE_SIZE;
- else
- reiserfs_default_io_size = 128 * 1024;
+ reiserfs_warning(s, "reiserfs: nolargeio option is no longer supported");
+ return 0;
}
if (c == 'j') {
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 22eed61ebf69..ddcd9e1ef282 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -589,8 +589,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(romfs_inode_cachep))
- printk(KERN_INFO "romfs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(romfs_inode_cachep);
}
static int romfs_remount(struct super_block *sb, int *flags, char *data)
diff --git a/fs/select.c b/fs/select.c
index 33b72ba0f86f..dcbc1112b7ec 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -658,8 +658,6 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
unsigned int i;
struct poll_list *head;
struct poll_list *walk;
- struct fdtable *fdt;
- int max_fdset;
/* Allocate small arguments on the stack to save memory and be
faster - use long to make sure the buffer is aligned properly
on 64 bit archs to avoid unaligned access */
@@ -667,11 +665,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
struct poll_list *stack_pp = NULL;
/* Do a sanity check on nfds ... */
- rcu_read_lock();
- fdt = files_fdtable(current->files);
- max_fdset = fdt->max_fdset;
- rcu_read_unlock();
- if (nfds > max_fdset && nfds > OPEN_MAX)
+ if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
return -EINVAL;
poll_initwait(&table);
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index a1ed657c3c84..2c122ee83adb 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -89,8 +89,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(smb_inode_cachep))
- printk(KERN_INFO "smb_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(smb_inode_cachep);
}
static int smb_remount(struct super_block *sb, int *flags, char *data)
@@ -167,7 +166,6 @@ smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
fattr->f_mtime = inode->i_mtime;
fattr->f_ctime = inode->i_ctime;
fattr->f_atime = inode->i_atime;
- fattr->f_blksize= inode->i_blksize;
fattr->f_blocks = inode->i_blocks;
fattr->attr = SMB_I(inode)->attr;
@@ -201,7 +199,6 @@ smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
inode->i_uid = fattr->f_uid;
inode->i_gid = fattr->f_gid;
inode->i_ctime = fattr->f_ctime;
- inode->i_blksize= fattr->f_blksize;
inode->i_blocks = fattr->f_blocks;
inode->i_size = fattr->f_size;
inode->i_mtime = fattr->f_mtime;
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index c3495059889d..40e174db9872 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -1826,7 +1826,6 @@ smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
fattr->f_nlink = 1;
fattr->f_uid = server->mnt->uid;
fattr->f_gid = server->mnt->gid;
- fattr->f_blksize = SMB_ST_BLKSIZE;
fattr->f_unix = 0;
}
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c8e96195b96e..0fb74697abc4 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -49,8 +49,7 @@ int smb_init_request_cache(void)
void smb_destroy_request_cache(void)
{
- if (kmem_cache_destroy(req_cachep))
- printk(KERN_INFO "smb_destroy_request_cache: not all structures were freed\n");
+ kmem_cache_destroy(req_cachep);
}
/*
diff --git a/fs/stat.c b/fs/stat.c
index 3a44dcf97da2..60a31d5e5966 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -14,6 +14,7 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -32,7 +33,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->ctime = inode->i_ctime;
stat->size = i_size_read(inode);
stat->blocks = inode->i_blocks;
- stat->blksize = inode->i_blksize;
+ stat->blksize = (1 << inode->i_blkbits);
}
EXPORT_SYMBOL(generic_fillattr);
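A small userspace sketch, included only as illustration, of where this change is visible: the st_blksize value reported by stat(2) is now derived from 1 << i_blkbits instead of the removed i_blksize field (the path queried below is arbitrary).

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;

	/* st_blksize now reflects 1 << i_blkbits of the underlying inode. */
	if (stat("/etc/hostname", &st) == 0)
		printf("preferred I/O size: %ld bytes\n", (long)st.st_blksize);
	return 0;
}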
diff --git a/fs/super.c b/fs/super.c
index 5c4c94d5495e..6987824d0dce 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(deactivate_super);
* success, 0 if we had failed (superblock contents was already dead or
* dying when grab_super() had been called).
*/
-static int grab_super(struct super_block *s)
+static int grab_super(struct super_block *s) __releases(sb_lock)
{
s->s_count++;
spin_unlock(&sb_lock);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index c16a93c353c0..98022e41cda1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -176,7 +177,6 @@ const struct file_operations bin_fops = {
* sysfs_create_bin_file - create binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
- *
*/
int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
@@ -191,13 +191,16 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
* sysfs_remove_bin_file - remove binary file for object.
* @kobj: object.
* @attr: attribute descriptor.
- *
*/
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
- sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
- return 0;
+ if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
+ printk(KERN_ERR "%s: "
+ "bad dentry or inode or no such file: \"%s\"\n",
+ __FUNCTION__, attr->attr.name);
+ dump_stack();
+ }
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 61c42430cba3..5f3d725d1125 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
memset(sd, 0, sizeof(*sd));
atomic_set(&sd->s_count, 1);
- atomic_set(&sd->s_event, 0);
+ atomic_set(&sd->s_event, 1);
INIT_LIST_HEAD(&sd->s_children);
list_add(&sd->s_sibling, &parent_sd->s_children);
sd->s_element = element;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 9889e54e1f13..e79e38d52c00 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -12,6 +12,7 @@
#include <linux/namei.h>
#include <linux/backing-dev.h>
#include <linux/capability.h>
+#include <linux/errno.h>
#include "sysfs.h"
extern struct super_block * sysfs_sb;
@@ -124,7 +125,6 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
{
struct inode * inode = new_inode(sysfs_sb);
if (inode) {
- inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
@@ -234,17 +234,18 @@ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
}
}
-void sysfs_hash_and_remove(struct dentry * dir, const char * name)
+int sysfs_hash_and_remove(struct dentry * dir, const char * name)
{
struct sysfs_dirent * sd;
struct sysfs_dirent * parent_sd;
+ int found = 0;
if (!dir)
- return;
+ return -ENOENT;
if (dir->d_inode == NULL)
/* no inode means this hasn't been made visible yet */
- return;
+ return -ENOENT;
parent_sd = dir->d_fsdata;
mutex_lock(&dir->d_inode->i_mutex);
@@ -255,8 +256,11 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
list_del_init(&sd->s_sibling);
sysfs_drop_dentry(sd, dir);
sysfs_put(sd);
+ found = 1;
break;
}
}
mutex_unlock(&dir->d_inode->i_mutex);
+
+ return found ? 0 : -ENOENT;
}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index d2eac3ceed5f..f50e3cc2ded8 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -3,6 +3,7 @@
*/
#include <linux/fs.h>
+#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/namei.h>
@@ -82,10 +83,19 @@ exit1:
*/
int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
{
- struct dentry * dentry = kobj->dentry;
+ struct dentry *dentry = NULL;
int error = -EEXIST;
- BUG_ON(!kobj || !kobj->dentry || !name);
+ BUG_ON(!name);
+
+ if (!kobj) {
+ if (sysfs_mount && sysfs_mount->mnt_sb)
+ dentry = sysfs_mount->mnt_sb->s_root;
+ } else
+ dentry = kobj->dentry;
+
+ if (!dentry)
+ return -EFAULT;
mutex_lock(&dentry->d_inode->i_mutex);
if (!sysfs_dirent_exist(dentry->d_fsdata, name))
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3651ffb5ec09..6f3d6bd52887 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -10,7 +10,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
umode_t, int);
extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
-extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 9b585d1081c0..115ab0d6f4bc 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -170,7 +170,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
inode->i_uid = current->fsuid;
inode->i_ino = fs16_to_cpu(sbi, ino);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
SYSV_I(inode)->i_dir_start_lookup = 0;
insert_inode_hash(inode);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 58b2d22142ba..d63c5e48b050 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -201,7 +201,7 @@ static void sysv_read_inode(struct inode *inode)
inode->i_ctime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
- inode->i_blocks = inode->i_blksize = 0;
+ inode->i_blocks = 0;
si = SYSV_I(inode);
for (block = 0; block < 10+1+1+1; block++)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 876639b93321..350cba5d6803 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -369,10 +369,9 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent)
if (64 != sizeof (struct sysv_inode))
panic("sysv fs: bad inode size");
- sbi = kmalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
- memset(sbi, 0, sizeof(struct sysv_sb_info));
sbi->s_sb = sb;
sbi->s_block_base = 0;
@@ -453,10 +452,9 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
if (64 != sizeof (struct sysv_inode))
panic("sysv fs: bad i-node size");
- sbi = kmalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
- memset(sbi, 0, sizeof(struct sysv_sb_info));
sbi->s_sb = sb;
sbi->s_block_base = 0;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 33323473e3c4..8206983f2ebf 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -121,7 +121,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
UDF_I_LOCATION(inode).logicalBlockNum = block;
UDF_I_LOCATION(inode).partitionReferenceNum = UDF_I_LOCATION(dir).partitionReferenceNum;
inode->i_ino = udf_get_lb_pblock(sb, UDF_I_LOCATION(inode), 0);
- inode->i_blksize = PAGE_SIZE;
inode->i_blocks = 0;
UDF_I_LENEATTR(inode) = 0;
UDF_I_LENALLOC(inode) = 0;
@@ -130,14 +129,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
{
UDF_I_EFE(inode) = 1;
UDF_UPDATE_UDFREV(inode->i_sb, UDF_VERS_USE_EXTENDED_FE);
- UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL);
- memset(UDF_I_DATA(inode), 0x00, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry));
+ UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry), GFP_KERNEL);
}
else
{
UDF_I_EFE(inode) = 0;
- UDF_I_DATA(inode) = kmalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
- memset(UDF_I_DATA(inode), 0x00, inode->i_sb->s_blocksize - sizeof(struct fileEntry));
+ UDF_I_DATA(inode) = kzalloc(inode->i_sb->s_blocksize - sizeof(struct fileEntry), GFP_KERNEL);
}
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_IN_ICB;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 605f5111b6d8..b223b32db991 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -916,8 +916,6 @@ __udf_read_inode(struct inode *inode)
* i_nlink = 1
* i_op = NULL;
*/
- inode->i_blksize = PAGE_SIZE;
-
bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident);
if (!bh)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index fcce1a21a51b..1d3b5d2070e5 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -156,8 +156,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(udf_inode_cachep))
- printk(KERN_INFO "udf_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(udf_inode_cachep);
}
/* Superblock operations */
@@ -1622,6 +1621,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
goto error_out;
}
+ if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) {
+ printk("UDF-fs: Partition marked readonly; forcing readonly mount\n");
+ sb->s_flags |= MS_RDONLY;
+ }
+
if ( udf_find_fileset(sb, &fileset, &rootdir) )
{
printk("UDF-fs: No fileset found\n");
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 9501dcd3b213..2ad1259c6eca 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -255,7 +255,6 @@ cg_found:
inode->i_gid = current->fsgid;
inode->i_ino = cg * uspi->s_ipg + bit;
- inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
ufsi->i_flags = UFS_I(dir)->i_flags;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 30c6e8a9446c..ee1eaa6f4ec2 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -741,7 +741,6 @@ void ufs_read_inode(struct inode * inode)
ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
}
- inode->i_blksize = PAGE_SIZE;/*This is the optimal IO size (for stat)*/
inode->i_version++;
ufsi->i_lastfrag =
(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 992ee0b87cc3..ec79e3091d1b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -611,11 +611,10 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
UFSD("ENTER\n");
- sbi = kmalloc(sizeof(struct ufs_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(struct ufs_sb_info), GFP_KERNEL);
if (!sbi)
goto failed_nomem;
sb->s_fs_info = sbi;
- memset(sbi, 0, sizeof(struct ufs_sb_info));
UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
@@ -1245,8 +1244,7 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
- if (kmem_cache_destroy(ufs_inode_cachep))
- printk(KERN_INFO "ufs_inode_cache: not all structures were freed\n");
+ kmem_cache_destroy(ufs_inode_cachep);
}
#ifdef CONFIG_QUOTA
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 9e7f85986d0d..291948d5085a 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -30,7 +30,6 @@ ifeq ($(CONFIG_XFS_TRACE),y)
EXTRA_CFLAGS += -DXFS_BLI_TRACE
EXTRA_CFLAGS += -DXFS_BMAP_TRACE
EXTRA_CFLAGS += -DXFS_BMBT_TRACE
- EXTRA_CFLAGS += -DXFS_DIR_TRACE
EXTRA_CFLAGS += -DXFS_DIR2_TRACE
EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index aba7fcf881a2..d59737589815 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -34,6 +34,14 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;
+#ifdef DEBUG
+ if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) {
+ printk(KERN_WARNING "Large %s attempt, size=%ld\n",
+ __FUNCTION__, (long)size);
+ dump_stack();
+ }
+#endif
+
do {
if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
ptr = kmalloc(size, lflags);
@@ -60,6 +68,27 @@ kmem_zalloc(size_t size, unsigned int __nocast flags)
return ptr;
}
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
+ unsigned int __nocast flags)
+{
+ void *ptr;
+ size_t kmsize = maxsize;
+ unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP;
+
+ while (!(ptr = kmem_zalloc(kmsize, kmflags))) {
+ if ((kmsize <= minsize) && (flags & KM_NOSLEEP))
+ break;
+ if ((kmsize >>= 1) <= minsize) {
+ kmsize = minsize;
+ kmflags = flags;
+ }
+ }
+ if (ptr)
+ *size = kmsize;
+ return ptr;
+}
+
void
kmem_free(void *ptr, size_t size)
{
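A hedged sketch of a possible caller of the new kmem_zalloc_greedy() helper; the call site itself is hypothetical and not part of this patch. The helper tries for up to maxsize bytes of zeroed memory, backs off toward minsize, and reports the size actually obtained through *size.

/* Hypothetical caller: ask for up to 16 pages but accept as little as one;
 * 'got' is set to the amount that was actually allocated. */
size_t got = 0;
void *buf = kmem_zalloc_greedy(&got, PAGE_SIZE, 16 * PAGE_SIZE, KM_SLEEP);

if (buf) {
	/* ... use 'got' bytes of zeroed memory ... */
	kmem_free(buf, got);
}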
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 939bd84bc7ee..9ebabdf7829c 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -30,6 +30,7 @@
#define KM_NOSLEEP 0x0002u
#define KM_NOFS 0x0004u
#define KM_MAYFAIL 0x0008u
+#define KM_LARGE 0x0010u
/*
* We use a special process flag to avoid recursive callbacks into
@@ -41,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags)
{
gfp_t lflags;
- BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+ BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE));
if (flags & KM_NOSLEEP) {
lflags = GFP_ATOMIC | __GFP_NOWARN;
@@ -54,8 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags)
}
extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
+extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
extern void kmem_free(void *, size_t);
/*
@@ -91,8 +93,8 @@ kmem_zone_free(kmem_zone_t *zone, void *ptr)
static inline void
kmem_zone_destroy(kmem_zone_t *zone)
{
- if (zone && kmem_cache_destroy(zone))
- BUG();
+ if (zone)
+ kmem_cache_destroy(zone);
}
extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
index b25090094cca..2009e6d922ce 100644
--- a/fs/xfs/linux-2.6/sema.h
+++ b/fs/xfs/linux-2.6/sema.h
@@ -29,8 +29,6 @@
typedef struct semaphore sema_t;
-#define init_sema(sp, val, c, d) sema_init(sp, val)
-#define initsema(sp, val) sema_init(sp, val)
#define initnsema(sp, val, name) sema_init(sp, val)
#define psema(sp, b) down(sp)
#define vsema(sp) up(sp)
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 9a8ad481b008..351a8f454bd1 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -53,8 +53,6 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
remove_wait_queue(&sv->waiters, &wait);
}
-#define init_sv(sv,type,name,flag) \
- init_waitqueue_head(&(sv)->waiters)
#define sv_init(sv,flag,name) \
init_waitqueue_head(&(sv)->waiters)
#define sv_destroy(sv) \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34dcb43a7837..09360cf1e1f2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -71,7 +71,7 @@ xfs_page_trace(
int tag,
struct inode *inode,
struct page *page,
- int mask)
+ unsigned long pgoff)
{
xfs_inode_t *ip;
bhv_vnode_t *vp = vn_from_inode(inode);
@@ -91,7 +91,7 @@ xfs_page_trace(
(void *)ip,
(void *)inode,
(void *)page,
- (void *)((unsigned long)mask),
+ (void *)pgoff,
(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
@@ -105,7 +105,7 @@ xfs_page_trace(
(void *)NULL);
}
#else
-#define xfs_page_trace(tag, inode, page, mask)
+#define xfs_page_trace(tag, inode, page, pgoff)
#endif
/*
@@ -1197,7 +1197,7 @@ xfs_vm_releasepage(
.nr_to_write = 1,
};
- xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
+ xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);
if (!page_has_buffers(page))
return 0;
@@ -1356,7 +1356,6 @@ xfs_end_io_direct(
ioend->io_size = size;
xfs_finish_ioend(ioend);
} else {
- ASSERT(size >= 0);
xfs_destroy_ioend(ioend);
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2af528dcfb04..9bbadafdcb00 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
@@ -318,8 +318,12 @@ xfs_buf_free(
if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
free_address(bp->b_addr - bp->b_offset);
- for (i = 0; i < bp->b_page_count; i++)
- page_cache_release(bp->b_pages[i]);
+ for (i = 0; i < bp->b_page_count; i++) {
+ struct page *page = bp->b_pages[i];
+
+ ASSERT(!PagePrivate(page));
+ page_cache_release(page);
+ }
_xfs_buf_free_pages(bp);
} else if (bp->b_flags & _XBF_KMEM_ALLOC) {
/*
@@ -400,6 +404,7 @@ _xfs_buf_lookup_pages(
nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
size -= nbytes;
+ ASSERT(!PagePrivate(page));
if (!PageUptodate(page)) {
page_count--;
if (blocksize >= PAGE_CACHE_SIZE) {
@@ -768,7 +773,7 @@ xfs_buf_get_noaddr(
_xfs_buf_initialize(bp, target, 0, len, 0);
try_again:
- data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
+ data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE);
if (unlikely(data == NULL))
goto fail_free_buf;
@@ -1117,10 +1122,10 @@ xfs_buf_bio_end_io(
do {
struct page *page = bvec->bv_page;
+ ASSERT(!PagePrivate(page));
if (unlikely(bp->b_error)) {
if (bp->b_flags & XBF_READ)
ClearPageUptodate(page);
- SetPageError(page);
} else if (blocksize >= PAGE_CACHE_SIZE) {
SetPageUptodate(page);
} else if (!PagePrivate(page) &&
@@ -1156,16 +1161,16 @@ _xfs_buf_ioapply(
total_nr_pages = bp->b_page_count;
map_i = 0;
- if (bp->b_flags & _XBF_RUN_QUEUES) {
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
- } else {
- rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
- }
-
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
rw = WRITE_BARRIER;
+ } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+ ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+ bp->b_flags &= ~_XBF_RUN_QUEUES;
+ rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+ } else {
+ rw = (bp->b_flags & XBF_WRITE) ? WRITE :
+ (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
}
/* Special code path for reading a sub page size buffer in --
@@ -1681,6 +1686,7 @@ xfsbufd(
xfs_buf_t *bp, *n;
struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock;
+ int count;
current->flags |= PF_MEMALLOC;
@@ -1696,6 +1702,7 @@ xfsbufd(
schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+ count = 0;
age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
@@ -1711,9 +1718,11 @@ xfsbufd(
break;
}
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+ _XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
- list_move(&bp->b_list, &tmp);
+ list_move_tail(&bp->b_list, &tmp);
+ count++;
}
}
spin_unlock(dwlk);
@@ -1724,12 +1733,12 @@ xfsbufd(
list_del_init(&bp->b_list);
xfs_buf_iostrategy(bp);
-
- blk_run_address_space(target->bt_mapping);
}
if (as_list_len > 0)
purge_addresses();
+ if (count)
+ blk_run_address_space(target->bt_mapping);
clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop());
@@ -1767,7 +1776,7 @@ xfs_flush_buftarg(
continue;
}
- list_move(&bp->b_list, &tmp);
+ list_move_tail(&bp->b_list, &tmp);
}
spin_unlock(dwlk);
@@ -1776,7 +1785,7 @@ xfs_flush_buftarg(
*/
list_for_each_entry_safe(bp, n, &tmp, b_list) {
xfs_buf_lock(bp);
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
if (wait)
bp->b_flags &= ~XBF_ASYNC;
@@ -1786,6 +1795,9 @@ xfs_flush_buftarg(
xfs_buf_iostrategy(bp);
}
+ if (wait)
+ blk_run_address_space(target->bt_mapping);
+
/*
* Remaining list items must be flushed before returning
*/
@@ -1797,9 +1809,6 @@ xfs_flush_buftarg(
xfs_buf_relse(bp);
}
- if (wait)
- blk_run_address_space(target->bt_mapping);
-
return pincount;
}
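The _xfs_buf_ioapply() hunk in this file reorders the request-type selection so that ordered (barrier) writes take priority, _XBF_RUN_QUEUES buffers issue synchronous I/O, and read-ahead buffers use READA. A sketch of that precedence as a plain decision function; the flag values and rw codes below are illustrative stand-ins, not the kernel's:

#include <stdio.h>

/* Illustrative stand-ins for the XBF_* flags and block-layer rw codes. */
enum { BF_READ = 1, BF_WRITE = 2, BF_READ_AHEAD = 4,
       BF_ORDERED = 8, BF_RUN_QUEUES = 16 };
enum { RW_READ, RW_WRITE, RW_READ_SYNC, RW_WRITE_SYNC,
       RW_WRITE_BARRIER, RW_READA };

/* Mirror of the precedence in _xfs_buf_ioapply(): barrier first,
 * then sync I/O for run-queues buffers, then readahead, then plain. */
static int pick_rw(unsigned int flags)
{
        if (flags & BF_ORDERED)
                return RW_WRITE_BARRIER;        /* never a read */
        if (flags & BF_RUN_QUEUES)
                return (flags & BF_WRITE) ? RW_WRITE_SYNC : RW_READ_SYNC;
        if (flags & BF_WRITE)
                return RW_WRITE;
        return (flags & BF_READ_AHEAD) ? RW_READA : RW_READ;
}

int main(void)
{
        printf("%d %d %d\n",
               pick_rw(BF_WRITE | BF_ORDERED),
               pick_rw(BF_WRITE | BF_RUN_QUEUES),
               pick_rw(BF_READ | BF_READ_AHEAD));
        return 0;
}

The ordering matters: before this change a barrier write could be downgraded by the run-queues branch, which the new else-if chain prevents.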
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 7858703ed84c..9dd235cb0107 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -298,11 +298,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-#define XFS_BUF_ISUNINITIAL(bp) (0)
-#define XFS_BUF_UNUNINITIAL(bp) (0)
-
-#define XFS_BUF_BP_ISMAPPED(bp) (1)
-
#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
@@ -393,8 +388,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
return error;
}
-#define XFS_bdwrite(bp) xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
-
static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
{
bp->b_strat = xfs_bdstrat_cb;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3d4f6dff2113..41cfcba7ce49 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -370,7 +370,7 @@ xfs_file_readdir(
/* Try fairly hard to get memory */
do {
- if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
+ if ((read_buf = kmalloc(rlen, GFP_KERNEL)))
break;
rlen >>= 1;
} while (rlen >= 1024);
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 6c162c3dde7e..ed3a5e1b4b67 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -34,7 +34,7 @@ xfs_param_t xfs_params = {
.restrict_chown = { 0, 1, 1 },
.sgid_inherit = { 0, 0, 1 },
.symlink_mode = { 0, 0, 1 },
- .panic_mask = { 0, 0, 127 },
+ .panic_mask = { 0, 0, 255 },
.error_level = { 0, 3, 11 },
.syncd_timer = { 1*100, 30*100, 7200*100},
.stats_clear = { 0, 0, 1 },
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 6e52a5dd38d8..a74f854d91e6 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -653,7 +653,7 @@ xfs_attrmulti_by_handle(
STATIC int
xfs_ioc_space(
bhv_desc_t *bdp,
- bhv_vnode_t *vp,
+ struct inode *inode,
struct file *filp,
int flags,
unsigned int cmd,
@@ -735,7 +735,7 @@ xfs_ioctl(
!capable(CAP_SYS_ADMIN))
return -EPERM;
- return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
+ return xfs_ioc_space(bdp, inode, filp, ioflags, cmd, arg);
case XFS_IOC_DIOINFO: {
struct dioattr da;
@@ -763,6 +763,8 @@ xfs_ioctl(
return xfs_ioc_fsgeometry(mp, arg);
case XFS_IOC_GETVERSION:
+ return put_user(inode->i_generation, (int __user *)arg);
+
case XFS_IOC_GETXFLAGS:
case XFS_IOC_SETXFLAGS:
case XFS_IOC_FSGETXATTR:
@@ -957,7 +959,7 @@ xfs_ioctl(
STATIC int
xfs_ioc_space(
bhv_desc_t *bdp,
- bhv_vnode_t *vp,
+ struct inode *inode,
struct file *filp,
int ioflags,
unsigned int cmd,
@@ -967,13 +969,13 @@ xfs_ioc_space(
int attr_flags = 0;
int error;
- if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
+ if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
return -XFS_ERROR(EPERM);
if (!(filp->f_mode & FMODE_WRITE))
return -XFS_ERROR(EBADF);
- if (!VN_ISREG(vp))
+ if (!S_ISREG(inode->i_mode))
return -XFS_ERROR(EINVAL);
if (copy_from_user(&bf, arg, sizeof(bf)))
@@ -1264,13 +1266,6 @@ xfs_ioc_xattr(
break;
}
- case XFS_IOC_GETVERSION: {
- flags = vn_to_inode(vp)->i_generation;
- if (copy_to_user(arg, &flags, sizeof(flags)))
- error = -EFAULT;
- break;
- }
-
default:
error = -ENOTTY;
break;
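The ioctl hunk above moves XFS_IOC_GETVERSION out of xfs_ioc_xattr() and answers it directly with put_user(inode->i_generation, ...). From userspace the call looks roughly like the sketch below; it spells the request as the generic FS_IOC_GETVERSION, on the assumption (to be checked against your headers) that XFS_IOC_GETVERSION is the same ioctl number:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>           /* FS_IOC_GETVERSION */

int main(int argc, char **argv)
{
        long generation = 0;
        int fd;

        if (argc < 2)
                return 1;
        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;
        if (ioctl(fd, FS_IOC_GETVERSION, &generation) < 0) {
                perror("FS_IOC_GETVERSION");
                close(fd);
                return 1;
        }
        printf("%s: generation %ld\n", argv[1], generation);
        close(fd);
        return 0;
}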
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d9180020de63..3ba814ae3bba 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -553,13 +553,13 @@ xfs_vn_follow_link(
ASSERT(dentry);
ASSERT(nd);
- link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+ link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link) {
nd_set_link(nd, ERR_PTR(-ENOMEM));
return NULL;
}
- uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
+ uio = kmalloc(sizeof(uio_t), GFP_KERNEL);
if (!uio) {
kfree(link);
nd_set_link(nd, ERR_PTR(-ENOMEM));
@@ -623,12 +623,27 @@ xfs_vn_getattr(
{
struct inode *inode = dentry->d_inode;
bhv_vnode_t *vp = vn_from_inode(inode);
- int error = 0;
+ bhv_vattr_t vattr = { .va_mask = XFS_AT_STAT };
+ int error;
- if (unlikely(vp->v_flag & VMODIFIED))
- error = vn_revalidate(vp);
- if (!error)
- generic_fillattr(inode, stat);
+ error = bhv_vop_getattr(vp, &vattr, ATTR_LAZY, NULL);
+ if (likely(!error)) {
+ stat->size = i_size_read(inode);
+ stat->dev = inode->i_sb->s_dev;
+ stat->rdev = (vattr.va_rdev == 0) ? 0 :
+ MKDEV(sysv_major(vattr.va_rdev) & 0x1ff,
+ sysv_minor(vattr.va_rdev));
+ stat->mode = vattr.va_mode;
+ stat->nlink = vattr.va_nlink;
+ stat->uid = vattr.va_uid;
+ stat->gid = vattr.va_gid;
+ stat->ino = vattr.va_nodeid;
+ stat->atime = vattr.va_atime;
+ stat->mtime = vattr.va_mtime;
+ stat->ctime = vattr.va_ctime;
+ stat->blocks = vattr.va_nblocks;
+ stat->blksize = vattr.va_blocksize;
+ }
return -error;
}
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index a13f75c1a936..2b0e0018738a 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -148,11 +148,7 @@ BUFFER_FNS(PrivateStart, unwritten);
(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
#define NBPP PAGE_SIZE
-#define DPPSHFT (PAGE_SHIFT - 9)
#define NDPP (1 << (PAGE_SHIFT - 9))
-#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT)
-#define dtopt(DD) ((DD) >> DPPSHFT)
-#define dpoff(DD) ((DD) & (NDPP-1))
#define NBBY 8 /* number of bits per byte */
#define NBPC PAGE_SIZE /* Number of bytes per click */
@@ -172,8 +168,6 @@ BUFFER_FNS(PrivateStart, unwritten);
#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT)
-#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
-#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT)
/* off_t bytes to clicks */
#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
@@ -186,7 +180,6 @@ BUFFER_FNS(PrivateStart, unwritten);
#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT)
#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT)
-#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT)
/* bytes to clicks */
#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
@@ -339,4 +332,11 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
return(x * y);
}
+static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+{
+ x += y - 1;
+ do_div(x, y);
+ return x;
+}
+
#endif /* __XFS_LINUX__ */
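The new howmany_64() helper is round-up division for a 64-bit dividend; it goes through do_div() because a plain 64-by-32 divide is not available on every 32-bit architecture. A userspace sketch where the operator suffices:

#include <stdint.h>
#include <stdio.h>

/*
 * "How many y-sized pieces are needed to cover x" -- round-up division.
 * The kernel helper uses do_div(); in userspace a plain divide is fine.
 */
static uint64_t howmany_64(uint64_t x, uint32_t y)
{
        return (x + y - 1) / y;
}

int main(void)
{
        /* e.g. number of 4096-byte blocks needed for 10000 bytes */
        printf("%llu\n", (unsigned long long)howmany_64(10000, 4096)); /* 3 */
        return 0;
}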
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ee788b1cb364..55992b40353c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -270,12 +270,12 @@ xfs_read(
}
}
- if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp)))
- bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
- -1, FI_REMAPF_LOCKED);
-
- if (unlikely(ioflags & IO_ISDIRECT))
+ if (unlikely(ioflags & IO_ISDIRECT)) {
+ if (VN_CACHED(vp))
+ bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
+ -1, FI_REMAPF_LOCKED);
mutex_unlock(&inode->i_mutex);
+ }
xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4754f342a5d3..38c4d128a8c0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -171,7 +171,6 @@ xfs_revalidate_inode(
break;
}
- inode->i_blksize = xfs_preferred_iosize(mp);
inode->i_generation = ip->i_d.di_gen;
i_size_write(inode, ip->i_d.di_size);
inode->i_blocks =
@@ -228,7 +227,9 @@ xfs_initialize_vnode(
xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
xfs_set_inodeops(inode);
+ spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_INEW;
+ spin_unlock(&ip->i_flags_lock);
barrier();
unlock_new_inode(inode);
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index 91fc2c4b3353..da255bdf5260 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -79,7 +79,7 @@ typedef enum {
#define VFS_RDONLY 0x0001 /* read-only vfs */
#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
-#define VFS_UMOUNT 0x0008 /* unmount in progress */
+/* ---- VFS_UMOUNT ---- 0x0008 -- unneeded, fixed via kthread APIs */
#define VFS_32BITINODES 0x0010 /* do not use inums above 32 bits */
#define VFS_END 0x0010 /* max flag */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 6628d96b6fd6..553fa731ade5 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -122,7 +122,6 @@ vn_revalidate_core(
inode->i_blocks = vap->va_nblocks;
inode->i_mtime = vap->va_mtime;
inode->i_ctime = vap->va_ctime;
- inode->i_blksize = vap->va_blocksize;
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index c42b3221b20c..515f5fdea57a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -85,8 +85,6 @@ typedef enum {
#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh)))
#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name)
#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp)
-#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops)
-#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)
/*
* Vnode to Linux inode mapping.
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 5b2dcc58b244..33ad5af386e0 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -382,18 +382,6 @@ xfs_qm_dquot_logitem_unlock(
/*
- * The transaction with the dquot locked has aborted. The dquot
- * must not be dirty within the transaction. We simply unlock just
- * as if the transaction had been cancelled.
- */
-STATIC void
-xfs_qm_dquot_logitem_abort(
- xfs_dq_logitem_t *ql)
-{
- xfs_qm_dquot_logitem_unlock(ql);
-}
-
-/*
* this needs to stamp an lsn into the dquot, I think.
* rpc's that look at user dquot's would then have to
* push on the dependency recorded in the dquot
@@ -426,7 +414,6 @@ STATIC struct xfs_item_ops xfs_dquot_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_qm_dquot_logitem_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_abort,
.iop_pushbuf = (void(*)(xfs_log_item_t*))
xfs_qm_dquot_logitem_pushbuf,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
@@ -559,17 +546,6 @@ xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
}
/*
- * The transaction of which this QUOTAOFF is a part has been aborted.
- * Just clean up after ourselves.
- * Shouldn't this never happen in the case of qoffend logitems? XXX
- */
-STATIC void
-xfs_qm_qoff_logitem_abort(xfs_qoff_logitem_t *qf)
-{
- kmem_free(qf, sizeof(xfs_qoff_logitem_t));
-}
-
-/*
* There isn't much you can do to push on an quotaoff item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
@@ -644,7 +620,6 @@ STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_qm_qoffend_logitem_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
.iop_pushbuf = NULL,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_qm_qoffend_logitem_committing
@@ -667,7 +642,6 @@ STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_qm_qoff_logitem_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_abort,
.iop_pushbuf = NULL,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_qm_qoff_logitem_committing
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index e23e45535c48..7c6a3a50379e 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -112,17 +112,17 @@ xfs_Gqm_init(void)
{
xfs_dqhash_t *udqhash, *gdqhash;
xfs_qm_t *xqm;
- uint i, hsize, flags = KM_SLEEP | KM_MAYFAIL;
+ size_t hsize;
+ uint i;
/*
* Initialize the dquot hash tables.
*/
- hsize = XFS_QM_HASHSIZE_HIGH;
- while (!(udqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), flags))) {
- if ((hsize >>= 1) <= XFS_QM_HASHSIZE_LOW)
- flags = KM_SLEEP;
- }
- gdqhash = kmem_zalloc(hsize * sizeof(xfs_dqhash_t), KM_SLEEP);
+ udqhash = kmem_zalloc_greedy(&hsize,
+ XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH,
+ KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+ gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
+ hsize /= sizeof(xfs_dqhash_t);
ndquot = hsize << 8;
xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
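The xfs_Gqm_init() hunk replaces the hand-rolled "halve the hash size until the allocation succeeds" loop with a single kmem_zalloc_greedy() call; the byte count actually obtained comes back through hsize, the group hash is sized to match, and the bucket count is then derived by dividing by the entry size. A sketch of that size-feedback pattern, repeating the zalloc_greedy() analogue from the kmem.c change so the example stands alone (dqhash_stub and the sizes are made up):

#include <stdio.h>
#include <stdlib.h>

struct dqhash_stub { void *head; };     /* stand-in for xfs_dqhash_t */

/* Greedy zeroed allocation, as sketched after the kmem.c hunk above. */
static void *zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
{
        size_t sz = maxsize;
        void *ptr;

        while (!(ptr = calloc(1, sz))) {
                if (sz <= minsize)
                        return NULL;
                sz >>= 1;
                if (sz < minsize)
                        sz = minsize;
        }
        *size = sz;
        return ptr;
}

int main(void)
{
        size_t hsize = 0;
        struct dqhash_stub *udqhash, *gdqhash;

        /* Ask for up to 64 KiB of buckets, settle for at least 4 KiB. */
        udqhash = zalloc_greedy(&hsize, 4096, 65536);
        if (!udqhash)
                return 1;
        /* The second hash is sized to whatever the first actually got. */
        gdqhash = calloc(1, hsize);
        if (!gdqhash) {
                free(udqhash);
                return 1;
        }
        /* Convert bytes obtained into a bucket count, as xfs_Gqm_init() does. */
        printf("buckets: %zu\n", hsize / sizeof(struct dqhash_stub));
        free(udqhash);
        free(gdqhash);
        return 0;
}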
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 4568deb6da86..689407de0a20 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -56,12 +56,6 @@ extern kmem_zone_t *qm_dqtrxzone;
#define XFS_QM_HASHSIZE_HIGH ((NBPP * 4) / sizeof(xfs_dqhash_t))
/*
- * We output a cmn_err when quotachecking a quota file with more than
- * this many fsbs.
- */
-#define XFS_QM_BIG_QCHECK_NBLKS 500
-
-/*
* This defines the unit of allocation of dquots.
* Currently, it is just one file system block, and a 4K blk contains 30
* (136 * 30 = 4080) dquots. It's probably not worth trying to make
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index b7ddd04aae32..a8b85e2be9d5 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -75,7 +75,6 @@ static inline int XQMISLCKD(struct xfs_dqhash *h)
#define xfs_qm_freelist_lock(qm) XQMLCK(&((qm)->qm_dqfreelist))
#define xfs_qm_freelist_unlock(qm) XQMUNLCK(&((qm)->qm_dqfreelist))
-#define XFS_QM_IS_FREELIST_LOCKED(qm) XQMISLCKD(&((qm)->qm_dqfreelist))
/*
* Hash into a bucket in the dquot hash table, based on <mp, id>.
@@ -170,6 +169,5 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
(((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
(((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-#define DQFLAGTO_DIRTYSTR(d) (XFS_DQ_IS_DIRTY(d) ? "DIRTY" : "NOTDIRTY")
#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index addf5a7ea06c..5cf2e86caa71 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -75,7 +75,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
sleep);
} else {
ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
- sleep);
+ sleep | KM_LARGE);
}
if (ktep == NULL) {
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index dc2361dd740a..9ece7f87ec5b 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -150,7 +150,7 @@ typedef struct xfs_agi {
#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp))
typedef struct xfs_agfl {
- xfs_agblock_t agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
+ __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
} xfs_agfl_t;
/*
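Typing agfl_bno as __be32 makes the on-disk endianness explicit (and lets tools such as sparse flag missed conversions), so every access must go through cpu_to_be32()/be32_to_cpu(), as the xfs_alloc.c hunks below now do. A userspace sketch of the same discipline, with htonl()/ntohl() standing in for the kernel macros and a made-up agfl_stub structure:

#include <arpa/inet.h>          /* htonl()/ntohl() as be32 helpers */
#include <stdint.h>
#include <stdio.h>

/* On-disk free-list block: an array of big-endian 32-bit block numbers. */
struct agfl_stub {
        uint32_t agfl_bno[4];   /* stored big-endian, like __be32 */
};

int main(void)
{
        struct agfl_stub agfl;
        uint32_t bno;

        /* Writing: convert CPU order to on-disk (big-endian) order. */
        agfl.agfl_bno[0] = htonl(12345);        /* cpu_to_be32() */

        /* Reading: convert back before using the value. */
        bno = ntohl(agfl.agfl_bno[0]);          /* be32_to_cpu() */
        printf("block %u\n", (unsigned int)bno);
        return 0;
}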
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index d2bbcd882a69..e80dda3437d1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1477,8 +1477,10 @@ xfs_alloc_ag_vextent_small(
/*
* Can't allocate from the freelist for some reason.
*/
- else
+ else {
+ fbno = NULLAGBLOCK;
flen = 0;
+ }
/*
* Can't do the allocation, give up.
*/
@@ -2021,7 +2023,7 @@ xfs_alloc_get_freelist(
/*
* Get the block number and update the data structures.
*/
- bno = INT_GET(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)], ARCH_CONVERT);
+ bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
be32_add(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
@@ -2108,7 +2110,7 @@ xfs_alloc_put_freelist(
{
xfs_agf_t *agf; /* a.g. freespace structure */
xfs_agfl_t *agfl; /* a.g. free block array */
- xfs_agblock_t *blockp;/* pointer to array entry */
+ __be32 *blockp;/* pointer to array entry */
int error;
#ifdef XFS_ALLOC_TRACE
static char fname[] = "xfs_alloc_put_freelist";
@@ -2132,7 +2134,7 @@ xfs_alloc_put_freelist(
pag->pagf_flcount++;
ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
- INT_SET(*blockp, ARCH_CONVERT, bno);
+ *blockp = cpu_to_be32(bno);
TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
xfs_trans_log_buf(tp, agflbp,
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 7446556e8021..74cadf95d4e8 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -92,6 +92,7 @@ xfs_alloc_delrec(
xfs_alloc_key_t *rkp; /* right block key pointer */
xfs_alloc_ptr_t *rpp; /* right block address pointer */
int rrecs=0; /* number of records in right block */
+ int numrecs;
xfs_alloc_rec_t *rrp; /* right block record pointer */
xfs_btree_cur_t *tcur; /* temporary btree cursor */
@@ -115,7 +116,8 @@ xfs_alloc_delrec(
/*
* Fail if we're off the end of the block.
*/
- if (ptr > be16_to_cpu(block->bb_numrecs)) {
+ numrecs = be16_to_cpu(block->bb_numrecs);
+ if (ptr > numrecs) {
*stat = 0;
return 0;
}
@@ -129,18 +131,18 @@ xfs_alloc_delrec(
lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
- for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) {
+ for (i = ptr; i < numrecs; i++) {
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
return error;
}
#endif
- if (ptr < be16_to_cpu(block->bb_numrecs)) {
+ if (ptr < numrecs) {
memmove(&lkp[ptr - 1], &lkp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp));
+ (numrecs - ptr) * sizeof(*lkp));
memmove(&lpp[ptr - 1], &lpp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp));
- xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
- xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
+ (numrecs - ptr) * sizeof(*lpp));
+ xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
+ xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
}
}
/*
@@ -149,10 +151,10 @@ xfs_alloc_delrec(
*/
else {
lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
- if (ptr < be16_to_cpu(block->bb_numrecs)) {
+ if (ptr < numrecs) {
memmove(&lrp[ptr - 1], &lrp[ptr],
- (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
+ (numrecs - ptr) * sizeof(*lrp));
+ xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
}
/*
* If it's the first record in the block, we'll need a key
@@ -167,7 +169,8 @@ xfs_alloc_delrec(
/*
* Decrement and log the number of entries in the block.
*/
- be16_add(&block->bb_numrecs, -1);
+ numrecs--;
+ block->bb_numrecs = cpu_to_be16(numrecs);
xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
/*
* See if the longest free extent in the allocation group was
@@ -181,14 +184,14 @@ xfs_alloc_delrec(
if (level == 0 &&
cur->bc_btnum == XFS_BTNUM_CNT &&
be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
- ptr > be16_to_cpu(block->bb_numrecs)) {
- ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1);
+ ptr > numrecs) {
+ ASSERT(ptr == numrecs + 1);
/*
* There are still records in the block. Grab the size
* from the last one.
*/
- if (be16_to_cpu(block->bb_numrecs)) {
- rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur);
+ if (numrecs) {
+ rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
agf->agf_longest = rrp->ar_blockcount;
}
/*
@@ -211,7 +214,7 @@ xfs_alloc_delrec(
* and it's NOT the leaf level,
* then we can get rid of this level.
*/
- if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) {
+ if (numrecs == 1 && level > 0) {
/*
* lpp is still set to the first pointer in the block.
* Make it the new root of the btree.
@@ -267,7 +270,7 @@ xfs_alloc_delrec(
* If the number of records remaining in the block is at least
* the minimum, we're done.
*/
- if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
+ if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
return error;
*stat = 1;
@@ -419,19 +422,21 @@ xfs_alloc_delrec(
* See if we can join with the left neighbor block.
*/
if (lbno != NULLAGBLOCK &&
- lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+ lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
/*
* Set "right" to be the starting block,
* "left" to be the left neighbor.
*/
rbno = bno;
right = block;
+ rrecs = be16_to_cpu(right->bb_numrecs);
rbp = bp;
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.a.agno, lbno, 0, &lbp,
XFS_ALLOC_BTREE_REF)))
return error;
left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+ lrecs = be16_to_cpu(left->bb_numrecs);
if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
return error;
}
@@ -439,20 +444,21 @@ xfs_alloc_delrec(
* If that won't work, see if we can join with the right neighbor block.
*/
else if (rbno != NULLAGBLOCK &&
- rrecs + be16_to_cpu(block->bb_numrecs) <=
- XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+ rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
/*
* Set "left" to be the starting block,
* "right" to be the right neighbor.
*/
lbno = bno;
left = block;
+ lrecs = be16_to_cpu(left->bb_numrecs);
lbp = bp;
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.a.agno, rbno, 0, &rbp,
XFS_ALLOC_BTREE_REF)))
return error;
right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+ rrecs = be16_to_cpu(right->bb_numrecs);
if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
return error;
}
@@ -474,34 +480,28 @@ xfs_alloc_delrec(
/*
* It's a non-leaf. Move keys and pointers.
*/
- lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
- lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
+ lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
+ lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
- for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
+ for (i = 0; i < rrecs; i++) {
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
return error;
}
#endif
- memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp));
- memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp));
- xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
- xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
+ memcpy(lkp, rkp, rrecs * sizeof(*lkp));
+ memcpy(lpp, rpp, rrecs * sizeof(*lpp));
+ xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
+ xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
} else {
/*
* It's a leaf. Move records.
*/
- lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
+ lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
- memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp));
- xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
- be16_to_cpu(left->bb_numrecs) +
- be16_to_cpu(right->bb_numrecs));
+ memcpy(lrp, rrp, rrecs * sizeof(*lrp));
+ xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
}
/*
* If we joined with the left neighbor, set the buffer in the
@@ -509,7 +509,7 @@ xfs_alloc_delrec(
*/
if (bp != lbp) {
xfs_btree_setbuf(cur, level, lbp);
- cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs);
+ cur->bc_ptrs[level] += lrecs;
}
/*
* If we joined with the right neighbor and there's a level above
@@ -521,7 +521,8 @@ xfs_alloc_delrec(
/*
* Fix up the number of records in the surviving block.
*/
- be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs));
+ lrecs += rrecs;
+ left->bb_numrecs = cpu_to_be16(lrecs);
/*
* Fix up the right block pointer in the surviving block, and log it.
*/
@@ -608,6 +609,7 @@ xfs_alloc_insrec(
xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
xfs_alloc_key_t nkey; /* new key value, from split */
xfs_alloc_rec_t nrec; /* new record value, for caller */
+ int numrecs;
int optr; /* old ptr value */
xfs_alloc_ptr_t *pp; /* pointer to btree addresses */
int ptr; /* index in btree block for this rec */
@@ -653,13 +655,14 @@ xfs_alloc_insrec(
*/
bp = cur->bc_bufs[level];
block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+ numrecs = be16_to_cpu(block->bb_numrecs);
#ifdef DEBUG
if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
return error;
/*
* Check that the new entry is being inserted in the right place.
*/
- if (ptr <= be16_to_cpu(block->bb_numrecs)) {
+ if (ptr <= numrecs) {
if (level == 0) {
rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
xfs_btree_check_rec(cur->bc_btnum, recp, rp);
@@ -670,12 +673,12 @@ xfs_alloc_insrec(
}
#endif
nbno = NULLAGBLOCK;
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
/*
* If the block is full, we can't insert the new entry until we
* make the block un-full.
*/
- if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+ if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
/*
* First, try shifting an entry to the right neighbor.
*/
@@ -729,6 +732,7 @@ xfs_alloc_insrec(
* At this point we know there's room for our new entry in the block
* we're pointing at.
*/
+ numrecs = be16_to_cpu(block->bb_numrecs);
if (level > 0) {
/*
* It's a non-leaf entry. Make a hole for the new data
@@ -737,15 +741,15 @@ xfs_alloc_insrec(
kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
- for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) {
+ for (i = numrecs; i >= ptr; i--) {
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
return error;
}
#endif
memmove(&kp[ptr], &kp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp));
+ (numrecs - ptr + 1) * sizeof(*kp));
memmove(&pp[ptr], &pp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp));
+ (numrecs - ptr + 1) * sizeof(*pp));
#ifdef DEBUG
if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
return error;
@@ -755,11 +759,12 @@ xfs_alloc_insrec(
*/
kp[ptr - 1] = key;
pp[ptr - 1] = cpu_to_be32(*bnop);
- be16_add(&block->bb_numrecs, 1);
- xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
- xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
+ numrecs++;
+ block->bb_numrecs = cpu_to_be16(numrecs);
+ xfs_alloc_log_keys(cur, bp, ptr, numrecs);
+ xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
#ifdef DEBUG
- if (ptr < be16_to_cpu(block->bb_numrecs))
+ if (ptr < numrecs)
xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
kp + ptr);
#endif
@@ -769,16 +774,17 @@ xfs_alloc_insrec(
*/
rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
memmove(&rp[ptr], &rp[ptr - 1],
- (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp));
+ (numrecs - ptr + 1) * sizeof(*rp));
/*
* Now stuff the new record in, bump numrecs
* and log the new data.
*/
- rp[ptr - 1] = *recp; /* INT_: struct copy */
- be16_add(&block->bb_numrecs, 1);
- xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
+ rp[ptr - 1] = *recp;
+ numrecs++;
+ block->bb_numrecs = cpu_to_be16(numrecs);
+ xfs_alloc_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
- if (ptr < be16_to_cpu(block->bb_numrecs))
+ if (ptr < numrecs)
xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
rp + ptr);
#endif
@@ -819,8 +825,8 @@ xfs_alloc_insrec(
*/
*bnop = nbno;
if (nbno != NULLAGBLOCK) {
- *recp = nrec; /* INT_: struct copy */
- *curp = ncur; /* INT_: struct copy */
+ *recp = nrec;
+ *curp = ncur;
}
*stat = 1;
return 0;
@@ -981,7 +987,7 @@ xfs_alloc_lookup(
*/
bp = cur->bc_bufs[level];
if (bp && XFS_BUF_ADDR(bp) != d)
- bp = (xfs_buf_t *)0;
+ bp = NULL;
if (!bp) {
/*
* Need to get a new buffer. Read it, then
@@ -1229,7 +1235,7 @@ xfs_alloc_lshift(
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
return error;
#endif
- *lpp = *rpp; /* INT_: copy */
+ *lpp = *rpp;
xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
}
@@ -1406,8 +1412,8 @@ xfs_alloc_newroot(
kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
- kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
+ kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
+ kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
} else {
xfs_alloc_rec_t *rp; /* btree record pointer */
@@ -1527,8 +1533,8 @@ xfs_alloc_rshift(
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
return error;
#endif
- *rkp = *lkp; /* INT_: copy */
- *rpp = *lpp; /* INT_: copy */
+ *rkp = *lkp;
+ *rpp = *lpp;
xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
@@ -2044,7 +2050,7 @@ xfs_alloc_insert(
nbno = NULLAGBLOCK;
nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
pcur = cur;
/*
* Loop going up the tree, starting at the leaf level.
@@ -2076,7 +2082,7 @@ xfs_alloc_insert(
*/
if (ncur) {
pcur = ncur;
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
}
} while (nbno != NULLAGBLOCK);
*stat = i;
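The xfs_alloc_delrec()/xfs_alloc_insrec() hunks stop re-reading block->bb_numrecs through be16_to_cpu() at every use: the count is converted once into a local numrecs, adjusted there, and written back with cpu_to_be16() when it changes. A small sketch of that pattern, with htons()/ntohs() standing in for the 16-bit conversions and a made-up block structure:

#include <arpa/inet.h>          /* htons()/ntohs() as be16 helpers */
#include <stdint.h>
#include <stdio.h>

struct btree_block_stub {
        uint16_t bb_numrecs;    /* stored big-endian on disk */
};

/* Remove one record: convert once, work on the local copy, store once. */
static void delrec(struct btree_block_stub *block)
{
        int numrecs = ntohs(block->bb_numrecs);          /* be16_to_cpu() */

        if (numrecs == 0)
                return;
        numrecs--;                                       /* adjust locally */
        block->bb_numrecs = htons((uint16_t)numrecs);    /* cpu_to_be16() */
}

int main(void)
{
        struct btree_block_stub b = { .bb_numrecs = htons(3) };

        delrec(&b);
        printf("numrecs now %u\n", (unsigned int)ntohs(b.bb_numrecs)); /* 2 */
        return 0;
}

Beyond readability, keeping the count in a local avoids repeated byte-swaps and makes the single write-back point obvious.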
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 1a2101043275..9ada7bdbae52 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -91,7 +91,6 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
/*
* Routines to manipulate out-of-line attribute values.
*/
-STATIC int xfs_attr_rmtval_get(xfs_da_args_t *args);
STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
@@ -180,7 +179,7 @@ xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
return(error);
}
-STATIC int
+int
xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
char *value, int valuelen, int flags)
{
@@ -440,7 +439,7 @@ xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int f
* Generic handler routine to remove a name from an attribute list.
* Transitions attribute list from Btree to shortform as necessary.
*/
-STATIC int
+int
xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
{
xfs_da_args_t args;
@@ -591,6 +590,110 @@ xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
return xfs_attr_remove_int(dp, name, namelen, flags);
}
+int /* error */
+xfs_attr_list_int(xfs_attr_list_context_t *context)
+{
+ int error;
+ xfs_inode_t *dp = context->dp;
+
+ /*
+ * Decide on what work routines to call based on the inode size.
+ */
+ if (XFS_IFORK_Q(dp) == 0 ||
+ (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+ dp->i_d.di_anextents == 0)) {
+ error = 0;
+ } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+ error = xfs_attr_shortform_list(context);
+ } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+ error = xfs_attr_leaf_list(context);
+ } else {
+ error = xfs_attr_node_list(context);
+ }
+ return error;
+}
+
+#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \
+ (((struct attrlist_ent *) 0)->a_name - (char *) 0)
+#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \
+ ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
+ & ~(sizeof(u_int32_t)-1))
+
+/*
+ * Format an attribute and copy it out to the user's buffer.
+ * Take care to check values and protect against them changing later,
+ * we may be reading them directly out of a user buffer.
+ */
+/*ARGSUSED*/
+STATIC int
+xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
+ char *name, int namelen,
+ int valuelen, char *value)
+{
+ attrlist_ent_t *aep;
+ int arraytop;
+
+ ASSERT(!(context->flags & ATTR_KERNOVAL));
+ ASSERT(context->count >= 0);
+ ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
+ ASSERT(context->firstu >= sizeof(*context->alist));
+ ASSERT(context->firstu <= context->bufsize);
+
+ arraytop = sizeof(*context->alist) +
+ context->count * sizeof(context->alist->al_offset[0]);
+ context->firstu -= ATTR_ENTSIZE(namelen);
+ if (context->firstu < arraytop) {
+ xfs_attr_trace_l_c("buffer full", context);
+ context->alist->al_more = 1;
+ context->seen_enough = 1;
+ return 1;
+ }
+
+ aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
+ aep->a_valuelen = valuelen;
+ memcpy(aep->a_name, name, namelen);
+ aep->a_name[ namelen ] = 0;
+ context->alist->al_offset[ context->count++ ] = context->firstu;
+ context->alist->al_count = context->count;
+ xfs_attr_trace_l_c("add", context);
+ return 0;
+}
+
+STATIC int
+xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
+ char *name, int namelen,
+ int valuelen, char *value)
+{
+ char *offset;
+ int arraytop;
+
+ ASSERT(context->count >= 0);
+
+ arraytop = context->count + namesp->attr_namelen + namelen + 1;
+ if (arraytop > context->firstu) {
+ context->count = -1; /* insufficient space */
+ return 1;
+ }
+ offset = (char *)context->alist + context->count;
+ strncpy(offset, namesp->attr_name, namesp->attr_namelen);
+ offset += namesp->attr_namelen;
+ strncpy(offset, name, namelen); /* real name */
+ offset += namelen;
+ *offset = '\0';
+ context->count += namesp->attr_namelen + namelen + 1;
+ return 0;
+}
+
+/*ARGSUSED*/
+STATIC int
+xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
+ char *name, int namelen,
+ int valuelen, char *value)
+{
+ context->count += namesp->attr_namelen + namelen + 1;
+ return 0;
+}
+
/*
* Generate a list of extended attribute names and optionally
* also value lengths. Positive return value follows the XFS
@@ -615,13 +718,13 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
return(XFS_ERROR(EINVAL));
if ((cursor->initted == 0) &&
(cursor->hashval || cursor->blkno || cursor->offset))
- return(XFS_ERROR(EINVAL));
+ return XFS_ERROR(EINVAL);
/*
* Check for a properly aligned buffer.
*/
if (((long)buffer) & (sizeof(int)-1))
- return(XFS_ERROR(EFAULT));
+ return XFS_ERROR(EFAULT);
if (flags & ATTR_KERNOVAL)
bufsize = 0;
@@ -634,53 +737,47 @@ xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
context.dupcnt = 0;
context.resynch = 1;
context.flags = flags;
- if (!(flags & ATTR_KERNAMELS)) {
+ context.seen_enough = 0;
+ context.alist = (attrlist_t *)buffer;
+ context.put_value = 0;
+
+ if (flags & ATTR_KERNAMELS) {
+ context.bufsize = bufsize;
+ context.firstu = context.bufsize;
+ if (flags & ATTR_KERNOVAL)
+ context.put_listent = xfs_attr_kern_list_sizes;
+ else
+ context.put_listent = xfs_attr_kern_list;
+ } else {
context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
context.firstu = context.bufsize;
- context.alist = (attrlist_t *)buffer;
context.alist->al_count = 0;
context.alist->al_more = 0;
context.alist->al_offset[0] = context.bufsize;
- }
- else {
- context.bufsize = bufsize;
- context.firstu = context.bufsize;
- context.alist = (attrlist_t *)buffer;
+ context.put_listent = xfs_attr_put_listent;
}
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
- return (EIO);
+ return EIO;
xfs_ilock(dp, XFS_ILOCK_SHARED);
- /*
- * Decide on what work routines to call based on the inode size.
- */
xfs_attr_trace_l_c("syscall start", &context);
- if (XFS_IFORK_Q(dp) == 0 ||
- (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
- dp->i_d.di_anextents == 0)) {
- error = 0;
- } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
- error = xfs_attr_shortform_list(&context);
- } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
- error = xfs_attr_leaf_list(&context);
- } else {
- error = xfs_attr_node_list(&context);
- }
+
+ error = xfs_attr_list_int(&context);
+
xfs_iunlock(dp, XFS_ILOCK_SHARED);
xfs_attr_trace_l_c("syscall end", &context);
- if (!(context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS))) {
- ASSERT(error >= 0);
- }
- else { /* must return negated buffer size or the error */
+ if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
+ /* must return negated buffer size or the error */
if (context.count < 0)
error = XFS_ERROR(ERANGE);
else
error = -context.count;
- }
+ } else
+ ASSERT(error >= 0);
- return(error);
+ return error;
}
int /* error */
@@ -1122,19 +1219,19 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
context->cursor->blkno = 0;
error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
if (error)
- return(error);
+ return XFS_ERROR(error);
ASSERT(bp != NULL);
leaf = bp->data;
if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
xfs_da_brelse(NULL, bp);
- return(XFS_ERROR(EFSCORRUPTED));
+ return XFS_ERROR(EFSCORRUPTED);
}
- (void)xfs_attr_leaf_list_int(bp, context);
+ error = xfs_attr_leaf_list_int(bp, context);
xfs_da_brelse(NULL, bp);
- return(0);
+ return XFS_ERROR(error);
}
@@ -1858,8 +1955,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
return(XFS_ERROR(EFSCORRUPTED));
}
error = xfs_attr_leaf_list_int(bp, context);
- if (error || !leaf->hdr.info.forw)
- break; /* not really an error, buffer full or EOF */
+ if (error) {
+ xfs_da_brelse(NULL, bp);
+ return error;
+ }
+ if (context->seen_enough || leaf->hdr.info.forw == 0)
+ break;
cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
xfs_da_brelse(NULL, bp);
error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
@@ -1886,7 +1987,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
* Read the value associated with an attribute from the out-of-line buffer
* that we stored it in.
*/
-STATIC int
+int
xfs_attr_rmtval_get(xfs_da_args_t *args)
{
xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
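The attribute-listing rework in this file routes every entry through a context->put_listent callback (xfs_attr_put_listent for the user-visible attrlist format, xfs_attr_kern_list and xfs_attr_kern_list_sizes for kernel-internal name lists), with context->seen_enough signalling early termination back to the walker. A hedged userspace sketch of that callback-driven enumeration shape; the types and names below are illustrative only, not the XFS ones:

#include <stdio.h>
#include <string.h>

/* Minimal listing context: a callback plus a "stop early" flag. */
struct list_ctx {
        int (*put_listent)(struct list_ctx *ctx, const char *name,
                           int namelen, int valuelen);
        int seen_enough;
        int count;
};

/* One possible callback: count entries and stop after the third. */
static int count_three(struct list_ctx *ctx, const char *name,
                       int namelen, int valuelen)
{
        (void)name; (void)namelen; (void)valuelen;
        if (++ctx->count >= 3)
                ctx->seen_enough = 1;   /* tell the walker to stop */
        return 0;
}

/* The walker only knows how to iterate; formatting is the callback's job. */
static int list_attrs(struct list_ctx *ctx, const char *names[], int n)
{
        int i, error;

        for (i = 0; i < n; i++) {
                error = ctx->put_listent(ctx, names[i],
                                         (int)strlen(names[i]), 0);
                if (error)
                        return error;
                if (ctx->seen_enough)
                        break;
        }
        return 0;
}

int main(void)
{
        const char *names[] = { "user.a", "user.b", "user.c", "user.d" };
        struct list_ctx ctx = { .put_listent = count_three };

        list_attrs(&ctx, names, 4);
        printf("listed %d entries\n", ctx.count);       /* 3 */
        return 0;
}

Separating the walk from the output format is what lets the same leaf/node/shortform iterators serve both the syscall path and the kernel-internal listings.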
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 981633f6c077..783977d3ea71 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -37,6 +37,7 @@
struct cred;
struct bhv_vnode;
+struct xfs_attr_list_context;
typedef int (*attrset_t)(struct bhv_vnode *, char *, void *, size_t, int);
typedef int (*attrget_t)(struct bhv_vnode *, char *, void *, size_t, int);
@@ -160,13 +161,16 @@ struct xfs_da_args;
*/
int xfs_attr_get(bhv_desc_t *, const char *, char *, int *, int, struct cred *);
int xfs_attr_set(bhv_desc_t *, const char *, char *, int, int, struct cred *);
+int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
int xfs_attr_remove(bhv_desc_t *, const char *, int, struct cred *);
-int xfs_attr_list(bhv_desc_t *, char *, int, int,
- struct attrlist_cursor_kern *, struct cred *);
+int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
+int xfs_attr_list(bhv_desc_t *, char *, int, int, struct attrlist_cursor_kern *, struct cred *);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_attr_inactive(struct xfs_inode *dp);
int xfs_attr_shortform_getvalue(struct xfs_da_args *);
int xfs_attr_fetch(struct xfs_inode *, const char *, int,
char *, int *, int, struct cred *);
+int xfs_attr_rmtval_get(struct xfs_da_args *args);
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9455051f0120..9719bbef122c 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -89,9 +89,46 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
int dst_start, int move_count,
xfs_mount_t *mp);
STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
-STATIC int xfs_attr_put_listent(xfs_attr_list_context_t *context,
- attrnames_t *, char *name, int namelen,
- int valuelen);
+
+/*========================================================================
+ * Namespace helper routines
+ *========================================================================*/
+
+STATIC inline attrnames_t *
+xfs_attr_flags_namesp(int flags)
+{
+ return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
+ ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
+}
+
+/*
+ * If namespace bits don't match return 0.
+ * If all match then return 1.
+ */
+STATIC inline int
+xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
+{
+ return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
+}
+
+/*
+ * If namespace bits don't match and we don't have an override for it
+ * then return 0.
+ * If all match or are overridable then return 1.
+ */
+STATIC inline int
+xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
+{
+ if (((arg_flags & ATTR_SECURE) == 0) !=
+ ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
+ !(arg_flags & ATTR_KERNORMALS))
+ return 0;
+ if (((arg_flags & ATTR_ROOT) == 0) !=
+ ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
+ !(arg_flags & ATTR_KERNROOTLS))
+ return 0;
+ return 1;
+}
/*========================================================================
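The namespace helpers added above collapse the repeated "do the SECURE/ROOT bits of the request match the on-disk entry" checks into xfs_attr_namesp_match() and xfs_attr_namesp_match_overrides(). A sketch of the masked comparison the strict helper performs, using made-up flag values:

#include <stdio.h>

/* Illustrative stand-ins for the on-disk namespace bits. */
enum { NSP_ROOT = 1, NSP_SECURE = 2 };
#define NSP_MASK (NSP_ROOT | NSP_SECURE)

/* Match only when the namespace bits are identical, as
 * xfs_attr_namesp_match() does after mapping the ATTR_* request
 * flags onto their on-disk XFS_ATTR_* counterparts. */
static int namesp_match(int arg_flags, int ondisk_flags)
{
        return (arg_flags & NSP_MASK) == (ondisk_flags & NSP_MASK);
}

int main(void)
{
        printf("%d %d\n",
               namesp_match(NSP_SECURE, NSP_SECURE),    /* 1: same namespace */
               namesp_match(0, NSP_ROOT));              /* 0: user vs trusted */
        return 0;
}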
@@ -228,11 +265,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
continue;
if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
ASSERT(0);
#endif
@@ -246,8 +279,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
sfe->namelen = args->namelen;
sfe->valuelen = args->valuelen;
- sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE :
- ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
+ sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
memcpy(sfe->nameval, args->name, args->namelen);
memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen);
sf->hdr.count++;
@@ -282,11 +314,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
continue;
if (memcmp(sfe->nameval, args->name, args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
break;
}
@@ -363,11 +391,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
continue;
if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
return(XFS_ERROR(EEXIST));
}
@@ -394,11 +418,7 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
continue;
if (memcmp(args->name, sfe->nameval, args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
if (args->flags & ATTR_KERNOVAL) {
args->valuelen = sfe->valuelen;
@@ -485,8 +505,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
nargs.valuelen = sfe->valuelen;
nargs.hashval = xfs_da_hashname((char *)sfe->nameval,
sfe->namelen);
- nargs.flags = (sfe->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
- ((sfe->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
+ nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == ENOATTR);
error = xfs_attr_leaf_add(bp, &nargs);
@@ -520,6 +539,10 @@ xfs_attr_shortform_compare(const void *a, const void *b)
}
}
+
+#define XFS_ISRESET_CURSOR(cursor) \
+ (!((cursor)->initted) && !((cursor)->hashval) && \
+ !((cursor)->blkno) && !((cursor)->offset))
/*
* Copy out entries of shortform attribute lists for attr_list().
* Shortform attribute lists are not stored in hashval sorted order.
@@ -537,6 +560,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
xfs_attr_sf_entry_t *sfe;
xfs_inode_t *dp;
int sbsize, nsbuf, count, i;
+ int error;
ASSERT(context != NULL);
dp = context->dp;
@@ -552,46 +576,51 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
xfs_attr_trace_l_c("sf start", context);
/*
- * If the buffer is large enough, do not bother with sorting.
+ * If the buffer is large enough and the cursor is at the start,
+ * do not bother with sorting since we will return everything in
+ * one buffer and another call using the cursor won't need to be
+ * made.
* Note the generous fudge factor of 16 overhead bytes per entry.
+ * If bufsize is zero then put_listent must be a search function
+ * and can just scan through what we have.
*/
- if ((dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize) {
+ if (context->bufsize == 0 ||
+ (XFS_ISRESET_CURSOR(cursor) &&
+ (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
attrnames_t *namesp;
- if (((context->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0) &&
- !(context->flags & ATTR_KERNORMALS)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
- if (((context->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0) &&
- !(context->flags & ATTR_KERNROOTLS)) {
+ if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
continue;
}
- namesp = (sfe->flags & XFS_ATTR_SECURE) ? &attr_secure:
- ((sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted :
- &attr_user);
- if (context->flags & ATTR_KERNOVAL) {
- ASSERT(context->flags & ATTR_KERNAMELS);
- context->count += namesp->attr_namelen +
- sfe->namelen + 1;
- }
- else {
- if (xfs_attr_put_listent(context, namesp,
- (char *)sfe->nameval,
- (int)sfe->namelen,
- (int)sfe->valuelen))
- break;
- }
+ namesp = xfs_attr_flags_namesp(sfe->flags);
+ error = context->put_listent(context,
+ namesp,
+ (char *)sfe->nameval,
+ (int)sfe->namelen,
+ (int)sfe->valuelen,
+ (char*)&sfe->nameval[sfe->namelen]);
+
+ /*
+ * Either search callback finished early or
+ * didn't fit it all in the buffer after all.
+ */
+ if (context->seen_enough)
+ break;
+
+ if (error)
+ return error;
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
xfs_attr_trace_l_c("sf big-gulp", context);
return(0);
}
+ /* do no more for a search callback */
+ if (context->bufsize == 0)
+ return 0;
+
/*
* It didn't all fit, so we have to sort everything on hashval.
*/
@@ -614,15 +643,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
kmem_free(sbuf, sbsize);
return XFS_ERROR(EFSCORRUPTED);
}
- if (((context->flags & ATTR_SECURE) != 0) !=
- ((sfe->flags & XFS_ATTR_SECURE) != 0) &&
- !(context->flags & ATTR_KERNORMALS)) {
- sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
- continue;
- }
- if (((context->flags & ATTR_ROOT) != 0) !=
- ((sfe->flags & XFS_ATTR_ROOT) != 0) &&
- !(context->flags & ATTR_KERNROOTLS)) {
+ if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
continue;
}
@@ -671,24 +692,22 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
for ( ; i < nsbuf; i++, sbp++) {
attrnames_t *namesp;
- namesp = (sbp->flags & XFS_ATTR_SECURE) ? &attr_secure :
- ((sbp->flags & XFS_ATTR_ROOT) ? &attr_trusted :
- &attr_user);
+ namesp = xfs_attr_flags_namesp(sbp->flags);
if (cursor->hashval != sbp->hash) {
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
- if (context->flags & ATTR_KERNOVAL) {
- ASSERT(context->flags & ATTR_KERNAMELS);
- context->count += namesp->attr_namelen +
- sbp->namelen + 1;
- } else {
- if (xfs_attr_put_listent(context, namesp,
- sbp->name, sbp->namelen,
- sbp->valuelen))
- break;
- }
+ error = context->put_listent(context,
+ namesp,
+ sbp->name,
+ sbp->namelen,
+ sbp->valuelen,
+ &sbp->name[sbp->namelen]);
+ if (error)
+ return error;
+ if (context->seen_enough)
+ break;
cursor->offset++;
}
@@ -810,8 +829,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
nargs.value = (char *)&name_loc->nameval[nargs.namelen];
nargs.valuelen = be16_to_cpu(name_loc->valuelen);
nargs.hashval = be32_to_cpu(entry->hashval);
- nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE :
- ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0);
+ nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags);
xfs_attr_shortform_add(&nargs, forkoff);
}
error = 0;
@@ -1098,8 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
be16_to_cpu(map->size));
entry->hashval = cpu_to_be32(args->hashval);
entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
- entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE :
- ((args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0);
+ entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
if (args->rename) {
entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
@@ -1926,7 +1943,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
else
break;
}
- ASSERT((probe >= 0) &&
+ ASSERT((probe >= 0) &&
(!leaf->hdr.count
|| (probe < be16_to_cpu(leaf->hdr.count))));
ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
@@ -1971,14 +1988,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe);
if (name_loc->namelen != args->namelen)
continue;
- if (memcmp(args->name, (char *)name_loc->nameval,
- args->namelen) != 0)
+ if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((entry->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((entry->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
return(XFS_ERROR(EEXIST));
@@ -1989,11 +2001,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
if (memcmp(args->name, (char *)name_rmt->name,
args->namelen) != 0)
continue;
- if (((args->flags & ATTR_SECURE) != 0) !=
- ((entry->flags & XFS_ATTR_SECURE) != 0))
- continue;
- if (((args->flags & ATTR_ROOT) != 0) !=
- ((entry->flags & XFS_ATTR_ROOT) != 0))
+ if (!xfs_attr_namesp_match(args->flags, entry->flags))
continue;
args->index = probe;
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
@@ -2312,8 +2320,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
attrlist_cursor_kern_t *cursor;
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
- xfs_attr_leaf_name_local_t *name_loc;
- xfs_attr_leaf_name_remote_t *name_rmt;
int retval, i;
ASSERT(bp != NULL);
@@ -2355,9 +2361,8 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
* We have found our place, start copying out the new attributes.
*/
retval = 0;
- for ( ; (i < be16_to_cpu(leaf->hdr.count))
- && (retval == 0); entry++, i++) {
- attrnames_t *namesp;
+ for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
+ attrnames_t *namesp;
if (be32_to_cpu(entry->hashval) != cursor->hashval) {
cursor->hashval = be32_to_cpu(entry->hashval);
@@ -2366,115 +2371,69 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
if (entry->flags & XFS_ATTR_INCOMPLETE)
continue; /* skip incomplete entries */
- if (((context->flags & ATTR_SECURE) != 0) !=
- ((entry->flags & XFS_ATTR_SECURE) != 0) &&
- !(context->flags & ATTR_KERNORMALS))
- continue; /* skip non-matching entries */
- if (((context->flags & ATTR_ROOT) != 0) !=
- ((entry->flags & XFS_ATTR_ROOT) != 0) &&
- !(context->flags & ATTR_KERNROOTLS))
- continue; /* skip non-matching entries */
-
- namesp = (entry->flags & XFS_ATTR_SECURE) ? &attr_secure :
- ((entry->flags & XFS_ATTR_ROOT) ? &attr_trusted :
- &attr_user);
+ if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
+ continue;
+
+ namesp = xfs_attr_flags_namesp(entry->flags);
if (entry->flags & XFS_ATTR_LOCAL) {
- name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
- if (context->flags & ATTR_KERNOVAL) {
- ASSERT(context->flags & ATTR_KERNAMELS);
- context->count += namesp->attr_namelen +
- (int)name_loc->namelen + 1;
- } else {
- retval = xfs_attr_put_listent(context, namesp,
- (char *)name_loc->nameval,
- (int)name_loc->namelen,
- be16_to_cpu(name_loc->valuelen));
- }
+ xfs_attr_leaf_name_local_t *name_loc =
+ XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
+
+ retval = context->put_listent(context,
+ namesp,
+ (char *)name_loc->nameval,
+ (int)name_loc->namelen,
+ be16_to_cpu(name_loc->valuelen),
+ (char *)&name_loc->nameval[name_loc->namelen]);
+ if (retval)
+ return retval;
} else {
- name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
- if (context->flags & ATTR_KERNOVAL) {
- ASSERT(context->flags & ATTR_KERNAMELS);
- context->count += namesp->attr_namelen +
- (int)name_rmt->namelen + 1;
- } else {
- retval = xfs_attr_put_listent(context, namesp,
- (char *)name_rmt->name,
- (int)name_rmt->namelen,
- be32_to_cpu(name_rmt->valuelen));
+ xfs_attr_leaf_name_remote_t *name_rmt =
+ XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
+
+ int valuelen = be32_to_cpu(name_rmt->valuelen);
+
+ if (context->put_value) {
+ xfs_da_args_t args;
+
+ memset((char *)&args, 0, sizeof(args));
+ args.dp = context->dp;
+ args.whichfork = XFS_ATTR_FORK;
+ args.valuelen = valuelen;
+ args.value = kmem_alloc(valuelen, KM_SLEEP);
+ args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
+ args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
+ retval = xfs_attr_rmtval_get(&args);
+ if (retval)
+ return retval;
+ retval = context->put_listent(context,
+ namesp,
+ (char *)name_rmt->name,
+ (int)name_rmt->namelen,
+ valuelen,
+ (char*)args.value);
+ kmem_free(args.value, valuelen);
}
+ else {
+ retval = context->put_listent(context,
+ namesp,
+ (char *)name_rmt->name,
+ (int)name_rmt->namelen,
+ valuelen,
+ NULL);
+ }
+ if (retval)
+ return retval;
}
- if (retval == 0) {
- cursor->offset++;
- }
+ if (context->seen_enough)
+ break;
+ cursor->offset++;
}
xfs_attr_trace_l_cl("blk end", context, leaf);
return(retval);
}
-#define ATTR_ENTBASESIZE /* minimum bytes used by an attr */ \
- (((struct attrlist_ent *) 0)->a_name - (char *) 0)
-#define ATTR_ENTSIZE(namelen) /* actual bytes used by an attr */ \
- ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
- & ~(sizeof(u_int32_t)-1))
-
-/*
- * Format an attribute and copy it out to the user's buffer.
- * Take care to check values and protect against them changing later,
- * we may be reading them directly out of a user buffer.
- */
-/*ARGSUSED*/
-STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context,
- attrnames_t *namesp, char *name, int namelen, int valuelen)
-{
- attrlist_ent_t *aep;
- int arraytop;
-
- ASSERT(!(context->flags & ATTR_KERNOVAL));
- if (context->flags & ATTR_KERNAMELS) {
- char *offset;
-
- ASSERT(context->count >= 0);
-
- arraytop = context->count + namesp->attr_namelen + namelen + 1;
- if (arraytop > context->firstu) {
- context->count = -1; /* insufficient space */
- return(1);
- }
- offset = (char *)context->alist + context->count;
- strncpy(offset, namesp->attr_name, namesp->attr_namelen);
- offset += namesp->attr_namelen;
- strncpy(offset, name, namelen); /* real name */
- offset += namelen;
- *offset = '\0';
- context->count += namesp->attr_namelen + namelen + 1;
- return(0);
- }
-
- ASSERT(context->count >= 0);
- ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
- ASSERT(context->firstu >= sizeof(*context->alist));
- ASSERT(context->firstu <= context->bufsize);
-
- arraytop = sizeof(*context->alist) +
- context->count * sizeof(context->alist->al_offset[0]);
- context->firstu -= ATTR_ENTSIZE(namelen);
- if (context->firstu < arraytop) {
- xfs_attr_trace_l_c("buffer full", context);
- context->alist->al_more = 1;
- return(1);
- }
-
- aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
- aep->a_valuelen = valuelen;
- memcpy(aep->a_name, name, namelen);
- aep->a_name[ namelen ] = 0;
- context->alist->al_offset[ context->count++ ] = context->firstu;
- context->alist->al_count = context->count;
- xfs_attr_trace_l_c("add", context);
- return(0);
-}
/*========================================================================
* Manage the INCOMPLETE flag in a leaf entry
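Note on the hunks above: the namespace checks that used to be open-coded at every call site are now hidden behind two helpers whose bodies are not part of these hunks. Based on the removed tests and the XFS_ATTR_NSP_* macros added in xfs_attr_leaf.h below, the plain matcher can be sketched roughly like this (illustrative only, not taken from the patch):

    STATIC int
    xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
    {
            /* namespace bits agree between the VOP argument and the on-disk entry */
            return XFS_ATTR_NSP_ONDISK(ondisk_flags) ==
                   XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
    }

xfs_attr_namesp_match_overrides() would additionally let the ATTR_KERNORMALS / ATTR_KERNROOTLS override bits bypass the comparison, exactly as the removed branches in the list paths did.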
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 51c3ee156b2f..040f732ce1e2 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -130,6 +130,19 @@ typedef struct xfs_attr_leafblock {
#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
/*
+ * Conversion macros for converting namespace bits from argument flags
+ * to ondisk flags.
+ */
+#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE)
+#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK)
+#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK)
+#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
+ ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
+#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
+ ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
+
+/*
* Alignment for namelist and valuelist entries (since they are mixed
* there can be only one alignment value)
*/
@@ -196,16 +209,26 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
* Structure used to pass context around among the routines.
*========================================================================*/
+
+struct xfs_attr_list_context;
+
+typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
+ char *, int, int, char *);
+
typedef struct xfs_attr_list_context {
- struct xfs_inode *dp; /* inode */
- struct attrlist_cursor_kern *cursor;/* position in list */
- struct attrlist *alist; /* output buffer */
- int count; /* num used entries */
- int dupcnt; /* count dup hashvals seen */
- int bufsize;/* total buffer size */
- int firstu; /* first used byte in buffer */
- int flags; /* from VOP call */
- int resynch;/* T/F: resynch with cursor */
+ struct xfs_inode *dp; /* inode */
+ struct attrlist_cursor_kern *cursor; /* position in list */
+ struct attrlist *alist; /* output buffer */
+ int seen_enough; /* T/F: seen enough of list? */
+ int count; /* num used entries */
+ int dupcnt; /* count dup hashvals seen */
+ int bufsize; /* total buffer size */
+ int firstu; /* first used byte in buffer */
+ int flags; /* from VOP call */
+ int resynch; /* T/F: resynch with cursor */
+ int put_value; /* T/F: need value for listent */
+ put_listent_func_t put_listent; /* list output fmt function */
+ int index; /* index into output buffer */
} xfs_attr_list_context_t;
/*
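With put_listent in the context, each consumer of the attribute list now supplies its own output-formatting callback and signals completion through seen_enough, replacing the old ATTR_KERNOVAL / ATTR_KERNAMELS special cases in the list walkers above. A minimal callback matching the new put_listent_func_t signature might look like this (hypothetical name and body, shown only to illustrate the interface):

    STATIC int
    xfs_attr_count_namelens(
            xfs_attr_list_context_t *context,
            struct attrnames        *namesp,
            char                    *name,
            int                     namelen,
            int                     valuelen,
            char                    *value)
    {
            /* same accounting the old ATTR_KERNAMELS path did inline:
             * namespace prefix + attribute name + trailing NUL */
            context->count += namesp->attr_namelen + namelen + 1;
            return 0;
    }

A caller would point context->put_listent at such a function (and set put_value only if the remote attribute value itself is needed) before handing the context to the list routines.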
diff --git a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c
index f4fe3715a803..0dc17219d412 100644
--- a/fs/xfs/xfs_behavior.c
+++ b/fs/xfs/xfs_behavior.c
@@ -110,26 +110,6 @@ bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp)
}
/*
- * Look for a specific ops vector on the specified behavior chain.
- * Return the associated behavior descriptor. Or NULL, if not found.
- */
-bhv_desc_t *
-bhv_lookup(bhv_head_t *bhp, void *ops)
-{
- bhv_desc_t *curdesc;
-
- for (curdesc = bhp->bh_first;
- curdesc != NULL;
- curdesc = curdesc->bd_next) {
-
- if (curdesc->bd_ops == ops)
- return curdesc;
- }
-
- return NULL;
-}
-
-/*
* Looks for the first behavior within a specified range of positions.
* Return the associated behavior descriptor. Or NULL, if none found.
*/
diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h
index 6e6e56fb352d..e7ca1fed955a 100644
--- a/fs/xfs/xfs_behavior.h
+++ b/fs/xfs/xfs_behavior.h
@@ -176,12 +176,10 @@ extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *);
* Behavior module prototypes.
*/
extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp);
-extern bhv_desc_t * bhv_lookup(bhv_head_t *bhp, void *ops);
extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high);
extern bhv_desc_t * bhv_base(bhv_head_t *bhp);
/* No bhv locking on Linux */
-#define bhv_lookup_unlocked bhv_lookup
#define bhv_base_unlocked bhv_base
#endif /* __XFS_BEHAVIOR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index bf46fae303af..5b050c06795f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2999,7 +2999,7 @@ xfs_bmap_btree_to_extents(
int error; /* error return value */
xfs_ifork_t *ifp; /* inode fork data */
xfs_mount_t *mp; /* mount point structure */
- xfs_bmbt_ptr_t *pp; /* ptr to block address */
+ __be64 *pp; /* ptr to block address */
xfs_bmbt_block_t *rblock;/* root btree block */
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -3011,12 +3011,12 @@ xfs_bmap_btree_to_extents(
ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
mp = ip->i_mount;
pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
+ cbno = be64_to_cpu(*pp);
*logflagsp = 0;
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1)))
+ if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
return error;
#endif
- cbno = INT_GET(*pp, ARCH_CONVERT);
if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
XFS_BMAP_BTREE_REF)))
return error;
@@ -3512,9 +3512,9 @@ xfs_bmap_extents_to_btree(
*/
kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
- INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp));
+ kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
- INT_SET(*pp, ARCH_CONVERT, args.fsbno);
+ *pp = cpu_to_be64(args.fsbno);
/*
* Do all this logging at the end so that
* the root is at the right level.
@@ -3705,7 +3705,7 @@ STATIC xfs_bmbt_rec_t * /* pointer to found extent entry */
xfs_bmap_search_extents(
xfs_inode_t *ip, /* incore inode pointer */
xfs_fileoff_t bno, /* block number searched for */
- int whichfork, /* data or attr fork */
+ int fork, /* data or attr fork */
int *eofp, /* out: end of file found */
xfs_extnum_t *lastxp, /* out: last extent index */
xfs_bmbt_irec_t *gotp, /* out: extent entry found */
@@ -3713,25 +3713,28 @@ xfs_bmap_search_extents(
{
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_bmbt_rec_t *ep; /* extent record pointer */
- int rt; /* realtime flag */
XFS_STATS_INC(xs_look_exlist);
- ifp = XFS_IFORK_PTR(ip, whichfork);
+ ifp = XFS_IFORK_PTR(ip, fork);
ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
- rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
- if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
- cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
- "start_block : %llx start_off : %llx blkcnt : %llx "
- "extent-state : %x \n",
- (ip->i_mount)->m_fsname, (long long)ip->i_ino,
+ if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
+ !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
+ xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
+ "Access to block zero in inode %llu "
+ "start_block: %llx start_off: %llx "
+ "blkcnt: %llx extent-state: %x lastx: %x\n",
+ (unsigned long long)ip->i_ino,
(unsigned long long)gotp->br_startblock,
(unsigned long long)gotp->br_startoff,
(unsigned long long)gotp->br_blockcount,
- gotp->br_state);
- }
- return ep;
+ gotp->br_state, *lastxp);
+ *lastxp = NULLEXTNUM;
+ *eofp = 1;
+ return NULL;
+ }
+ return ep;
}
@@ -4494,7 +4497,7 @@ xfs_bmap_read_extents(
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
xfs_mount_t *mp; /* file system mount structure */
- xfs_bmbt_ptr_t *pp; /* pointer to block address */
+ __be64 *pp; /* pointer to block address */
/* REFERENCED */
xfs_extnum_t room; /* number of entries there's room for */
@@ -4510,10 +4513,10 @@ xfs_bmap_read_extents(
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
- ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
- ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
+ ASSERT(bno != NULLDFSBNO);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
@@ -4530,10 +4533,8 @@ xfs_bmap_read_extents(
break;
pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
1, mp->m_bmap_dmxr[1]);
- XFS_WANT_CORRUPTED_GOTO(
- XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)),
- error0);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
+ XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
xfs_trans_brelse(tp, bp);
}
/*
@@ -6141,7 +6142,7 @@ xfs_check_block(
short sz)
{
int i, j, dmxr;
- xfs_bmbt_ptr_t *pp, *thispa; /* pointer to block address */
+ __be64 *pp, *thispa; /* pointer to block address */
xfs_bmbt_key_t *prevp, *keyp;
ASSERT(be16_to_cpu(block->bb_level) > 0);
@@ -6179,11 +6180,10 @@ xfs_check_block(
thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
xfs_bmbt, block, j, dmxr);
}
- if (INT_GET(*thispa, ARCH_CONVERT) ==
- INT_GET(*pp, ARCH_CONVERT)) {
+ if (*thispa == *pp) {
cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
__FUNCTION__, j, i,
- INT_GET(*thispa, ARCH_CONVERT));
+ (unsigned long long)be64_to_cpu(*thispa));
panic("%s: ptrs are equal in node\n",
__FUNCTION__);
}
@@ -6210,7 +6210,7 @@ xfs_bmap_check_leaf_extents(
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
xfs_mount_t *mp; /* file system mount structure */
- xfs_bmbt_ptr_t *pp; /* pointer to block address */
+ __be64 *pp; /* pointer to block address */
xfs_bmbt_rec_t *ep; /* pointer to current extent */
xfs_bmbt_rec_t *lastp; /* pointer to previous extent */
xfs_bmbt_rec_t *nextp; /* pointer to next extent */
@@ -6231,10 +6231,12 @@ xfs_bmap_check_leaf_extents(
ASSERT(level > 0);
xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
- ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
- ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
+
+ ASSERT(bno != NULLDFSBNO);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
@@ -6265,8 +6267,8 @@ xfs_bmap_check_leaf_extents(
xfs_check_block(block, mp, 0, 0);
pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
1, mp->m_bmap_dmxr[1]);
- XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
+ XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
if (bp_release) {
bp_release = 0;
xfs_trans_brelse(NULL, bp);
@@ -6372,7 +6374,7 @@ xfs_bmap_count_blocks(
xfs_ifork_t *ifp; /* fork structure */
int level; /* btree level, for checking */
xfs_mount_t *mp; /* file system mount structure */
- xfs_bmbt_ptr_t *pp; /* pointer to block address */
+ __be64 *pp; /* pointer to block address */
bno = NULLFSBLOCK;
mp = ip->i_mount;
@@ -6395,10 +6397,10 @@ xfs_bmap_count_blocks(
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
- ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
- ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
+ ASSERT(bno != NULLDFSBNO);
+ ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+ ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
@@ -6425,7 +6427,7 @@ xfs_bmap_count_tree(
int error;
xfs_buf_t *bp, *nbp;
int level = levelin;
- xfs_bmbt_ptr_t *pp;
+ __be64 *pp;
xfs_fsblock_t bno = blockno;
xfs_fsblock_t nextbno;
xfs_bmbt_block_t *block, *nextblock;
@@ -6452,7 +6454,7 @@ xfs_bmap_count_tree(
/* Dive to the next level */
pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
- bno = INT_GET(*pp, ARCH_CONVERT);
+ bno = be64_to_cpu(*pp);
if (unlikely((error =
xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
xfs_trans_brelse(tp, bp);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 18fb7385d719..a7b835bf870a 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -58,7 +58,7 @@ STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
- xfs_bmbt_key_t *, xfs_btree_cur_t **, int *);
+ __uint64_t *, xfs_btree_cur_t **, int *);
STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
@@ -192,16 +192,11 @@ xfs_bmbt_trace_argifk(
xfs_btree_cur_t *cur,
int i,
xfs_fsblock_t f,
- xfs_bmbt_key_t *k,
+ xfs_dfiloff_t o,
int line)
{
- xfs_dfsbno_t d;
- xfs_dfiloff_t o;
-
- d = (xfs_dfsbno_t)f;
- o = INT_GET(k->br_startoff, ARCH_CONVERT);
xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
- i, d >> 32, (int)d, o >> 32,
+ i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
(int)o, 0, 0, 0,
0, 0, 0);
}
@@ -248,7 +243,7 @@ xfs_bmbt_trace_argik(
{
xfs_dfiloff_t o;
- o = INT_GET(k->br_startoff, ARCH_CONVERT);
+ o = be64_to_cpu(k->br_startoff);
xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
i, o >> 32, (int)o, 0,
0, 0, 0, 0,
@@ -286,8 +281,8 @@ xfs_bmbt_trace_cursor(
xfs_bmbt_trace_argfffi(fname, c, o, b, i, j, __LINE__)
#define XFS_BMBT_TRACE_ARGI(c,i) \
xfs_bmbt_trace_argi(fname, c, i, __LINE__)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) \
- xfs_bmbt_trace_argifk(fname, c, i, f, k, __LINE__)
+#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
+ xfs_bmbt_trace_argifk(fname, c, i, f, s, __LINE__)
#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
xfs_bmbt_trace_argifr(fname, c, i, f, r, __LINE__)
#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
@@ -299,7 +294,7 @@ xfs_bmbt_trace_cursor(
#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
#define XFS_BMBT_TRACE_ARGI(c,i)
-#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k)
+#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
#define XFS_BMBT_TRACE_ARGIK(c,i,k)
#define XFS_BMBT_TRACE_CURSOR(c,s)
@@ -357,7 +352,7 @@ xfs_bmbt_delrec(
XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
XFS_BMBT_TRACE_ARGI(cur, level);
ptr = cur->bc_ptrs[level];
- tcur = (xfs_btree_cur_t *)0;
+ tcur = NULL;
if (ptr == 0) {
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
*stat = 0;
@@ -382,7 +377,7 @@ xfs_bmbt_delrec(
pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
#ifdef DEBUG
for (i = ptr; i < numrecs; i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
goto error0;
}
@@ -404,7 +399,8 @@ xfs_bmbt_delrec(
xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
}
if (ptr == 1) {
- INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(rp));
+ key.br_startoff =
+ cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
kp = &key;
}
}
@@ -621,7 +617,7 @@ xfs_bmbt_delrec(
rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
for (i = 0; i < numrrecs; i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
goto error0;
}
@@ -748,7 +744,7 @@ xfs_bmbt_insrec(
int logflags; /* inode logging flags */
xfs_fsblock_t nbno; /* new block number */
struct xfs_btree_cur *ncur; /* new btree cursor */
- xfs_bmbt_key_t nkey; /* new btree key value */
+ __uint64_t startoff; /* new btree key value */
xfs_bmbt_rec_t nrec; /* new record count */
int optr; /* old key/record index */
xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
@@ -759,9 +755,8 @@ xfs_bmbt_insrec(
ASSERT(level < cur->bc_nlevels);
XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
- ncur = (xfs_btree_cur_t *)0;
- INT_SET(key.br_startoff, ARCH_CONVERT,
- xfs_bmbt_disk_get_startoff(recp));
+ ncur = NULL;
+ key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
optr = ptr = cur->bc_ptrs[level];
if (ptr == 0) {
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -820,7 +815,7 @@ xfs_bmbt_insrec(
optr = ptr = cur->bc_ptrs[level];
} else {
if ((error = xfs_bmbt_split(cur, level,
- &nbno, &nkey, &ncur,
+ &nbno, &startoff, &ncur,
&i))) {
XFS_BMBT_TRACE_CURSOR(cur,
ERROR);
@@ -840,7 +835,7 @@ xfs_bmbt_insrec(
#endif
ptr = cur->bc_ptrs[level];
xfs_bmbt_disk_set_allf(&nrec,
- nkey.br_startoff, 0, 0,
+ startoff, 0, 0,
XFS_EXT_NORM);
} else {
XFS_BMBT_TRACE_CURSOR(cur,
@@ -858,7 +853,7 @@ xfs_bmbt_insrec(
pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
#ifdef DEBUG
for (i = numrecs; i >= ptr; i--) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT),
+ if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
@@ -870,14 +865,13 @@ xfs_bmbt_insrec(
memmove(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */
(numrecs - ptr + 1) * sizeof(*pp));
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
- level))) {
+ if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
#endif
kp[ptr - 1] = key;
- INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+ pp[ptr - 1] = cpu_to_be64(*bnop);
numrecs++;
block->bb_numrecs = cpu_to_be16(numrecs);
xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
@@ -988,7 +982,7 @@ xfs_bmbt_killroot(
cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
#ifdef DEBUG
for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -1132,7 +1126,7 @@ xfs_bmbt_lookup(
d = XFS_FSB_TO_DADDR(mp, fsbno);
bp = cur->bc_bufs[level];
if (bp && XFS_BUF_ADDR(bp) != d)
- bp = (xfs_buf_t *)0;
+ bp = NULL;
if (!bp) {
if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
0, &bp, XFS_BMAP_BTREE_REF))) {
@@ -1170,7 +1164,7 @@ xfs_bmbt_lookup(
keyno = (low + high) >> 1;
if (level > 0) {
kkp = kkbase + keyno - 1;
- startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
+ startoff = be64_to_cpu(kkp->br_startoff);
} else {
krp = krbase + keyno - 1;
startoff = xfs_bmbt_disk_get_startoff(krp);
@@ -1189,13 +1183,13 @@ xfs_bmbt_lookup(
if (diff > 0 && --keyno < 1)
keyno = 1;
pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
+ fsbno = be64_to_cpu(*pp);
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
#endif
- fsbno = INT_GET(*pp, ARCH_CONVERT);
cur->bc_ptrs[level] = keyno;
}
}
@@ -1313,7 +1307,7 @@ xfs_bmbt_lshift(
lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -1340,7 +1334,7 @@ xfs_bmbt_lshift(
if (level > 0) {
#ifdef DEBUG
for (i = 0; i < rrecs; i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+ if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
@@ -1354,8 +1348,7 @@ xfs_bmbt_lshift(
} else {
memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
- INT_SET(key.br_startoff, ARCH_CONVERT,
- xfs_bmbt_disk_get_startoff(rrp));
+ key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
rkp = &key;
}
if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
@@ -1445,7 +1438,7 @@ xfs_bmbt_rshift(
rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -1454,7 +1447,7 @@ xfs_bmbt_rshift(
memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -1469,8 +1462,7 @@ xfs_bmbt_rshift(
memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
*rrp = *lrp;
xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- INT_SET(key.br_startoff, ARCH_CONVERT,
- xfs_bmbt_disk_get_startoff(rrp));
+ key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
rkp = &key;
}
be16_add(&left->bb_numrecs, -1);
@@ -1535,7 +1527,7 @@ xfs_bmbt_split(
xfs_btree_cur_t *cur,
int level,
xfs_fsblock_t *bnop,
- xfs_bmbt_key_t *keyp,
+ __uint64_t *startoff,
xfs_btree_cur_t **curp,
int *stat) /* success/failure */
{
@@ -1560,7 +1552,7 @@ xfs_bmbt_split(
xfs_bmbt_rec_t *rrp; /* right record pointer */
XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
- XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp);
+ XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
lbp = cur->bc_bufs[level];
@@ -1619,7 +1611,7 @@ xfs_bmbt_split(
rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
#ifdef DEBUG
for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -1629,13 +1621,13 @@ xfs_bmbt_split(
memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
+ *startoff = be64_to_cpu(rkp->br_startoff);
} else {
lrp = XFS_BMAP_REC_IADDR(left, i, cur);
rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->br_startoff = xfs_bmbt_disk_get_startoff(rrp);
+ *startoff = xfs_bmbt_disk_get_startoff(rrp);
}
be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
right->bb_rightsib = left->bb_rightsib;
@@ -1728,9 +1720,9 @@ xfs_bmdr_to_bmbt(
{
int dmxr;
xfs_bmbt_key_t *fkp;
- xfs_bmbt_ptr_t *fpp;
+ __be64 *fpp;
xfs_bmbt_key_t *tkp;
- xfs_bmbt_ptr_t *tpp;
+ __be64 *tpp;
rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
rblock->bb_level = dblock->bb_level;
@@ -1745,7 +1737,7 @@ xfs_bmdr_to_bmbt(
tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
- memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+ memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
@@ -1805,7 +1797,7 @@ xfs_bmbt_decrement(
tp = cur->bc_tp;
mp = cur->bc_mp;
for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+ fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
XFS_BMAP_BTREE_REF))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -2135,7 +2127,7 @@ xfs_bmbt_increment(
tp = cur->bc_tp;
mp = cur->bc_mp;
for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
- fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+ fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
XFS_BMAP_BTREE_REF))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
@@ -2178,7 +2170,7 @@ xfs_bmbt_insert(
level = 0;
nbno = NULLFSBLOCK;
xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
pcur = cur;
do {
if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
@@ -2205,7 +2197,7 @@ xfs_bmbt_insert(
}
if (ncur) {
pcur = ncur;
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
}
} while (nbno != NULLFSBLOCK);
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
@@ -2356,12 +2348,12 @@ xfs_bmbt_newroot(
args.firstblock = args.fsbno;
if (args.fsbno == NULLFSBLOCK) {
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
#endif
- args.fsbno = INT_GET(*pp, ARCH_CONVERT);
+ args.fsbno = be64_to_cpu(*pp);
args.type = XFS_ALLOCTYPE_START_BNO;
} else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -2393,7 +2385,7 @@ xfs_bmbt_newroot(
cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
#ifdef DEBUG
for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- if ((error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))) {
+ if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
@@ -2401,13 +2393,12 @@ xfs_bmbt_newroot(
#endif
memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
#ifdef DEBUG
- if ((error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
- level))) {
+ if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
}
#endif
- INT_SET(*pp, ARCH_CONVERT, args.fsbno);
+ *pp = cpu_to_be64(args.fsbno);
xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
cur->bc_private.b.whichfork);
xfs_btree_setbuf(cur, level, bp);
@@ -2681,9 +2672,9 @@ xfs_bmbt_to_bmdr(
{
int dmxr;
xfs_bmbt_key_t *fkp;
- xfs_bmbt_ptr_t *fpp;
+ __be64 *fpp;
xfs_bmbt_key_t *tkp;
- xfs_bmbt_ptr_t *tpp;
+ __be64 *tpp;
ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO);
@@ -2698,7 +2689,7 @@ xfs_bmbt_to_bmdr(
tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
- memcpy(tpp, fpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+ memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
}
/*
@@ -2740,7 +2731,7 @@ xfs_bmbt_update(
XFS_BMBT_TRACE_CURSOR(cur, EXIT);
return 0;
}
- INT_SET(key.br_startoff, ARCH_CONVERT, off);
+ key.br_startoff = cpu_to_be64(off);
if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
return error;
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 6478cfa0e539..49539de9525b 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -163,13 +163,14 @@ typedef struct xfs_bmbt_irec
/*
* Key structure for non-leaf levels of the tree.
*/
-typedef struct xfs_bmbt_key
-{
- xfs_dfiloff_t br_startoff; /* starting file offset */
+typedef struct xfs_bmbt_key {
+ __be64 br_startoff; /* starting file offset */
} xfs_bmbt_key_t, xfs_bmdr_key_t;
-typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; /* btree pointer type */
- /* btree block header type */
+/* btree pointer type */
+typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
+
+/* btree block header type */
typedef struct xfs_btree_lblock xfs_bmbt_block_t;
#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index ee2255bd6562..aeb87ca69fcc 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -161,7 +161,7 @@ xfs_btree_check_key(
k1 = ak1;
k2 = ak2;
- ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
+ ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
break;
}
case XFS_BTNUM_INO: {
@@ -170,7 +170,7 @@ xfs_btree_check_key(
k1 = ak1;
k2 = ak2;
- ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
+ ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
break;
}
default:
@@ -285,8 +285,8 @@ xfs_btree_check_rec(
r1 = ar1;
r2 = ar2;
- ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
- INT_GET(r2->ir_startino, ARCH_CONVERT));
+ ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
+ be32_to_cpu(r2->ir_startino));
break;
}
default:
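One detail worth keeping in mind about the endian conversions above: raw __be32/__be64 values may be compared directly for equality, since byte order cannot affect it, which is why xfs_check_block() can test *thispa == *pp without conversion; any ordering comparison, however, must go through be32_to_cpu()/be64_to_cpu() first, as xfs_btree_check_key() and xfs_btree_check_rec() now do. A tiny stand-alone illustration (not from the patch):

    __be64  a = cpu_to_be64(1);
    __be64  b = cpu_to_be64(256);

    if (a == b)                             /* equality: safe on raw disk-endian values */
            ;                               /* never taken here */
    if (be64_to_cpu(a) < be64_to_cpu(b))    /* ordering: convert first */
            ;                               /* taken: 1 < 256 */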
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 44f1bd98064a..892b06c54263 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -145,7 +145,7 @@ typedef struct xfs_btree_cur
union {
xfs_alloc_rec_incore_t a;
xfs_bmbt_irec_t b;
- xfs_inobt_rec_t i;
+ xfs_inobt_rec_incore_t i;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
@@ -243,6 +243,9 @@ xfs_btree_check_lptr(
xfs_dfsbno_t ptr, /* btree block disk address */
int level); /* btree block level */
+#define xfs_btree_check_lptr_disk(cur, ptr, level) \
+ xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
+
/*
* Checking routine: check that short form block header is ok.
*/
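The xfs_btree_check_lptr_disk() wrapper added above exists for the DEBUG-only validation loops: where a pointer is read once and then reused, the xfs_bmap.c and xfs_bmap_btree.c hunks hoist a single be64_to_cpu() into a local; where a whole on-disk pointer array is walked only under DEBUG, the wrapper keeps the conversion at the check itself. Schematically (condensed from the hunks above, not new code):

    /* single pointer: convert once, reuse the host-endian value */
    fsbno = be64_to_cpu(*pp);
    #ifdef DEBUG
            if ((error = xfs_btree_check_lptr(cur, fsbno, level)))
                    return error;
    #endif

    /* DEBUG-only walk over an on-disk array: convert per check */
    #ifdef DEBUG
            for (i = 0; i < numrecs; i++)
                    if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level)))
                            return error;
    #endif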
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a4aa53974f76..7a55c248ea70 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -234,7 +234,6 @@ xfs_buf_item_format(
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
bp = bip->bli_buf;
- ASSERT(XFS_BUF_BP_ISMAPPED(bp));
vecp = log_vector;
/*
@@ -628,25 +627,6 @@ xfs_buf_item_committed(
}
/*
- * This is called when the transaction holding the buffer is aborted.
- * Just behave as if the transaction had been cancelled. If we're shutting down
- * and have aborted this transaction, we'll trap this buffer when it tries to
- * get written out.
- */
-STATIC void
-xfs_buf_item_abort(
- xfs_buf_log_item_t *bip)
-{
- xfs_buf_t *bp;
-
- bp = bip->bli_buf;
- xfs_buftrace("XFS_ABORT", bp);
- XFS_BUF_SUPER_STALE(bp);
- xfs_buf_item_unlock(bip);
- return;
-}
-
-/*
* This is called to asynchronously write the buffer associated with this
* buf log item out to disk. The buffer will already have been locked by
* a successful call to xfs_buf_item_trylock(). If the buffer still has
@@ -693,7 +673,6 @@ STATIC struct xfs_item_ops xfs_buf_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_buf_item_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_buf_item_abort,
.iop_pushbuf = NULL,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_buf_item_committing
@@ -901,7 +880,6 @@ xfs_buf_item_relse(
XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
(XFS_BUF_IODONE_FUNC(bp) != NULL)) {
- ASSERT((XFS_BUF_ISUNINITIAL(bp)) == 0);
XFS_BUF_CLR_IODONE_FUNC(bp);
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 32ab61d17ace..a68bc1f1a313 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1054,7 +1054,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
xfs_da_node_entry_t *btree;
xfs_dablk_t blkno;
int probe, span, max, error, retval;
- xfs_dahash_t hashval;
+ xfs_dahash_t hashval, btreehashval;
xfs_da_args_t *args;
args = state->args;
@@ -1079,30 +1079,32 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
return(error);
}
curr = blk->bp->data;
- ASSERT(be16_to_cpu(curr->magic) == XFS_DA_NODE_MAGIC ||
- be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC ||
- be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC);
+ blk->magic = be16_to_cpu(curr->magic);
+ ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
+ blk->magic == XFS_DIR2_LEAFN_MAGIC ||
+ blk->magic == XFS_ATTR_LEAF_MAGIC);
/*
* Search an intermediate node for a match.
*/
- blk->magic = be16_to_cpu(curr->magic);
if (blk->magic == XFS_DA_NODE_MAGIC) {
node = blk->bp->data;
- blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
+ max = be16_to_cpu(node->hdr.count);
+ btreehashval = node->btree[max-1].hashval;
+ blk->hashval = be32_to_cpu(btreehashval);
/*
* Binary search. (note: small blocks will skip loop)
*/
- max = be16_to_cpu(node->hdr.count);
probe = span = max / 2;
hashval = args->hashval;
for (btree = &node->btree[probe]; span > 4;
btree = &node->btree[probe]) {
span /= 2;
- if (be32_to_cpu(btree->hashval) < hashval)
+ btreehashval = be32_to_cpu(btree->hashval);
+ if (btreehashval < hashval)
probe += span;
- else if (be32_to_cpu(btree->hashval) > hashval)
+ else if (btreehashval > hashval)
probe -= span;
else
break;
@@ -1133,10 +1135,10 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
blk->index = probe;
blkno = be32_to_cpu(btree->before);
}
- } else if (be16_to_cpu(curr->magic) == XFS_ATTR_LEAF_MAGIC) {
+ } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
break;
- } else if (be16_to_cpu(curr->magic) == XFS_DIR2_LEAFN_MAGIC) {
+ } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
break;
}
@@ -1152,11 +1154,13 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
&blk->index, state);
- }
- else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+ } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
retval = xfs_attr_leaf_lookup_int(blk->bp, args);
blk->index = args->index;
args->blkno = blk->blkno;
+ } else {
+ ASSERT(0);
+ return XFS_ERROR(EFSCORRUPTED);
}
if (((retval == ENOENT) || (retval == ENOATTR)) &&
(blk->hashval == args->hashval)) {
@@ -1166,8 +1170,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
return(error);
if (retval == 0) {
continue;
- }
- else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
+ } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
/* path_shift() gives ENOENT */
retval = XFS_ERROR(ENOATTR);
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index bc43163456ef..0893e16b7d83 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,14 +18,6 @@
#ifndef __XFS_ERROR_H__
#define __XFS_ERROR_H__
-#define XFS_ERECOVER 1 /* Failure to recover log */
-#define XFS_ELOGSTAT 2 /* Failure to stat log in user space */
-#define XFS_ENOLOGSPACE 3 /* Reservation too large */
-#define XFS_ENOTSUP 4 /* Operation not supported */
-#define XFS_ENOLSN 5 /* Can't find the lsn you asked for */
-#define XFS_ENOTFOUND 6
-#define XFS_ENOTXFS 7 /* Not XFS filesystem */
-
#ifdef DEBUG
#define XFS_ERROR_NTRAP 10
extern int xfs_etrap[XFS_ERROR_NTRAP];
@@ -175,6 +167,7 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
#define XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010
#define XFS_PTAG_SHUTDOWN_IOERROR 0x00000020
#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
+#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
struct xfs_mount;
/* PRINTFLIKE4 */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6cf6d8769b97..6dba78199faf 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -33,9 +33,6 @@ kmem_zone_t *xfs_efi_zone;
kmem_zone_t *xfs_efd_zone;
STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *);
-STATIC void xfs_efi_item_abort(xfs_efi_log_item_t *);
-STATIC void xfs_efd_item_abort(xfs_efd_log_item_t *);
-
void
xfs_efi_item_free(xfs_efi_log_item_t *efip)
@@ -184,7 +181,7 @@ STATIC void
xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
{
if (efip->efi_item.li_flags & XFS_LI_ABORTED)
- xfs_efi_item_abort(efip);
+ xfs_efi_item_free(efip);
return;
}
@@ -202,18 +199,6 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
}
/*
- * This is called when the transaction logging the EFI is aborted.
- * Free up the EFI and return. No need to clean up the slot for
- * the item in the transaction. That was done by the unpin code
- * which is called prior to this routine in the abort/fs-shutdown path.
- */
-STATIC void
-xfs_efi_item_abort(xfs_efi_log_item_t *efip)
-{
- xfs_efi_item_free(efip);
-}
-
-/*
* There isn't much you can do to push on an efi item. It is simply
* stuck waiting for all of its corresponding efd items to be
* committed to disk.
@@ -255,7 +240,6 @@ STATIC struct xfs_item_ops xfs_efi_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_efi_item_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_efi_item_abort,
.iop_pushbuf = NULL,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_efi_item_committing
@@ -386,33 +370,6 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
}
}
-/*
- * This is called when the transaction that should be committing the
- * EFD corresponding to the given EFI is aborted. The committed and
- * canceled flags are used to coordinate the freeing of the EFI and
- * the references by the transaction that committed it.
- */
-STATIC void
-xfs_efi_cancel(
- xfs_efi_log_item_t *efip)
-{
- xfs_mount_t *mp;
- SPLDECL(s);
-
- mp = efip->efi_item.li_mountp;
- AIL_LOCK(mp, s);
- if (efip->efi_flags & XFS_EFI_COMMITTED) {
- /*
- * xfs_trans_delete_ail() drops the AIL lock.
- */
- xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
- xfs_efi_item_free(efip);
- } else {
- efip->efi_flags |= XFS_EFI_CANCELED;
- AIL_UNLOCK(mp, s);
- }
-}
-
STATIC void
xfs_efd_item_free(xfs_efd_log_item_t *efdp)
{
@@ -514,7 +471,7 @@ STATIC void
xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
{
if (efdp->efd_item.li_flags & XFS_LI_ABORTED)
- xfs_efd_item_abort(efdp);
+ xfs_efd_item_free(efdp);
return;
}
@@ -541,27 +498,6 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
}
/*
- * The transaction of which this EFD is a part has been aborted.
- * Inform its companion EFI of this fact and then clean up after
- * ourselves. No need to clean up the slot for the item in the
- * transaction. That was done by the unpin code which is called
- * prior to this routine in the abort/fs-shutdown path.
- */
-STATIC void
-xfs_efd_item_abort(xfs_efd_log_item_t *efdp)
-{
- /*
- * If we got a log I/O error, it's always the case that the LR with the
- * EFI got unpinned and freed before the EFD got aborted. So don't
- * reference the EFI at all in that case.
- */
- if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
- xfs_efi_cancel(efdp->efd_efip);
-
- xfs_efd_item_free(efdp);
-}
-
-/*
* There isn't much you can do to push on an efd item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
@@ -602,7 +538,6 @@ STATIC struct xfs_item_ops xfs_efd_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_efd_item_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_efd_item_abort,
.iop_pushbuf = NULL,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_efd_item_committing
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 0ea45edaab03..2f049f63e85f 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -33,14 +33,16 @@ typedef struct xfs_extent {
* conversion routine.
*/
+#ifndef HAVE_FORMAT32
typedef struct xfs_extent_32 {
- xfs_dfsbno_t ext_start;
- xfs_extlen_t ext_len;
+ __uint64_t ext_start;
+ __uint32_t ext_len;
} __attribute__((packed)) xfs_extent_32_t;
+#endif
typedef struct xfs_extent_64 {
- xfs_dfsbno_t ext_start;
- xfs_extlen_t ext_len;
+ __uint64_t ext_start;
+ __uint32_t ext_len;
__uint32_t ext_pad;
} xfs_extent_64_t;
@@ -50,25 +52,27 @@ typedef struct xfs_extent_64 {
* size is given by efi_nextents.
*/
typedef struct xfs_efi_log_format {
- unsigned short efi_type; /* efi log item type */
- unsigned short efi_size; /* size of this item */
- uint efi_nextents; /* # extents to free */
+ __uint16_t efi_type; /* efi log item type */
+ __uint16_t efi_size; /* size of this item */
+ __uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_t;
+#ifndef HAVE_FORMAT32
typedef struct xfs_efi_log_format_32 {
- unsigned short efi_type; /* efi log item type */
- unsigned short efi_size; /* size of this item */
- uint efi_nextents; /* # extents to free */
+ __uint16_t efi_type; /* efi log item type */
+ __uint16_t efi_size; /* size of this item */
+ __uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_32_t efi_extents[1]; /* array of extents to free */
} __attribute__((packed)) xfs_efi_log_format_32_t;
+#endif
typedef struct xfs_efi_log_format_64 {
- unsigned short efi_type; /* efi log item type */
- unsigned short efi_size; /* size of this item */
- uint efi_nextents; /* # extents to free */
+ __uint16_t efi_type; /* efi log item type */
+ __uint16_t efi_size; /* size of this item */
+ __uint32_t efi_nextents; /* # extents to free */
__uint64_t efi_id; /* efi identifier */
xfs_extent_64_t efi_extents[1]; /* array of extents to free */
} xfs_efi_log_format_64_t;
@@ -79,25 +83,27 @@ typedef struct xfs_efi_log_format_64 {
* size is given by efd_nextents;
*/
typedef struct xfs_efd_log_format {
- unsigned short efd_type; /* efd log item type */
- unsigned short efd_size; /* size of this item */
- uint efd_nextents; /* # of extents freed */
+ __uint16_t efd_type; /* efd log item type */
+ __uint16_t efd_size; /* size of this item */
+ __uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_t;
+#ifndef HAVE_FORMAT32
typedef struct xfs_efd_log_format_32 {
- unsigned short efd_type; /* efd log item type */
- unsigned short efd_size; /* size of this item */
- uint efd_nextents; /* # of extents freed */
+ __uint16_t efd_type; /* efd log item type */
+ __uint16_t efd_size; /* size of this item */
+ __uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_32_t efd_extents[1]; /* array of extents freed */
} __attribute__((packed)) xfs_efd_log_format_32_t;
+#endif
typedef struct xfs_efd_log_format_64 {
- unsigned short efd_type; /* efd log item type */
- unsigned short efd_size; /* size of this item */
- uint efd_nextents; /* # of extents freed */
+ __uint16_t efd_type; /* efd log item type */
+ __uint16_t efd_size; /* size of this item */
+ __uint32_t efd_nextents; /* # of extents freed */
__uint64_t efd_efi_id; /* id of corresponding efi */
xfs_extent_64_t efd_extents[1]; /* array of extents freed */
} xfs_efd_log_format_64_t;
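Switching the EFI/EFD format headers from unsigned short/uint to explicit __uint16_t/__uint32_t pins the in-log layout regardless of the word size of the kernel that wrote the log; the _32 and _64 variants then differ only in the extent layout (the packed 12-byte xfs_extent_32_t versus the padded 16-byte xfs_extent_64_t). As a rough sketch of how recovery could tell the variants apart by size alone (hypothetical helper, not part of this patch):

    STATIC int
    xfs_efi_is_32bit_format(uint len, uint nextents)
    {
            /* base struct already contains efi_extents[1] */
            return len == sizeof(xfs_efi_log_format_32_t) +
                          (nextents - 1) * sizeof(xfs_extent_32_t);
    }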
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 0f0ad1535951..1335449841cd 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -22,8 +22,6 @@
* SGI's XFS filesystem's major stuff (constants, structures)
*/
-#define XFS_NAME "xfs"
-
/*
* Direct I/O attribute record used with XFS_IOC_DIOINFO
* d_miniosz is the min xfer size, xfer size multiple and file seek offset
@@ -426,11 +424,7 @@ typedef struct xfs_handle {
- (char *) &(handle)) \
+ (handle).ha_fid.xfs_fid_len)
-#define XFS_HANDLE_CMP(h1, h2) memcmp(h1, h2, sizeof(xfs_handle_t))
-
-#define FSHSIZE sizeof(fsid_t)
-
-/*
+/*
* Flags for going down operation
*/
#define XFS_FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 33164a85aa9d..a446e5a115c6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -458,7 +458,7 @@ nextag:
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
up_read(&mp->m_peraglock);
- return (xfs_buf_t *)0;
+ return NULL;
}
agno++;
if (agno >= agcount)
@@ -466,7 +466,7 @@ nextag:
if (agno == pagno) {
if (flags == 0) {
up_read(&mp->m_peraglock);
- return (xfs_buf_t *)0;
+ return NULL;
}
flags = 0;
}
@@ -529,10 +529,10 @@ xfs_dialloc(
int offset; /* index of inode in chunk */
xfs_agino_t pagino; /* parent's a.g. relative inode # */
xfs_agnumber_t pagno; /* parent's allocation group number */
- xfs_inobt_rec_t rec; /* inode allocation record */
+ xfs_inobt_rec_incore_t rec; /* inode allocation record */
xfs_agnumber_t tagno; /* testing allocation group number */
xfs_btree_cur_t *tcur; /* temp cursor */
- xfs_inobt_rec_t trec; /* temp inode allocation record */
+ xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
if (*IO_agbp == NULL) {
@@ -945,7 +945,7 @@ xfs_difree(
int ilen; /* inodes in an inode cluster */
xfs_mount_t *mp; /* mount structure for filesystem */
int off; /* offset of inode in inode chunk */
- xfs_inobt_rec_t rec; /* btree record */
+ xfs_inobt_rec_incore_t rec; /* btree record */
mp = tp->t_mountp;
@@ -1195,6 +1195,7 @@ xfs_dilocate(
"(0x%llx)",
ino, XFS_AGINO_TO_INO(mp, agno, agino));
}
+ xfs_stack_trace();
#endif /* DEBUG */
return XFS_ERROR(EINVAL);
}
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 616eeeb6953e..8cdeeaf8632b 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -568,7 +568,7 @@ xfs_inobt_insrec(
/*
* Make a key out of the record data to be inserted, and save it.
*/
- key.ir_startino = recp->ir_startino; /* INT_: direct copy */
+ key.ir_startino = recp->ir_startino;
optr = ptr = cur->bc_ptrs[level];
/*
* If we're off the left edge, return failure.
@@ -600,7 +600,7 @@ xfs_inobt_insrec(
}
#endif
nbno = NULLAGBLOCK;
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
/*
* If the block is full, we can't insert the new entry until we
* make the block un-full.
@@ -641,7 +641,7 @@ xfs_inobt_insrec(
return error;
#endif
ptr = cur->bc_ptrs[level];
- nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
+ nrec.ir_startino = nkey.ir_startino;
} else {
/*
* Otherwise the insert fails.
@@ -681,7 +681,7 @@ xfs_inobt_insrec(
if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
return error;
#endif
- kp[ptr - 1] = key; /* INT_: struct copy */
+ kp[ptr - 1] = key;
pp[ptr - 1] = cpu_to_be32(*bnop);
numrecs++;
block->bb_numrecs = cpu_to_be16(numrecs);
@@ -698,7 +698,7 @@ xfs_inobt_insrec(
* Now stuff the new record in, bump numrecs
* and log the new data.
*/
- rp[ptr - 1] = *recp; /* INT_: struct copy */
+ rp[ptr - 1] = *recp;
numrecs++;
block->bb_numrecs = cpu_to_be16(numrecs);
xfs_inobt_log_recs(cur, bp, ptr, numrecs);
@@ -731,7 +731,7 @@ xfs_inobt_insrec(
*/
*bnop = nbno;
if (nbno != NULLAGBLOCK) {
- *recp = nrec; /* INT_: struct copy */
+ *recp = nrec;
*curp = ncur;
}
*stat = 1;
@@ -878,7 +878,7 @@ xfs_inobt_lookup(
*/
bp = cur->bc_bufs[level];
if (bp && XFS_BUF_ADDR(bp) != d)
- bp = (xfs_buf_t *)0;
+ bp = NULL;
if (!bp) {
/*
* Need to get a new buffer. Read it, then
@@ -950,12 +950,12 @@ xfs_inobt_lookup(
xfs_inobt_key_t *kkp;
kkp = kkbase + keyno - 1;
- startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
+ startino = be32_to_cpu(kkp->ir_startino);
} else {
xfs_inobt_rec_t *krp;
krp = krbase + keyno - 1;
- startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
+ startino = be32_to_cpu(krp->ir_startino);
}
/*
* Compute difference to get next direction.
@@ -1117,7 +1117,7 @@ xfs_inobt_lshift(
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
return error;
#endif
- *lpp = *rpp; /* INT_: no-change copy */
+ *lpp = *rpp;
xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
}
/*
@@ -1160,7 +1160,7 @@ xfs_inobt_lshift(
} else {
memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+ key.ir_startino = rrp->ir_startino;
rkp = &key;
}
/*
@@ -1297,13 +1297,13 @@ xfs_inobt_newroot(
*/
kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
if (be16_to_cpu(left->bb_level) > 0) {
- kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
- kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
+ kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
+ kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
} else {
rp = XFS_INOBT_REC_ADDR(left, 1, cur);
- INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
+ kp[0].ir_startino = rp->ir_startino;
rp = XFS_INOBT_REC_ADDR(right, 1, cur);
- INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
+ kp[1].ir_startino = rp->ir_startino;
}
xfs_inobt_log_keys(cur, nbp, 1, 2);
/*
@@ -1410,8 +1410,8 @@ xfs_inobt_rshift(
if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
return error;
#endif
- *rkp = *lkp; /* INT_: no change copy */
- *rpp = *lpp; /* INT_: no change copy */
+ *rkp = *lkp;
+ *rpp = *lpp;
xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
} else {
@@ -1420,7 +1420,7 @@ xfs_inobt_rshift(
memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
*rrp = *lrp;
xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
- key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+ key.ir_startino = rrp->ir_startino;
rkp = &key;
}
/*
@@ -1559,7 +1559,7 @@ xfs_inobt_split(
rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
- keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
+ keyp->ir_startino = rrp->ir_startino;
}
/*
* Find the left block number by looking in the buffer.
@@ -1813,9 +1813,9 @@ xfs_inobt_get_rec(
* Point to the record and extract its data.
*/
rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
- *ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
- *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
- *free = INT_GET(rec->ir_free, ARCH_CONVERT);
+ *ino = be32_to_cpu(rec->ir_startino);
+ *fcnt = be32_to_cpu(rec->ir_freecount);
+ *free = be64_to_cpu(rec->ir_free);
*stat = 1;
return 0;
}
@@ -1930,10 +1930,10 @@ xfs_inobt_insert(
level = 0;
nbno = NULLAGBLOCK;
- INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
- INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
- INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
- ncur = (xfs_btree_cur_t *)0;
+ nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
+ nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
+ nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
+ ncur = NULL;
pcur = cur;
/*
* Loop going up the tree, starting at the leaf level.
@@ -1965,7 +1965,7 @@ xfs_inobt_insert(
*/
if (ncur) {
pcur = ncur;
- ncur = (xfs_btree_cur_t *)0;
+ ncur = NULL;
}
} while (nbno != NULLAGBLOCK);
*stat = i;
@@ -2060,9 +2060,9 @@ xfs_inobt_update(
/*
* Fill in the new contents and log them.
*/
- INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
- INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
- INT_SET(rp->ir_free, ARCH_CONVERT, free);
+ rp->ir_startino = cpu_to_be32(ino);
+ rp->ir_freecount = cpu_to_be32(fcnt);
+ rp->ir_free = cpu_to_be64(free);
xfs_inobt_log_recs(cur, bp, ptr, ptr);
/*
* Updating first record in leaf. Pass new key value up to our parent.
@@ -2070,7 +2070,7 @@ xfs_inobt_update(
if (ptr == 1) {
xfs_inobt_key_t key; /* key containing [ino] */
- INT_SET(key.ir_startino, ARCH_CONVERT, ino);
+ key.ir_startino = cpu_to_be32(ino);
if ((error = xfs_inobt_updkey(cur, &key, 1)))
return error;
}
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index ae3904cb1ee8..2c0e49893ff7 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -47,19 +47,24 @@ static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
/*
* Data record structure
*/
-typedef struct xfs_inobt_rec
-{
+typedef struct xfs_inobt_rec {
+ __be32 ir_startino; /* starting inode number */
+ __be32 ir_freecount; /* count of free inodes (set bits) */
+ __be64 ir_free; /* free inode mask */
+} xfs_inobt_rec_t;
+
+typedef struct xfs_inobt_rec_incore {
xfs_agino_t ir_startino; /* starting inode number */
__int32_t ir_freecount; /* count of free inodes (set bits) */
xfs_inofree_t ir_free; /* free inode mask */
-} xfs_inobt_rec_t;
+} xfs_inobt_rec_incore_t;
+
/*
* Key structure
*/
-typedef struct xfs_inobt_key
-{
- xfs_agino_t ir_startino; /* starting inode number */
+typedef struct xfs_inobt_key {
+ __be32 ir_startino; /* starting inode number */
} xfs_inobt_key_t;
/* btree pointer type */
@@ -77,7 +82,7 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
#define XFS_INOBT_IS_FREE(rp,i) \
(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
#define XFS_INOBT_IS_FREE_DISK(rp,i) \
- ((INT_GET((rp)->ir_free,ARCH_CONVERT) & XFS_INOBT_MASK(i)) != 0)
+ ((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0)
#define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i))
#define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i))
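The header change above splits the inode-btree record into an explicit big-endian on-disk layout (__be32/__be64 fields) and a native-endian incore variant, which is why the xfs_ialloc_btree.c hunks swap the old INT_GET/INT_SET/INT_COPY macros for be32_to_cpu()/cpu_to_be32() style helpers. Below is a minimal user-space sketch of that disk-to-incore conversion, assuming glibc's be32toh()/be64toh() from <endian.h> as stand-ins for the kernel byte-order helpers; the struct and field names echo the patch and are illustrative only, not the XFS code.

/* Hedged user-space sketch of the on-disk/incore split introduced above. */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct inobt_rec_disk {			/* on-disk: fixed big-endian layout */
	uint32_t ir_startino;		/* starting inode number (BE) */
	uint32_t ir_freecount;		/* count of free inodes (BE) */
	uint64_t ir_free;		/* free inode mask (BE) */
};

struct inobt_rec_incore {		/* incore: host-endian values */
	uint32_t ir_startino;
	int32_t	 ir_freecount;
	uint64_t ir_free;
};

static void inobt_rec_from_disk(const struct inobt_rec_disk *d,
				struct inobt_rec_incore *i)
{
	i->ir_startino	= be32toh(d->ir_startino);
	i->ir_freecount	= (int32_t)be32toh(d->ir_freecount);
	i->ir_free	= be64toh(d->ir_free);
}

int main(void)
{
	/* A record as it would sit in a buffer read from disk. */
	struct inobt_rec_disk d = {
		.ir_startino	= htobe32(128),
		.ir_freecount	= htobe32(61),
		.ir_free	= htobe64(0xfffffffffffffff8ULL),
	};
	struct inobt_rec_incore rec;

	inobt_rec_from_disk(&d, &rec);
	printf("startino=%u freecount=%d free=0x%llx\n",
	       rec.ir_startino, rec.ir_freecount,
	       (unsigned long long)rec.ir_free);
	return 0;
}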
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0724df7fabb7..b73d216ecaf9 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -50,7 +50,7 @@ void
xfs_ihash_init(xfs_mount_t *mp)
{
__uint64_t icount;
- uint i, flags = KM_SLEEP | KM_MAYFAIL;
+ uint i;
if (!mp->m_ihsize) {
icount = mp->m_maxicount ? mp->m_maxicount :
@@ -61,14 +61,13 @@ xfs_ihash_init(xfs_mount_t *mp)
(64 * NBPP) / sizeof(xfs_ihash_t));
}
- while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
- sizeof(xfs_ihash_t), flags))) {
- if ((mp->m_ihsize >>= 1) <= NBPP)
- flags = KM_SLEEP;
- }
- for (i = 0; i < mp->m_ihsize; i++) {
+ mp->m_ihash = kmem_zalloc_greedy(&mp->m_ihsize,
+ NBPC * sizeof(xfs_ihash_t),
+ mp->m_ihsize * sizeof(xfs_ihash_t),
+ KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+ mp->m_ihsize /= sizeof(xfs_ihash_t);
+ for (i = 0; i < mp->m_ihsize; i++)
rwlock_init(&(mp->m_ihash[i].ih_lock));
- }
}
/*
@@ -77,7 +76,7 @@ xfs_ihash_init(xfs_mount_t *mp)
void
xfs_ihash_free(xfs_mount_t *mp)
{
- kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t));
+ kmem_free(mp->m_ihash, mp->m_ihsize * sizeof(xfs_ihash_t));
mp->m_ihash = NULL;
}
@@ -95,7 +94,7 @@ xfs_chash_init(xfs_mount_t *mp)
mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
* sizeof(xfs_chash_t),
- KM_SLEEP);
+ KM_SLEEP | KM_LARGE);
for (i = 0; i < mp->m_chsize; i++) {
spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
}
@@ -244,7 +243,9 @@ again:
XFS_STATS_INC(xs_ig_found);
+ spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_IRECLAIMABLE;
+ spin_unlock(&ip->i_flags_lock);
version = ih->ih_version;
read_unlock(&ih->ih_lock);
xfs_ihash_promote(ih, ip, version);
@@ -290,15 +291,17 @@ again:
finish_inode:
if (ip->i_d.di_mode == 0) {
- if (!(flags & IGET_CREATE))
+ if (!(flags & XFS_IGET_CREATE))
return ENOENT;
xfs_iocore_inode_reinit(ip);
}
-
+
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
+ spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_ISTALE;
+ spin_unlock(&ip->i_flags_lock);
vn_trace_exit(vp, "xfs_iget.found",
(inst_t *)__return_address);
@@ -320,21 +323,20 @@ finish_inode:
* Read the disk inode attributes into a new inode structure and get
* a new vnode for it. This should also initialize i_ino and i_mount.
*/
- error = xfs_iread(mp, tp, ino, &ip, bno);
- if (error) {
+ error = xfs_iread(mp, tp, ino, &ip, bno,
+ (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
+ if (error)
return error;
- }
vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);
xfs_inode_lock_init(ip, vp);
xfs_iocore_inode_init(ip);
- if (lock_flags != 0) {
+ if (lock_flags)
xfs_ilock(ip, lock_flags);
- }
-
- if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) {
+
+ if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
xfs_idestroy(ip);
return ENOENT;
}
@@ -369,7 +371,9 @@ finish_inode:
ih->ih_next = ip;
ip->i_udquot = ip->i_gdquot = NULL;
ih->ih_version++;
+ spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_INEW;
+ spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);
@@ -548,7 +552,7 @@ xfs_inode_lock_init(
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
init_waitqueue_head(&ip->i_ipin_wait);
atomic_set(&ip->i_pincount, 0);
- init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number);
+ initnsema(&ip->i_flock, 1, "xfsfino");
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f8ecff8553a..c27d7d495aa0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -854,7 +854,8 @@ xfs_iread(
xfs_trans_t *tp,
xfs_ino_t ino,
xfs_inode_t **ipp,
- xfs_daddr_t bno)
+ xfs_daddr_t bno,
+ uint imap_flags)
{
xfs_buf_t *bp;
xfs_dinode_t *dip;
@@ -866,6 +867,7 @@ xfs_iread(
ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
ip->i_ino = ino;
ip->i_mount = mp;
+ spin_lock_init(&ip->i_flags_lock);
/*
* Get pointers to the on-disk inode and the buffer containing it.
@@ -874,7 +876,7 @@ xfs_iread(
* return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
* know that this is a new incore inode.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, 0);
+ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
if (error) {
kmem_zone_free(xfs_inode_zone, ip);
return error;
@@ -1113,7 +1115,7 @@ xfs_ialloc(
* to prevent others from looking at until we're done.
*/
error = xfs_trans_iget(tp->t_mountp, tp, ino,
- IGET_CREATE, XFS_ILOCK_EXCL, &ip);
+ XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
if (error != 0) {
return error;
}
@@ -2213,7 +2215,9 @@ xfs_ifree_cluster(
if (ip == free_ip) {
if (xfs_iflock_nowait(ip)) {
+ spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_ISTALE;
+ spin_unlock(&ip->i_flags_lock);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
@@ -2227,7 +2231,9 @@ xfs_ifree_cluster(
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
if (xfs_iflock_nowait(ip)) {
+ spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_ISTALE;
+ spin_unlock(&ip->i_flags_lock);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
@@ -2257,7 +2263,9 @@ xfs_ifree_cluster(
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
+ spin_lock(&iip->ili_inode->i_flags_lock);
iip->ili_inode->i_flags |= XFS_ISTALE;
+ spin_unlock(&iip->ili_inode->i_flags_lock);
pre_flushed++;
}
lip = lip->li_bio_list;
@@ -2753,19 +2761,29 @@ xfs_iunpin(
* call as the inode reclaim may be blocked waiting for
* the inode to become unpinned.
*/
+ struct inode *inode = NULL;
+
+ spin_lock(&ip->i_flags_lock);
if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
/* make sync come back and flush this inode */
if (vp) {
- struct inode *inode = vn_to_inode(vp);
+ inode = vn_to_inode(vp);
if (!(inode->i_state &
- (I_NEW|I_FREEING|I_CLEAR)))
- mark_inode_dirty_sync(inode);
+ (I_NEW|I_FREEING|I_CLEAR))) {
+ inode = igrab(inode);
+ if (inode)
+ mark_inode_dirty_sync(inode);
+ } else
+ inode = NULL;
}
}
+ spin_unlock(&ip->i_flags_lock);
wake_up(&ip->i_ipin_wait);
+ if (inode)
+ iput(inode);
}
}
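The xfs_iunpin() hunk above now decides whether the inode needs a sync flush while holding the new i_flags_lock, pins the VFS inode with igrab() inside that window, and defers mark_inode_dirty_sync()/iput() until after the lock is dropped. Below is a hedged user-space analogue of that pattern (take a reference under the lock, do the heavier work and release it afterwards); the object, lock and helper names are invented for illustration and are not kernel APIs.

/* Hedged sketch: reference-under-lock, work-after-unlock. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define OBJ_RECLAIMABLE	0x1

struct object {
	pthread_mutex_t	flags_lock;	/* plays the role of i_flags_lock */
	unsigned int	flags;		/* reclaim-style state bits */
	atomic_int	refcount;
};

static struct object *object_get(struct object *o)
{
	/* Stands in for igrab(); the real igrab() can fail and return NULL. */
	atomic_fetch_add(&o->refcount, 1);
	return o;
}

static void object_put(struct object *o)
{
	atomic_fetch_sub(&o->refcount, 1);	/* stands in for iput() */
}

static void mark_dirty(struct object *o)
{
	printf("marking object dirty (refcount=%d)\n",
	       atomic_load(&o->refcount));
}

static void unpin(struct object *o)
{
	struct object *ref = NULL;

	pthread_mutex_lock(&o->flags_lock);
	if (!(o->flags & OBJ_RECLAIMABLE))	/* decide under the lock */
		ref = object_get(o);		/* pin before dropping it */
	pthread_mutex_unlock(&o->flags_lock);

	if (ref) {				/* heavier work, lock dropped */
		mark_dirty(ref);
		object_put(ref);
	}
}

int main(void)
{
	struct object o = {
		.flags_lock	= PTHREAD_MUTEX_INITIALIZER,
		.flags		= 0,
		.refcount	= 1,
	};

	unpin(&o);
	return 0;
}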
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d10b76ed1e5b..e96eb0835fe6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -267,6 +267,7 @@ typedef struct xfs_inode {
sema_t i_flock; /* inode flush lock */
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
+ spinlock_t i_flags_lock; /* inode i_flags lock */
#ifdef HAVE_REFCACHE
struct xfs_inode **i_refcache; /* ptr to entry in ref cache */
struct xfs_inode *i_release; /* inode to unref */
@@ -389,11 +390,14 @@ typedef struct xfs_inode {
(((vfsp)->vfs_flag & VFS_GRPID) || ((pip)->i_d.di_mode & S_ISGID))
/*
- * xfs_iget.c prototypes.
+ * Flags for xfs_iget()
*/
+#define XFS_IGET_CREATE 0x1
+#define XFS_IGET_BULKSTAT 0x2
-#define IGET_CREATE 1
-
+/*
+ * xfs_iget.c prototypes.
+ */
void xfs_ihash_init(struct xfs_mount *);
void xfs_ihash_free(struct xfs_mount *);
void xfs_chash_init(struct xfs_mount *);
@@ -425,7 +429,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **,
xfs_daddr_t, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
- xfs_inode_t **, xfs_daddr_t);
+ xfs_inode_t **, xfs_daddr_t, uint);
int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f8e80d8e7237..a7a92251eb56 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -743,21 +743,6 @@ xfs_inode_item_committed(
}
/*
- * The transaction with the inode locked has aborted. The inode
- * must not be dirty within the transaction (unless we're forcibly
- * shutting down). We simply unlock just as if the transaction
- * had been cancelled.
- */
-STATIC void
-xfs_inode_item_abort(
- xfs_inode_log_item_t *iip)
-{
- xfs_inode_item_unlock(iip);
- return;
-}
-
-
-/*
* This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
* failed to get the inode flush lock but did get the inode locked SHARED.
* Here we're trying to see if the inode buffer is incore, and if so whether it's
@@ -915,7 +900,6 @@ STATIC struct xfs_item_ops xfs_inode_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_inode_item_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
- .iop_abort = (void(*)(xfs_log_item_t*))xfs_inode_item_abort,
.iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_inode_item_committing
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 5db6cd1b4cf3..bfe92ea17952 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -25,52 +25,54 @@
* must be added on to the end.
*/
typedef struct xfs_inode_log_format {
- unsigned short ilf_type; /* inode log item type */
- unsigned short ilf_size; /* size of this item */
- uint ilf_fields; /* flags for fields logged */
- ushort ilf_asize; /* size of attr d/ext/root */
- ushort ilf_dsize; /* size of data/ext/root */
- xfs_ino_t ilf_ino; /* inode number */
+ __uint16_t ilf_type; /* inode log item type */
+ __uint16_t ilf_size; /* size of this item */
+ __uint32_t ilf_fields; /* flags for fields logged */
+ __uint16_t ilf_asize; /* size of attr d/ext/root */
+ __uint16_t ilf_dsize; /* size of data/ext/root */
+ __uint64_t ilf_ino; /* inode number */
union {
- xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/
+ __uint32_t ilfu_rdev; /* rdev value for dev inode*/
uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
__int64_t ilf_blkno; /* blkno of inode buffer */
- int ilf_len; /* len of inode buffer */
- int ilf_boffset; /* off of inode in buffer */
+ __int32_t ilf_len; /* len of inode buffer */
+ __int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_t;
+#ifndef HAVE_FORMAT32
typedef struct xfs_inode_log_format_32 {
- unsigned short ilf_type; /* 16: inode log item type */
- unsigned short ilf_size; /* 16: size of this item */
- uint ilf_fields; /* 32: flags for fields logged */
- ushort ilf_asize; /* 32: size of attr d/ext/root */
- ushort ilf_dsize; /* 32: size of data/ext/root */
- xfs_ino_t ilf_ino; /* 64: inode number */
+ __uint16_t ilf_type; /* inode log item type */
+ __uint16_t ilf_size; /* size of this item */
+ __uint32_t ilf_fields; /* flags for fields logged */
+ __uint16_t ilf_asize; /* size of attr d/ext/root */
+ __uint16_t ilf_dsize; /* size of data/ext/root */
+ __uint64_t ilf_ino; /* inode number */
union {
- xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/
- uuid_t ilfu_uuid; /* 128: mount point value */
+ __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* 64: blkno of inode buffer */
- int ilf_len; /* 32: len of inode buffer */
- int ilf_boffset; /* 32: off of inode in buffer */
+ __int64_t ilf_blkno; /* blkno of inode buffer */
+ __int32_t ilf_len; /* len of inode buffer */
+ __int32_t ilf_boffset; /* off of inode in buffer */
} __attribute__((packed)) xfs_inode_log_format_32_t;
+#endif
typedef struct xfs_inode_log_format_64 {
- unsigned short ilf_type; /* 16: inode log item type */
- unsigned short ilf_size; /* 16: size of this item */
- uint ilf_fields; /* 32: flags for fields logged */
- ushort ilf_asize; /* 32: size of attr d/ext/root */
- ushort ilf_dsize; /* 32: size of data/ext/root */
- __uint32_t ilf_pad; /* 32: pad for 64 bit boundary */
- xfs_ino_t ilf_ino; /* 64: inode number */
+ __uint16_t ilf_type; /* inode log item type */
+ __uint16_t ilf_size; /* size of this item */
+ __uint32_t ilf_fields; /* flags for fields logged */
+ __uint16_t ilf_asize; /* size of attr d/ext/root */
+ __uint16_t ilf_dsize; /* size of data/ext/root */
+ __uint32_t ilf_pad; /* pad for 64 bit boundary */
+ __uint64_t ilf_ino; /* inode number */
union {
- xfs_dev_t ilfu_rdev; /* 32: rdev value for dev inode*/
- uuid_t ilfu_uuid; /* 128: mount point value */
+ __uint32_t ilfu_rdev; /* rdev value for dev inode*/
+ uuid_t ilfu_uuid; /* mount point value */
} ilf_u;
- __int64_t ilf_blkno; /* 64: blkno of inode buffer */
- int ilf_len; /* 32: len of inode buffer */
- int ilf_boffset; /* 32: off of inode in buffer */
+ __int64_t ilf_blkno; /* blkno of inode buffer */
+ __int32_t ilf_len; /* len of inode buffer */
+ __int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
/*
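The log-format structures above trade native C types (unsigned short, int, ushort, xfs_dev_t) for fixed-width __uint16_t/__uint32_t/__int64_t fields, so the logged layout no longer depends on the host ABI. A hedged sketch of the idea follows, using <stdint.h> types and compile-time offset checks; the struct is a simplified stand-in with the rdev/uuid union collapsed, not the kernel definition.

/* Hedged sketch: fixed-width fields make the layout checkable at build time. */
#include <stddef.h>
#include <stdint.h>

struct inode_log_format_sketch {
	uint16_t ilf_type;		/* inode log item type */
	uint16_t ilf_size;		/* size of this item */
	uint32_t ilf_fields;		/* flags for fields logged */
	uint16_t ilf_asize;		/* size of attr d/ext/root */
	uint16_t ilf_dsize;		/* size of data/ext/root */
	uint64_t ilf_ino;		/* inode number */
	int64_t	 ilf_blkno;		/* blkno of inode buffer */
	int32_t	 ilf_len;		/* len of inode buffer */
	int32_t	 ilf_boffset;		/* off of inode in buffer */
};

/* The early offsets no longer vary with the size of 'int' or 'short'. */
_Static_assert(offsetof(struct inode_log_format_sketch, ilf_fields) == 4,
	       "fixed-width fields pin the early offsets");
_Static_assert(offsetof(struct inode_log_format_sketch, ilf_dsize) == 10,
	       "no ABI-dependent sizes before ilf_ino");

/* Alignment of the 8-byte ilf_ino still differs: a 64-bit ABI pads it to
 * offset 16 while i386 places it at 12, which is why the patch keeps a
 * packed 32-bit variant and a 64-bit variant with an explicit ilf_pad. */

int main(void)
{
	return 0;
}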
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f1949c16df15..19655124da78 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -398,6 +398,23 @@ xfs_flush_space(
return 1;
}
+STATIC int
+xfs_cmn_err_fsblock_zero(
+ xfs_inode_t *ip,
+ xfs_bmbt_irec_t *imap)
+{
+ xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
+ "Access to block zero in inode %llu "
+ "start_block: %llx start_off: %llx "
+ "blkcnt: %llx extent-state: %x\n",
+ (unsigned long long)ip->i_ino,
+ (unsigned long long)imap->br_startblock,
+ (unsigned long long)imap->br_startoff,
+ (unsigned long long)imap->br_blockcount,
+ imap->br_state);
+ return EFSCORRUPTED;
+}
+
int
xfs_iomap_write_direct(
xfs_inode_t *ip,
@@ -536,23 +553,17 @@ xfs_iomap_write_direct(
* Copy any maps to caller's array and return any error.
*/
if (nimaps == 0) {
- error = (ENOSPC);
+ error = ENOSPC;
+ goto error_out;
+ }
+
+ if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
+ error = xfs_cmn_err_fsblock_zero(ip, &imap);
goto error_out;
}
*ret_imap = imap;
*nmaps = 1;
- if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
- cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
- "start_block : %llx start_off : %llx blkcnt : %llx "
- "extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- (unsigned long long)ret_imap->br_startblock,
- (unsigned long long)ret_imap->br_startoff,
- (unsigned long long)ret_imap->br_blockcount,
- ret_imap->br_state);
- }
return 0;
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
@@ -715,17 +726,8 @@ retry:
goto retry;
}
- if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
- cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld "
- "start_block : %llx start_off : %llx blkcnt : %llx "
- "extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- (unsigned long long)ret_imap->br_startblock,
- (unsigned long long)ret_imap->br_startoff,
- (unsigned long long)ret_imap->br_blockcount,
- ret_imap->br_state);
- }
+ if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
+ return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
*ret_imap = imap[0];
*nmaps = 1;
@@ -853,24 +855,10 @@ xfs_iomap_write_allocate(
* See if we were able to allocate an extent that
* covers at least part of the callers request
*/
-
for (i = 0; i < nimaps; i++) {
- if (!(io->io_flags & XFS_IOCORE_RT) &&
- !imap[i].br_startblock) {
- cmn_err(CE_PANIC,"Access to block zero: "
- "fs <%s> inode: %lld "
- "start_block : %llx start_off : %llx "
- "blkcnt : %llx extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- (unsigned long long)
- imap[i].br_startblock,
- (unsigned long long)
- imap[i].br_startoff,
- (unsigned long long)
- imap[i].br_blockcount,
- imap[i].br_state);
- }
+ if (unlikely(!imap[i].br_startblock &&
+ !(io->io_flags & XFS_IOCORE_RT)))
+ return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
if ((offset_fsb >= imap[i].br_startoff) &&
(offset_fsb < (imap[i].br_startoff +
imap[i].br_blockcount))) {
@@ -941,7 +929,7 @@ xfs_iomap_write_unwritten(
XFS_WRITE_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
- goto error0;
+ return XFS_ERROR(error);
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -967,19 +955,11 @@ xfs_iomap_write_unwritten(
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
- goto error0;
-
- if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) {
- cmn_err(CE_PANIC,"Access to block zero: fs <%s> "
- "inode: %lld start_block : %llx start_off : "
- "%llx blkcnt : %llx extent-state : %x \n",
- (ip->i_mount)->m_fsname,
- (long long)ip->i_ino,
- (unsigned long long)imap.br_startblock,
- (unsigned long long)imap.br_startoff,
- (unsigned long long)imap.br_blockcount,
- imap.br_state);
- }
+ return XFS_ERROR(error);
+
+ if (unlikely(!imap.br_startblock &&
+ !(io->io_flags & XFS_IOCORE_RT)))
+ return xfs_cmn_err_fsblock_zero(ip, &imap);
if ((numblks_fsb = imap.br_blockcount) == 0) {
/*
@@ -999,6 +979,5 @@ error_on_bmapi_transaction:
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
xfs_iunlock(ip, XFS_ILOCK_EXCL);
-error0:
return XFS_ERROR(error);
}
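The xfs_iomap.c hunks above collapse several nearly identical CE_PANIC reports into one helper, xfs_cmn_err_fsblock_zero(), which logs an alert and returns EFSCORRUPTED so every caller simply propagates the error instead of panicking. A small user-space sketch of the same "report once in a helper, return the error at each call site" refactor follows; the names (report_block_zero, write_direct) and the error value are invented for illustration and are not the kernel code.

/* Hedged sketch of consolidating repeated error reporting into one helper. */
#include <stdio.h>

#define EFSCORRUPTED	990		/* stand-in error value */

struct mapping {
	unsigned long long startblock;
	unsigned long long startoff;
	unsigned long long blockcount;
};

static int report_block_zero(unsigned long long ino, const struct mapping *m)
{
	fprintf(stderr,
		"access to block zero in inode %llu "
		"start_block: %llx start_off: %llx blkcnt: %llx\n",
		ino, m->startblock, m->startoff, m->blockcount);
	return EFSCORRUPTED;		/* caller propagates the error */
}

static int write_direct(unsigned long long ino, const struct mapping *m,
			int realtime)
{
	if (!m->startblock && !realtime)	/* same check at every call site */
		return report_block_zero(ino, m);
	return 0;				/* normal path */
}

int main(void)
{
	struct mapping bad = { 0, 16, 8 };

	printf("write_direct -> %d\n", write_direct(42, &bad, 0));
	return 0;
}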
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 46249e4d1fea..7775ddc0b3c6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,6 +39,16 @@
#include "xfs_error.h"
#include "xfs_btree.h"
+int
+xfs_internal_inum(
+ xfs_mount_t *mp,
+ xfs_ino_t ino)
+{
+ return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
+ (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+ (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
+}
+
STATIC int
xfs_bulkstat_one_iget(
xfs_mount_t *mp, /* mount point for filesystem */
@@ -52,7 +62,8 @@ xfs_bulkstat_one_iget(
bhv_vnode_t *vp;
int error;
- error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
+ error = xfs_iget(mp, NULL, ino,
+ XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
if (error) {
*stat = BULKSTAT_RV_NOTHING;
return error;
@@ -212,17 +223,12 @@ xfs_bulkstat_one(
xfs_dinode_t *dip; /* dinode inode pointer */
dip = (xfs_dinode_t *)dibuff;
+ *stat = BULKSTAT_RV_NOTHING;
- if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
- (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
- (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
- *stat = BULKSTAT_RV_NOTHING;
+ if (!buffer || xfs_internal_inum(mp, ino))
return XFS_ERROR(EINVAL);
- }
- if (ubsize < sizeof(*buf)) {
- *stat = BULKSTAT_RV_NOTHING;
+ if (ubsize < sizeof(*buf))
return XFS_ERROR(ENOMEM);
- }
buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
@@ -238,8 +244,7 @@ xfs_bulkstat_one(
}
if (copy_to_user(buffer, buf, sizeof(*buf))) {
- *stat = BULKSTAT_RV_NOTHING;
- error = EFAULT;
+ error = EFAULT;
goto out_free;
}
@@ -253,6 +258,46 @@ xfs_bulkstat_one(
}
/*
+ * Test to see whether we can use the ondisk inode directly, based
+ * on the given bulkstat flags, filling in dipp accordingly.
+ * Returns zero if the inode is dodgy.
+ */
+STATIC int
+xfs_bulkstat_use_dinode(
+ xfs_mount_t *mp,
+ int flags,
+ xfs_buf_t *bp,
+ int clustidx,
+ xfs_dinode_t **dipp)
+{
+ xfs_dinode_t *dip;
+ unsigned int aformat;
+
+ *dipp = NULL;
+ if (!bp || (flags & BULKSTAT_FG_IGET))
+ return 1;
+ dip = (xfs_dinode_t *)
+ xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
+ if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC ||
+ !XFS_DINODE_GOOD_VERSION(
+ INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
+ return 0;
+ if (flags & BULKSTAT_FG_QUICK) {
+ *dipp = dip;
+ return 1;
+ }
+ /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
+ aformat = INT_GET(dip->di_core.di_aformat, ARCH_CONVERT);
+ if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
+ (aformat == XFS_DINODE_FMT_LOCAL) ||
+ (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
+ *dipp = dip;
+ return 1;
+ }
+ return 1;
+}
+
+/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -284,10 +329,11 @@ xfs_bulkstat(
xfs_agino_t gino; /* current btree rec's start inode */
int i; /* loop index */
int icount; /* count of inodes good in irbuf */
+ size_t irbsize; /* size of irec buffer in bytes */
xfs_ino_t ino; /* inode number (filesystem) */
- xfs_inobt_rec_t *irbp; /* current irec buffer pointer */
- xfs_inobt_rec_t *irbuf; /* start of irec buffer */
- xfs_inobt_rec_t *irbufend; /* end of good irec buffer entries */
+ xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
+ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
+ xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
xfs_ino_t lastino=0; /* last inode number returned */
int nbcluster; /* # of blocks in a cluster */
int nicluster; /* # of inodes in a cluster */
@@ -328,13 +374,10 @@ xfs_bulkstat(
(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
nimask = ~(nicluster - 1);
nbcluster = nicluster >> mp->m_sb.sb_inopblog;
- /*
- * Allocate a page-sized buffer for inode btree records.
- * We could try allocating something smaller, but for normal
- * calls we'll always (potentially) need the whole page.
- */
- irbuf = kmem_alloc(NBPC, KM_SLEEP);
- nirbuf = NBPC / sizeof(*irbuf);
+ irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4,
+ KM_SLEEP | KM_MAYFAIL | KM_LARGE);
+ nirbuf = irbsize / sizeof(*irbuf);
+
/*
* Loop over the allocation groups, starting from the last
* inode returned; 0 means start of the allocation group.
@@ -358,7 +401,7 @@ xfs_bulkstat(
* Allocate and initialize a btree cursor for ialloc btree.
*/
cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
- (xfs_inode_t *)0, 0);
+ (xfs_inode_t *)0, 0);
irbp = irbuf;
irbufend = irbuf + nirbuf;
end_of_ag = 0;
@@ -395,9 +438,9 @@ xfs_bulkstat(
gcnt++;
}
gfree |= XFS_INOBT_MASKN(0, chunkidx);
- INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
- INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
- INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+ irbp->ir_startino = gino;
+ irbp->ir_freecount = gcnt;
+ irbp->ir_free = gfree;
irbp++;
agino = gino + XFS_INODES_PER_CHUNK;
icount = XFS_INODES_PER_CHUNK - gcnt;
@@ -451,11 +494,27 @@ xfs_bulkstat(
}
/*
* If this chunk has any allocated inodes, save it.
+ * Also start read-ahead now for this chunk.
*/
if (gcnt < XFS_INODES_PER_CHUNK) {
- INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
- INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
- INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
+ /*
+ * Loop over all clusters in the next chunk.
+ * Do a readahead if there are any allocated
+ * inodes in that cluster.
+ */
+ for (agbno = XFS_AGINO_TO_AGBNO(mp, gino),
+ chunkidx = 0;
+ chunkidx < XFS_INODES_PER_CHUNK;
+ chunkidx += nicluster,
+ agbno += nbcluster) {
+ if (XFS_INOBT_MASKN(chunkidx,
+ nicluster) & ~gfree)
+ xfs_btree_reada_bufs(mp, agno,
+ agbno, nbcluster);
+ }
+ irbp->ir_startino = gino;
+ irbp->ir_freecount = gcnt;
+ irbp->ir_free = gfree;
irbp++;
icount += XFS_INODES_PER_CHUNK - gcnt;
}
@@ -479,33 +538,11 @@ xfs_bulkstat(
for (irbp = irbuf;
irbp < irbufend && ubleft >= statstruct_size; irbp++) {
/*
- * Read-ahead the next chunk's worth of inodes.
- */
- if (&irbp[1] < irbufend) {
- /*
- * Loop over all clusters in the next chunk.
- * Do a readahead if there are any allocated
- * inodes in that cluster.
- */
- for (agbno = XFS_AGINO_TO_AGBNO(mp,
- INT_GET(irbp[1].ir_startino, ARCH_CONVERT)),
- chunkidx = 0;
- chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx += nicluster,
- agbno += nbcluster) {
- if (XFS_INOBT_MASKN(chunkidx,
- nicluster) &
- ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT)))
- xfs_btree_reada_bufs(mp, agno,
- agbno, nbcluster);
- }
- }
- /*
* Now process this chunk of inodes.
*/
- for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0;
+ for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
ubleft > 0 &&
- INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK;
+ irbp->ir_freecount < XFS_INODES_PER_CHUNK;
chunkidx++, clustidx++, agino++) {
ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
/*
@@ -525,11 +562,12 @@ xfs_bulkstat(
*/
if ((chunkidx & (nicluster - 1)) == 0) {
agbno = XFS_AGINO_TO_AGBNO(mp,
- INT_GET(irbp->ir_startino, ARCH_CONVERT)) +
+ irbp->ir_startino) +
((chunkidx & nimask) >>
mp->m_sb.sb_inopblog);
- if (flags & BULKSTAT_FG_QUICK) {
+ if (flags & (BULKSTAT_FG_QUICK |
+ BULKSTAT_FG_INLINE)) {
ino = XFS_AGINO_TO_INO(mp, agno,
agino);
bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -543,6 +581,7 @@ xfs_bulkstat(
KM_SLEEP);
ip->i_ino = ino;
ip->i_mount = mp;
+ spin_lock_init(&ip->i_flags_lock);
if (bp)
xfs_buf_relse(bp);
error = xfs_itobp(mp, NULL, ip,
@@ -564,30 +603,34 @@ xfs_bulkstat(
/*
* Skip if this inode is free.
*/
- if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT))
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
continue;
/*
* Count used inodes as free so we can tell
* when the chunk is used up.
*/
- INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1);
+ irbp->ir_freecount++;
ino = XFS_AGINO_TO_INO(mp, agno, agino);
bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
- if (flags & BULKSTAT_FG_QUICK) {
- dip = (xfs_dinode_t *)xfs_buf_offset(bp,
- (clustidx << mp->m_sb.sb_inodelog));
-
- if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT)
- != XFS_DINODE_MAGIC
- || !XFS_DINODE_GOOD_VERSION(
- INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
- continue;
+ if (!xfs_bulkstat_use_dinode(mp, flags, bp,
+ clustidx, &dip))
+ continue;
+ /*
+ * If we need to do an iget, cannot hold bp.
+ * Drop it, until starting the next cluster.
+ */
+ if ((flags & BULKSTAT_FG_INLINE) && !dip) {
+ if (bp)
+ xfs_buf_relse(bp);
+ bp = NULL;
}
/*
* Get the inode and fill in a single buffer.
* BULKSTAT_FG_QUICK uses dip to fill it in.
* BULKSTAT_FG_IGET uses igets.
+ * BULKSTAT_FG_INLINE uses dip if we have an
+ * inline attr fork, else igets.
* See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
* This is also used to count inodes/blks, etc
* in xfs_qm_quotacheck.
@@ -597,8 +640,15 @@ xfs_bulkstat(
ubleft, private_data,
bno, &ubused, dip, &fmterror);
if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error == ENOMEM)
+ if (error == EFAULT) {
+ ubleft = 0;
+ rval = error;
+ break;
+ }
+ else if (error == ENOMEM)
ubleft = 0;
+ else
+ lastino = ino;
continue;
}
if (fmterror == BULKSTAT_RV_GIVEUP) {
@@ -633,7 +683,7 @@ xfs_bulkstat(
/*
* Done, we're either out of filesystem or space to put the data.
*/
- kmem_free(irbuf, NBPC);
+ kmem_free(irbuf, irbsize);
*ubcountp = ubelem;
if (agno >= mp->m_sb.sb_agcount) {
/*
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index be5f12e07d22..f25a28862a17 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -36,15 +36,16 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
/*
* Values for stat return value.
*/
-#define BULKSTAT_RV_NOTHING 0
-#define BULKSTAT_RV_DIDONE 1
-#define BULKSTAT_RV_GIVEUP 2
+#define BULKSTAT_RV_NOTHING 0
+#define BULKSTAT_RV_DIDONE 1
+#define BULKSTAT_RV_GIVEUP 2
/*
* Values for bulkstat flag argument.
*/
-#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
-#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
+#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
+#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
+#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
/*
* Return stat information in bulk (by-inode) for the filesystem.
@@ -80,6 +81,11 @@ xfs_bulkstat_one(
void *dibuff,
int *stat);
+int
+xfs_internal_inum(
+ xfs_mount_t *mp,
+ xfs_ino_t ino);
+
int /* error status */
xfs_inumbers(
xfs_mount_t *mp, /* mount point for filesystem */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 21ac1a67e3e0..c48bf61f17bd 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -617,7 +617,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
reg[0].i_len = sizeof(magic);
XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
- error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0);
+ error = xfs_log_reserve(mp, 600, 1, &tic,
+ XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
if (!error) {
/* remove inited flag */
((xlog_ticket_t *)tic)->t_flags = 0;
@@ -655,8 +656,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
} else {
LOG_UNLOCK(log, s);
}
- if (tic)
+ if (tic) {
+ xlog_trace_loggrant(log, tic, "unmount rec");
+ xlog_ungrant_log_space(log, tic);
xlog_state_put_ticket(log, tic);
+ }
} else {
/*
* We're already in forced_shutdown mode, couldn't
@@ -1196,7 +1200,7 @@ xlog_alloc_log(xfs_mount_t *mp,
kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
iclog = *iclogp;
iclog->hic_data = (xlog_in_core_2_t *)
- kmem_zalloc(iclogsize, KM_SLEEP);
+ kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
@@ -2212,9 +2216,13 @@ xlog_state_do_callback(
iclog = iclog->ic_next;
} while (first_iclog != iclog);
- if (repeats && (repeats % 10) == 0) {
+
+ if (repeats > 5000) {
+ flushcnt += repeats;
+ repeats = 0;
xfs_fs_cmn_err(CE_WARN, log->l_mp,
- "xlog_state_do_callback: looping %d", repeats);
+ "%s: possible infinite loop (%d iterations)",
+ __FUNCTION__, flushcnt);
}
} while (!ioerrors && loopdidcallbacks);
@@ -2246,6 +2254,7 @@ xlog_state_do_callback(
}
#endif
+ flushcnt = 0;
if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
flushcnt = log->l_flushcnt;
log->l_flushcnt = 0;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index eacb3d4987f2..ebbe93f4f97b 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -48,16 +48,10 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
*/
/*
- * Flags to xfs_log_mount
- */
-#define XFS_LOG_RECOVER 0x1
-
-/*
* Flags to xfs_log_done()
*/
#define XFS_LOG_REL_PERM_RESERV 0x1
-
/*
* Flags to xfs_log_reserve()
*
@@ -70,8 +64,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
#define XFS_LOG_SLEEP 0x0
#define XFS_LOG_NOSLEEP 0x1
#define XFS_LOG_PERM_RESERV 0x2
-#define XFS_LOG_RESV_ALL (XFS_LOG_NOSLEEP|XFS_LOG_PERM_RESERV)
-
/*
* Flags to xfs_log_force()
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 34bcbf50789c..9bd3cdf11a87 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -32,7 +32,6 @@ struct xfs_mount;
#define XLOG_MIN_ICLOGS 2
#define XLOG_MED_ICLOGS 4
#define XLOG_MAX_ICLOGS 8
-#define XLOG_CALLBACK_SIZE 10
#define XLOG_HEADER_MAGIC_NUM 0xFEEDbabe /* Invalid cycle number */
#define XLOG_VERSION_1 1
#define XLOG_VERSION_2 2 /* Large IClogs, Log sunit */
@@ -149,9 +148,6 @@ struct xfs_mount;
#define XLOG_WAS_CONT_TRANS 0x08 /* Cont this trans into new region */
#define XLOG_END_TRANS 0x10 /* End a continued transaction */
#define XLOG_UNMOUNT_TRANS 0x20 /* Unmount a filesystem transaction */
-#define XLOG_SKIP_TRANS (XLOG_COMMIT_TRANS | XLOG_CONTINUE_TRANS | \
- XLOG_WAS_CONT_TRANS | XLOG_END_TRANS | \
- XLOG_UNMOUNT_TRANS)
#ifdef __KERNEL__
/*
@@ -506,6 +502,12 @@ extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
#define XLOG_TRACE_SLEEP_FLUSH 3
#define XLOG_TRACE_WAKE_FLUSH 4
+/*
+ * Unmount record type is used as a pseudo transaction type for the ticket.
+ * Its value must be outside the range of XFS_TRANS_* values.
+ */
+#define XLOG_UNMOUNT_REC_TYPE (-1U)
+
#endif /* __KERNEL__ */
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b2bd4be4200a..e5f396ff9a3d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -331,7 +331,7 @@ typedef struct xfs_mount {
xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
lock_t m_agirotor_lock;/* .. and lock protecting it */
xfs_agnumber_t m_maxagi; /* highest inode alloc group */
- uint m_ihsize; /* size of next field */
+ size_t m_ihsize; /* size of next field */
struct xfs_ihash *m_ihash; /* fs private inode hash table*/
struct xfs_inode *m_inodes; /* active inode list */
struct list_head m_del_inodes; /* inodes to reclaim */
@@ -541,7 +541,8 @@ static inline xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp)
#define XFS_VFSTOM(vfs) xfs_vfstom(vfs)
static inline xfs_mount_t *xfs_vfstom(bhv_vfs_t *vfs)
{
- return XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfs), &xfs_vfsops));
+ return XFS_BHVTOM(bhv_lookup_range(VFS_BHVHEAD(vfs),
+ VFS_POSITION_XFS, VFS_POSITION_XFS));
}
#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d)
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index acb853b33ebb..9dcb32aa4e2e 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -281,8 +281,6 @@ typedef struct xfs_qoff_logformat {
XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\
XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\
XFS_GQUOTA_ACCT)
-#define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \
- XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5a0b678956e0..880c73271c05 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1948,7 +1948,7 @@ xfs_growfs_rt(
*/
nrextents = nrblocks;
do_div(nrextents, in->extsize);
- nrbmblocks = roundup_64(nrextents, NBBY * sbp->sb_blocksize);
+ nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
nrextslog = xfs_highbit32(nrextents);
nrsumlevels = nrextslog + 1;
nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
@@ -1976,7 +1976,10 @@ xfs_growfs_rt(
if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks,
mp->m_sb.sb_rsumino)))
return error;
- nmp = NULL;
+ /*
+ * Allocate a new (fake) mount/sb.
+ */
+ nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP);
/*
* Loop over the bitmap blocks.
* We will do everything one bitmap block at a time.
@@ -1987,10 +1990,6 @@ xfs_growfs_rt(
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
bmbno < nrbmblocks;
bmbno++) {
- /*
- * Allocate a new (fake) mount/sb.
- */
- nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP);
*nmp = *mp;
nsbp = &nmp->m_sb;
/*
@@ -2018,13 +2017,13 @@ xfs_growfs_rt(
cancelflags = 0;
if ((error = xfs_trans_reserve(tp, 0,
XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
- goto error_exit;
+ break;
/*
* Lock out other callers by grabbing the bitmap inode lock.
*/
if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
XFS_ILOCK_EXCL, &ip)))
- goto error_exit;
+ break;
ASSERT(ip == mp->m_rbmip);
/*
* Update the bitmap inode's size.
@@ -2038,7 +2037,7 @@ xfs_growfs_rt(
*/
if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0,
XFS_ILOCK_EXCL, &ip)))
- goto error_exit;
+ break;
ASSERT(ip == mp->m_rsumip);
/*
* Update the summary inode's size.
@@ -2053,7 +2052,7 @@ xfs_growfs_rt(
mp->m_rsumlevels != nmp->m_rsumlevels) {
error = xfs_rtcopy_summary(mp, nmp, tp);
if (error)
- goto error_exit;
+ break;
}
/*
* Update superblock fields.
@@ -2080,18 +2079,13 @@ xfs_growfs_rt(
error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
if (error)
- goto error_exit;
+ break;
/*
* Mark more blocks free in the superblock.
*/
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS,
nsbp->sb_rextents - sbp->sb_rextents);
/*
- * Free the fake mp structure.
- */
- kmem_free(nmp, sizeof(*nmp));
- nmp = NULL;
- /*
* Update mp values into the real mp structure.
*/
mp->m_rsumlevels = nrsumlevels;
@@ -2101,15 +2095,15 @@ xfs_growfs_rt(
*/
xfs_trans_commit(tp, 0, NULL);
}
- return 0;
+
+ if (error)
+ xfs_trans_cancel(tp, cancelflags);
/*
- * Error paths come here.
+ * Free the fake mp structure.
*/
-error_exit:
- if (nmp)
- kmem_free(nmp, sizeof(*nmp));
- xfs_trans_cancel(tp, cancelflags);
+ kmem_free(nmp, sizeof(*nmp));
+
return error;
}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index bf168a91ddb8..467854b45c8f 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -60,10 +60,6 @@ struct xfs_mount;
XFS_SB_VERSION_LOGV2BIT | \
XFS_SB_VERSION_SECTORBIT | \
XFS_SB_VERSION_MOREBITSBIT)
-#define XFS_SB_VERSION_OKSASHBITS \
- (XFS_SB_VERSION_NUMBITS | \
- XFS_SB_VERSION_REALFBITS | \
- XFS_SB_VERSION_OKSASHFBITS)
#define XFS_SB_VERSION_OKREALBITS \
(XFS_SB_VERSION_NUMBITS | \
XFS_SB_VERSION_OKREALFBITS | \
@@ -81,9 +77,6 @@ struct xfs_mount;
#define XFS_SB_VERSION2_RESERVED2BIT 0x00000002
#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
-#define XFS_SB_VERSION2_SASHFBITS 0xff000000 /* Mask: features that
- require changing
- PROM and SASH */
#define XFS_SB_VERSION2_OKREALFBITS \
(XFS_SB_VERSION2_ATTR2BIT)
@@ -238,12 +231,6 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
}
#endif /* __KERNEL__ */
-#define XFS_SB_GOOD_SASH_VERSION(sbp) \
- ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \
- ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \
- ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
- !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS)))
-
#define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v)
static inline unsigned xfs_sb_version_tonew(unsigned v)
{
@@ -461,15 +448,6 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
* File system sector to basic block conversions.
*/
#define XFS_FSS_TO_BB(mp,sec) ((sec) << (mp)->m_sectbb_log)
-#define XFS_BB_TO_FSS(mp,bb) \
- (((bb) + (XFS_FSS_TO_BB(mp,1) - 1)) >> (mp)->m_sectbb_log)
-#define XFS_BB_TO_FSST(mp,bb) ((bb) >> (mp)->m_sectbb_log)
-
-/*
- * File system sector to byte conversions.
- */
-#define XFS_FSS_TO_B(mp,sectno) ((xfs_fsize_t)(sectno) << (mp)->m_sb.sb_sectlog)
-#define XFS_B_TO_FSST(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_sectlog)
/*
* File system block to basic block conversions.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9dc88b380608..c68e00105d23 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -149,7 +149,6 @@ typedef struct xfs_item_ops {
void (*iop_unlock)(xfs_log_item_t *);
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
void (*iop_push)(xfs_log_item_t *);
- void (*iop_abort)(xfs_log_item_t *);
void (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
} xfs_item_ops_t;
@@ -163,7 +162,6 @@ typedef struct xfs_item_ops {
#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
-#define IOP_ABORT(ip) (*(ip)->li_ops->iop_abort)(ip)
#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 558c87ff0c41..fc39b166d403 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -276,7 +276,7 @@ xfs_trans_update_ail(
xfs_mount_t *mp,
xfs_log_item_t *lip,
xfs_lsn_t lsn,
- unsigned long s)
+ unsigned long s) __releases(mp->m_ail_lock)
{
xfs_ail_entry_t *ailp;
xfs_log_item_t *dlip=NULL;
@@ -328,7 +328,7 @@ void
xfs_trans_delete_ail(
xfs_mount_t *mp,
xfs_log_item_t *lip,
- unsigned long s)
+ unsigned long s) __releases(mp->m_ail_lock)
{
xfs_ail_entry_t *ailp;
xfs_log_item_t *dlip;
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 13edab8a9e94..447ac4308c91 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -46,11 +46,13 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
/*
* From xfs_trans_ail.c
*/
-void xfs_trans_update_ail(struct xfs_mount *,
- struct xfs_log_item *, xfs_lsn_t,
- unsigned long);
-void xfs_trans_delete_ail(struct xfs_mount *,
- struct xfs_log_item *, unsigned long);
+void xfs_trans_update_ail(struct xfs_mount *mp,
+ struct xfs_log_item *lip, xfs_lsn_t lsn,
+ unsigned long s)
+ __releases(mp->m_ail_lock);
+void xfs_trans_delete_ail(struct xfs_mount *mp,
+ struct xfs_log_item *lip, unsigned long s)
+ __releases(mp->m_ail_lock);
struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
struct xfs_log_item *, int *, int *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a34796e57afb..62336a4cc5a4 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1922,7 +1922,7 @@ xfs_showargs(
}
if (mp->m_flags & XFS_MOUNT_IHASHSIZE)
- seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize);
+ seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", (int)mp->m_ihsize);
if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 23cfa5837728..061e2ffdd1de 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2366,10 +2366,15 @@ xfs_remove(
namelen = VNAMELEN(dentry);
+ if (!xfs_get_dir_entry(dentry, &ip)) {
+ dm_di_mode = ip->i_d.di_mode;
+ IRELE(ip);
+ }
+
if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
- name, NULL, 0, 0, 0);
+ name, NULL, dm_di_mode, 0, 0);
if (error)
return error;
}
@@ -2995,7 +3000,7 @@ xfs_rmdir(
int cancel_flags;
int committed;
bhv_vnode_t *dir_vp;
- int dm_di_mode = 0;
+ int dm_di_mode = S_IFDIR;
int last_cdp_link;
int namelen;
uint resblks;
@@ -3010,11 +3015,16 @@ xfs_rmdir(
return XFS_ERROR(EIO);
namelen = VNAMELEN(dentry);
+ if (!xfs_get_dir_entry(dentry, &cdp)) {
+ dm_di_mode = cdp->i_d.di_mode;
+ IRELE(cdp);
+ }
+
if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
dir_vp, DM_RIGHT_NULL,
NULL, DM_RIGHT_NULL,
- name, NULL, 0, 0, 0);
+ name, NULL, dm_di_mode, 0, 0);
if (error)
return XFS_ERROR(error);
}
@@ -3834,7 +3844,9 @@ xfs_reclaim(
XFS_MOUNT_ILOCK(mp);
vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
+ spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_IRECLAIMABLE;
+ spin_unlock(&ip->i_flags_lock);
XFS_MOUNT_IUNLOCK(mp);
}
return 0;
@@ -3859,8 +3871,10 @@ xfs_finish_reclaim(
* us.
*/
write_lock(&ih->ih_lock);
+ spin_lock(&ip->i_flags_lock);
if ((ip->i_flags & XFS_IRECLAIM) ||
(!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
+ spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);
if (locked) {
xfs_ifunlock(ip);
@@ -3869,6 +3883,7 @@ xfs_finish_reclaim(
return 1;
}
ip->i_flags |= XFS_IRECLAIM;
+ spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);
/*
@@ -4272,7 +4287,7 @@ xfs_free_file_space(
xfs_mount_t *mp;
int nimap;
uint resblks;
- int rounding;
+ uint rounding;
int rt;
xfs_fileoff_t startoffset_fsb;
xfs_trans_t *tp;
@@ -4313,8 +4328,7 @@ xfs_free_file_space(
vn_iowait(vp); /* wait for the completion of any pending DIOs */
}
- rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog),
- (__uint8_t)NBPP);
+ rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP);
ilen = len + (offset & (rounding - 1));
ioffset = offset & ~(rounding - 1);
if (ilen & (rounding - 1))