From c2bd6c11cd05fed1eeb83230e87351357d72bb48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2012 23:52:50 -0400 Subject: [PATCH 01/95] switch do_fsync() to fget_light() Signed-off-by: Al Viro --- fs/sync.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 0e8db939d96f8f..11e3d1c449018d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -188,11 +188,12 @@ static int do_fsync(unsigned int fd, int datasync) { struct file *file; int ret = -EBADF; + int fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { ret = vfs_fsync(file, datasync); - fput(file); + fput_light(file, fput_needed); } return ret; } From 863ced7fe762f80e67bc9171e47c7d80032cce12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:40:32 -0400 Subject: [PATCH 02/95] switch readdir/getdents to fget_light/fput_light Signed-off-by: Al Viro --- fs/compat.c | 33 ++++++++++++++------------------- fs/readdir.c | 33 ++++++++++++++------------------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index 0781e619a62a48..9f77486414bda8 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -871,12 +871,12 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, { int error; struct file *file; + int fput_needed; struct compat_readdir_callback buf; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.result = 0; buf.dirent = dirent; @@ -885,8 +885,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, if (buf.result) error = buf.result; - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -953,16 +952,15 @@ asmlinkage long compat_sys_getdents(unsigned int fd, struct file * file; struct compat_linux_dirent __user * lastdirent; struct compat_getdents_callback buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -979,8 +977,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -1041,16 +1038,15 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct compat_getdents_callback64 buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -1068,8 +1064,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ diff --git a/fs/readdir.c b/fs/readdir.c index cc0a8227cddf68..39e3370d79cf1e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -108,11 +108,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, int error; struct file * file; struct readdir_callback buf; + int fput_needed; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.result = 0; buf.dirent = dirent; @@ -121,8 +121,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -195,16 +194,15 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, struct file * file; struct linux_dirent __user * lastdirent; struct getdents_callback buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -221,8 +219,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -278,16 +275,15 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct getdents_callback64 buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -305,7 +301,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } From 7449af1e8b795abf4ef829ac507861f34dca30b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:41:25 -0400 Subject: [PATCH 03/95] switch xattr syscalls to fget_light/fput_light Signed-off-by: Al Viro --- fs/xattr.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 3c8c1cc333c7c7..1d7ac379045879 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -399,11 +399,12 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { + int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; dentry = f->f_path.dentry; @@ -413,7 +414,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, error = setxattr(dentry, name, value, size, flags); mnt_drop_write_file(f); } - fput(f); + fput_light(f, fput_needed); return error; } @@ -486,15 +487,16 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { + int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = getxattr(f->f_path.dentry, name, value, size); - fput(f); + fput_light(f, fput_needed); return error; } @@ -566,15 +568,16 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { + int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = listxattr(f->f_path.dentry, list, size); - fput(f); + fput_light(f, fput_needed); return error; } @@ -634,11 +637,12 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { + int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; dentry = f->f_path.dentry; @@ -648,7 +652,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) error = removexattr(dentry, name); mnt_drop_write_file(f); } - fput(f); + fput_light(f, fput_needed); return error; } From 545ec2c7945bf7d22d0779e7dc9bf16f7dd9ae34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:42:19 -0400 Subject: [PATCH 04/95] switch fcntl to fget_raw_light/fput_light Signed-off-by: Al Viro --- fs/fcntl.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index d078b75572a75e..81b70e665bf000 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -442,28 +442,24 @@ static int check_fcntl_cmd(unsigned cmd) SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; + int fput_needed; long err = -EBADF; - filp = fget_raw(fd); + filp = fget_raw_light(fd, &fput_needed); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) { - fput(filp); - goto out; - } + if (!check_fcntl_cmd(cmd)) + goto out1; } err = security_file_fcntl(filp, cmd, arg); - if (err) { - fput(filp); - return err; - } + if (!err) + err = do_fcntl(fd, cmd, arg, filp); - err = do_fcntl(fd, cmd, arg, filp); - - fput(filp); +out1: + fput_light(filp, fput_needed); out: return err; } @@ -473,26 +469,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file * filp; - long err; + long err = -EBADF; + int fput_needed; - err = -EBADF; - filp = fget_raw(fd); + filp = fget_raw_light(fd, &fput_needed); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) { - fput(filp); - goto out; - } + if (!check_fcntl_cmd(cmd)) + goto out1; } err = security_file_fcntl(filp, cmd, arg); - if (err) { - fput(filp); - return err; - } - err = -EBADF; + if (err) + goto out1; switch (cmd) { case F_GETLK64: @@ -507,7 +498,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, err = do_fcntl(fd, cmd, arg, filp); break; } - fput(filp); +out1: + fput_light(filp, fput_needed); out: return err; } From 20ba5d736f5a42abbee3e14384ff2d0fdaef2e6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:44:12 -0400 Subject: [PATCH 05/95] switch signalfd4() to fget_light/fput_light Signed-off-by: Al Viro --- fs/signalfd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 7ae2a574cb25a6..9f35a37173de0d 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -269,12 +269,13 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, if (ufd < 0) kfree(ctx); } else { - struct file *file = fget(ufd); + int fput_needed; + struct file *file = fget_light(ufd, &fput_needed); if (!file) return -EBADF; ctx = file->private_data; if (file->f_op != &signalfd_fops) { - fput(file); + fput_light(file, fput_needed); return -EINVAL; } spin_lock_irq(¤t->sighand->siglock); @@ -282,7 +283,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, spin_unlock_irq(¤t->sighand->siglock); wake_up(¤t->sighand->signalfd_wqh); - fput(file); + fput_light(file, fput_needed); } return ufd; From bdc689594bf3ce967bc3a17ba5db3f23222dede0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:46:53 -0400 Subject: [PATCH 06/95] switch flock to fget_light/fput_light Signed-off-by: Al Viro --- fs/locks.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 4f441e46cef47b..814c51d0de4739 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1636,12 +1636,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { struct file *filp; + int fput_needed; struct file_lock *lock; int can_sleep, unlock; int error; error = -EBADF; - filp = fget(fd); + filp = fget_light(fd, &fput_needed); if (!filp) goto out; @@ -1674,7 +1675,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) locks_free_lock(lock); out_putf: - fput(filp); + fput_light(filp, fput_needed); out: return error; } From 0aa2ee5f0a341a7fc081a499b221d29784ed711d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:47:27 -0400 Subject: [PATCH 07/95] switch statfs to fget_light/fput_light Signed-off-by: Al Viro --- fs/statfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/statfs.c b/fs/statfs.c index 43e6b6fe4e8556..95ad5c0e586c9f 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -87,11 +87,12 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - struct file *file = fget(fd); + int fput_needed; + struct file *file = fget_light(fd, &fput_needed); int error = -EBADF; if (file) { error = vfs_statfs(&file->f_path, st); - fput(file); + fput_light(file, fput_needed); } return error; } From c217a2a004d98d09dfceec3a023c563ed800e833 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:47:57 -0400 Subject: [PATCH 08/95] switch utimes() to fget_light/fput_light Signed-off-by: Al Viro --- fs/utimes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/utimes.c b/fs/utimes.c index ba653f3dc1bc9c..fa4dbe451e278e 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -140,18 +140,19 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; if (filename == NULL && dfd != AT_FDCWD) { + int fput_needed; struct file *file; if (flags & AT_SYMLINK_NOFOLLOW) goto out; - file = fget(dfd); + file = fget_light(dfd, &fput_needed); error = -EBADF; if (!file) goto out; error = utimes_common(&file->f_path, times); - fput(file); + fput_light(file, fput_needed); } else { struct path path; int lookup_flags = 0; From 77ba78776e90e8de541f13b326e284c74286252f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 06:24:04 -0400 Subject: [PATCH 09/95] xfs: switch to proper __bitwise type for KM_... flags Signed-off-by: Al Viro --- fs/xfs/kmem.c | 10 +++++----- fs/xfs/kmem.h | 21 +++++++++++---------- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_log_priv.h | 2 +- fs/xfs/xfs_trans.c | 2 +- fs/xfs/xfs_trans.h | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a907de565db3bf..4a7286c1dc80d2 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -46,7 +46,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) } void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, xfs_km_flags_t flags) { int retries = 0; gfp_t lflags = kmem_flags_convert(flags); @@ -65,7 +65,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, xfs_km_flags_t flags) { void *ptr; @@ -87,7 +87,7 @@ kmem_free(const void *ptr) void * kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + xfs_km_flags_t flags) { void *new; @@ -102,7 +102,7 @@ kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) { int retries = 0; gfp_t lflags = kmem_flags_convert(flags); @@ -121,7 +121,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) { void *ptr; diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index ab7c53fe346e22..b2f2620f9a87b9 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -27,10 +27,11 @@ * General memory allocation interfaces */ -#define KM_SLEEP 0x0001u -#define KM_NOSLEEP 0x0002u -#define KM_NOFS 0x0004u -#define KM_MAYFAIL 0x0008u +typedef unsigned __bitwise xfs_km_flags_t; +#define KM_SLEEP ((__force xfs_km_flags_t)0x0001u) +#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) +#define KM_NOFS ((__force xfs_km_flags_t)0x0004u) +#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) /* * We use a special process flag to avoid recursive callbacks into @@ -38,7 +39,7 @@ * warnings, so we explicitly skip any generic ones (silly of us). */ static inline gfp_t -kmem_flags_convert(unsigned int __nocast flags) +kmem_flags_convert(xfs_km_flags_t flags) { gfp_t lflags; @@ -54,9 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags) return lflags; } -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, xfs_km_flags_t); +extern void *kmem_zalloc(size_t, xfs_km_flags_t); +extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void kmem_free(const void *); static inline void *kmem_zalloc_large(size_t size) @@ -107,7 +108,7 @@ kmem_zone_destroy(kmem_zone_t *zone) kmem_cache_destroy(zone); } -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); +extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6b965bf450e44d..f30d9807dc48a0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3152,7 +3152,7 @@ xlog_ticket_alloc( int cnt, char client, bool permanent, - int alloc_flags) + xfs_km_flags_t alloc_flags) { struct xlog_ticket *tic; uint num_headers; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 735ff1ee53da44..5bc33261f5be63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -555,7 +555,7 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern kmem_zone_t *xfs_log_ticket_zone; struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, int count, char client, bool permanent, - int alloc_flags); + xfs_km_flags_t alloc_flags); static inline void diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index cdf896fcbfa438..fdf324508c5ee4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -584,7 +584,7 @@ xfs_trans_t * _xfs_trans_alloc( xfs_mount_t *mp, uint type, - uint memflags) + xfs_km_flags_t memflags) { xfs_trans_t *tp; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7ab99e1898c8de..7c37b533aa8e5c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -443,7 +443,7 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces. */ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); From 6d42e7e9f6d86ed4dfacde75a6cf515068f9749c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:25:07 -0400 Subject: [PATCH 10/95] ubifs: use generic_fillattr() don't open-code it... Signed-off-by: Al Viro --- fs/ubifs/dir.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 62a2727f4ecf71..a6d42efc76d227 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1127,16 +1127,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - stat->dev = inode->i_sb->s_dev; - stat->ino = inode->i_ino; - stat->mode = inode->i_mode; - stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; + generic_fillattr(inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:34:06 -0400 Subject: [PATCH 11/95] ->encode_fh() API change pass inode + parent's inode or NULL instead of dentry + bool saying whether we want the parent or not. NOTE: that needs ceph fix folded in. Signed-off-by: Al Viro --- fs/btrfs/export.c | 15 ++++----------- fs/ceph/export.c | 2 ++ fs/exportfs/expfs.c | 33 +++++++++++++++++++-------------- fs/fat/inode.c | 9 ++++----- fs/fuse/inode.c | 17 +++++------------ fs/gfs2/export.c | 17 +++++------------ fs/isofs/export.c | 13 ++++--------- fs/nilfs2/namei.c | 22 ++++++++++------------ fs/ocfs2/export.c | 19 +++++++------------ fs/reiserfs/inode.c | 30 ++++++++++++------------------ fs/reiserfs/reiserfs.h | 4 ++-- fs/udf/namei.c | 14 +++++--------- fs/xfs/xfs_export.c | 23 +++++++++-------------- include/linux/exportfs.h | 4 ++-- mm/cleancache.c | 6 ++---- mm/shmem.c | 6 ++---- 16 files changed, 94 insertions(+), 140 deletions(-) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index e887ee62b6d4ba..614f34a899c2db 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -13,15 +13,14 @@ parent_root_objectid) / 4) #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) -static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, - int connectable) +static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { struct btrfs_fid *fid = (struct btrfs_fid *)fh; - struct inode *inode = dentry->d_inode; int len = *max_len; int type; - if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { + if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { *max_len = BTRFS_FID_SIZE_CONNECTABLE; return 255; } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { @@ -36,19 +35,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; + if (parent) { u64 parent_root_id; - spin_lock(&dentry->d_lock); - - parent = dentry->d_parent->d_inode; fid->parent_objectid = BTRFS_I(parent)->location.objectid; fid->parent_gen = parent->i_generation; parent_root_id = BTRFS_I(parent)->root->objectid; - spin_unlock(&dentry->d_lock); - if (parent_root_id != fid->root_objectid) { fid->parent_root_objectid = parent_root_id; len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index fbb2a643ef10a1..4f9234c6da613a 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -247,7 +247,9 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, } const struct export_operations ceph_export_ops = { +#ifdef CEPH_BREAKAGE_FIXED .encode_fh = ceph_encode_fh, +#endif .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent = ceph_fh_to_parent, }; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b05acb7961355d..b0201ca6e9c6e0 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -304,24 +304,23 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, /** * export_encode_fh - default export_operations->encode_fh function - * @dentry: the dentry to encode + * @inode: the object to encode * @fh: where to store the file handle fragment * @max_len: maximum length to store there - * @connectable: whether to store parent information + * @parent: parent directory inode, if wanted * * This default encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. */ -static int export_encode_fh(struct dentry *dentry, struct fid *fid, - int *max_len, int connectable) +static int export_encode_fh(struct inode *inode, struct fid *fid, + int *max_len, struct inode *parent) { - struct inode * inode = dentry->d_inode; int len = *max_len; int type = FILEID_INO32_GEN; - if (connectable && (len < 4)) { + if (parent && (len < 4)) { *max_len = 4; return 255; } else if (len < 2) { @@ -332,14 +331,9 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid, len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); len = 4; type = FILEID_INO32_GEN_PARENT; } @@ -352,11 +346,22 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, { const struct export_operations *nop = dentry->d_sb->s_export_op; int error; + struct dentry *p = NULL; + struct inode *inode = dentry->d_inode, *parent = NULL; + if (connectable && !S_ISDIR(inode->i_mode)) { + p = dget_parent(dentry); + /* + * note that while p might've ceased to be our parent already, + * it's still pinned by and still positive. + */ + parent = p->d_inode; + } if (nop->encode_fh) - error = nop->encode_fh(dentry, fid->raw, max_len, connectable); + error = nop->encode_fh(inode, fid->raw, max_len, parent); else - error = export_encode_fh(dentry, fid, max_len, connectable); + error = export_encode_fh(inode, fid, max_len, parent); + dput(p); return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index b3d290c1b51392..7edfaadc07878e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -752,10 +752,9 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, } static int -fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) +fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) { int len = *lenp; - struct inode *inode = de->d_inode; u32 ipos_h, ipos_m, ipos_l; if (len < 5) { @@ -771,9 +770,9 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) fh[1] = inode->i_generation; fh[2] = ipos_h; fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; - spin_lock(&de->d_lock); - fh[4] = ipos_l | MSDOS_I(de->d_parent->d_inode)->i_logstart; - spin_unlock(&de->d_lock); + fh[4] = ipos_l; + if (parent) + fh[4] |= MSDOS_I(parent)->i_logstart; return 3; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 56f6dcf3076842..42678a33b7bb62 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -627,12 +627,10 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, return ERR_PTR(err); } -static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, - int connectable) +static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; - bool encode_parent = connectable && !S_ISDIR(inode->i_mode); - int len = encode_parent ? 6 : 3; + int len = parent ? 6 : 3; u64 nodeid; u32 generation; @@ -648,14 +646,9 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, fh[1] = (u32)(nodeid & 0xffffffff); fh[2] = generation; - if (encode_parent) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { nodeid = get_fuse_inode(parent)->nodeid; generation = parent->i_generation; - spin_unlock(&dentry->d_lock); fh[3] = (u32)(nodeid >> 32); fh[4] = (u32)(nodeid & 0xffffffff); @@ -663,7 +656,7 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, } *max_len = len; - return encode_parent ? 0x82 : 0x81; + return parent ? 0x82 : 0x81; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 70ba891654f8ce..e8ed6d4a618113 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -28,15 +28,14 @@ #define GFS2_LARGE_FH_SIZE 8 #define GFS2_OLD_FH_SIZE 10 -static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, - int connectable) +static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, + struct inode *parent) { __be32 *fh = (__force __be32 *)p; - struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); - if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { + if (parent && (*len < GFS2_LARGE_FH_SIZE)) { *len = GFS2_LARGE_FH_SIZE; return 255; } else if (*len < GFS2_SMALL_FH_SIZE) { @@ -50,14 +49,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; - if (!connectable || inode == sb->s_root->d_inode) + if (!parent || inode == sb->s_root->d_inode) return *len; - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - ip = GFS2_I(inode); - igrab(inode); - spin_unlock(&dentry->d_lock); + ip = GFS2_I(parent); fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); @@ -65,8 +60,6 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_LARGE_FH_SIZE; - iput(inode); - return *len; } diff --git a/fs/isofs/export.c b/fs/isofs/export.c index dd4687ff30d099..aa4356d09eeeb0 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -107,12 +107,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) } static int -isofs_export_encode_fh(struct dentry *dentry, +isofs_export_encode_fh(struct inode *inode, __u32 *fh32, int *max_len, - int connectable) + struct inode *parent) { - struct inode * inode = dentry->d_inode; struct iso_inode_info * ei = ISOFS_I(inode); int len = *max_len; int type = 1; @@ -124,7 +123,7 @@ isofs_export_encode_fh(struct dentry *dentry, * offset of the inode and the upper 16 bits of fh32[1] to * hold the offset of the parent. */ - if (connectable && (len < 5)) { + if (parent && (len < 5)) { *max_len = 5; return 255; } else if (len < 3) { @@ -136,16 +135,12 @@ isofs_export_encode_fh(struct dentry *dentry, fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ fh32[2] = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; + if (parent) { struct iso_inode_info *eparent; - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; eparent = ISOFS_I(parent); fh32[3] = eparent->i_iget5_block; fh16[3] = (__u16)eparent->i_iget5_offset; /* fh16 [sic] */ fh32[4] = parent->i_generation; - spin_unlock(&dentry->d_lock); len = 5; type = 2; } diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 0bb2c2010b9512..b72847988b78d9 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -508,31 +508,29 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); } -static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, - int connectable) +static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - struct inode *inode = dentry->d_inode; struct nilfs_root *root = NILFS_I(inode)->i_root; int type; - if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || - (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) + if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_CONNECTABLE; + return 255; + } + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; return 255; + } fid->cno = root->cno; fid->ino = inode->i_ino; fid->gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->parent_ino = parent->i_ino; fid->parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); - type = FILEID_NILFS_WITH_PARENT; *lenp = NILFS_FID_SIZE_CONNECTABLE; } else { diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 745db42528d5fd..322216a5f0dd1e 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -177,21 +177,23 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) return parent; } -static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, - int connectable) +static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; int len = *max_len; int type = 1; u64 blkno; u32 generation; __le32 *fh = (__force __le32 *) fh_in; +#ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION +#error "You go ahead and fix that mess, then. Somehow" trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len, dentry->d_name.name, fh, len, connectable); +#endif - if (connectable && (len < 6)) { + if (parent && (len < 6)) { *max_len = 6; type = 255; goto bail; @@ -211,12 +213,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[2] = cpu_to_le32(generation); - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - - parent = dentry->d_parent->d_inode; + if (parent) { blkno = OCFS2_I(parent)->ip_blkno; generation = parent->i_generation; @@ -224,8 +221,6 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[5] = cpu_to_le32(generation); - spin_unlock(&dentry->d_lock); - len = 6; type = 2; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 59d06871a850dc..a6d4268fb6c117 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1592,13 +1592,12 @@ struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, (fh_type == 6) ? fid->raw[5] : 0); } -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int need_parent) +int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, + struct inode *parent) { - struct inode *inode = dentry->d_inode; int maxlen = *lenp; - if (need_parent && (maxlen < 5)) { + if (parent && (maxlen < 5)) { *lenp = 5; return 255; } else if (maxlen < 3) { @@ -1610,20 +1609,15 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); data[2] = inode->i_generation; *lenp = 3; - /* no room for directory info? return what we've stored so far */ - if (maxlen < 5 || !need_parent) - return 3; - - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - data[3] = inode->i_ino; - data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - *lenp = 5; - if (maxlen >= 6) { - data[5] = inode->i_generation; - *lenp = 6; - } - spin_unlock(&dentry->d_lock); + if (parent) { + data[3] = parent->i_ino; + data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id); + *lenp = 5; + if (maxlen >= 6) { + data[5] = parent->i_generation; + *lenp = 6; + } + } return *lenp; } diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index a59d27126338e4..14a4f9dfb1714d 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2611,8 +2611,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int connectable); +int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, + struct inode *parent); int reiserfs_truncate_file(struct inode *, int update_timestamps); void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, diff --git a/fs/udf/namei.c b/fs/udf/namei.c index a165c66e3eef22..18024178ac4c04 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1260,16 +1260,15 @@ static struct dentry *udf_fh_to_parent(struct super_block *sb, fid->udf.parent_partref, fid->udf.parent_generation); } -static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, - int connectable) +static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) { int len = *lenp; - struct inode *inode = de->d_inode; struct kernel_lb_addr location = UDF_I(inode)->i_location; struct fid *fid = (struct fid *)fh; int type = FILEID_UDF_WITHOUT_PARENT; - if (connectable && (len < 5)) { + if (parent && (len < 5)) { *lenp = 5; return 255; } else if (len < 3) { @@ -1282,14 +1281,11 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, fid->udf.partref = location.partitionReferenceNum; fid->udf.generation = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - spin_lock(&de->d_lock); - inode = de->d_parent->d_inode; - location = UDF_I(inode)->i_location; + if (parent) { + location = UDF_I(parent)->i_location; fid->udf.parent_block = location.logicalBlockNum; fid->udf.parent_partref = location.partitionReferenceNum; fid->udf.parent_generation = inode->i_generation; - spin_unlock(&de->d_lock); *lenp = 5; type = FILEID_UDF_WITH_PARENT; } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 2d25d19c4ea17b..42679223a0fde6 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -52,19 +52,18 @@ static int xfs_fileid_length(int fileid_type) STATIC int xfs_fs_encode_fh( - struct dentry *dentry, - __u32 *fh, - int *max_len, - int connectable) + struct inode *inode, + __u32 *fh, + int *max_len, + struct inode *parent) { struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; - struct inode *inode = dentry->d_inode; int fileid_type; int len; /* Directories don't need their parent encoded, they have ".." */ - if (S_ISDIR(inode->i_mode) || !connectable) + if (!parent) fileid_type = FILEID_INO32_GEN; else fileid_type = FILEID_INO32_GEN_PARENT; @@ -96,20 +95,16 @@ xfs_fs_encode_fh( switch (fileid_type) { case FILEID_INO32_GEN_PARENT: - spin_lock(&dentry->d_lock); - fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; - fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); + fid->i32.parent_ino = XFS_I(parent)->i_ino; + fid->i32.parent_gen = parent->i_generation; /*FALLTHRU*/ case FILEID_INO32_GEN: fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - spin_lock(&dentry->d_lock); - fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; - fid64->parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); + fid64->parent_ino = XFS_I(parent)->i_ino; + fid64->parent_gen = parent->i_generation; /*FALLTHRU*/ case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: fid64->ino = XFS_I(inode)->i_ino; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5322dcab..12291a7ee27591 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); + int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, diff --git a/mm/cleancache.c b/mm/cleancache.c index 5646c740f613ed..32e6f4136fa229 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(__cleancache_init_shared_fs); static int cleancache_get_key(struct inode *inode, struct cleancache_filekey *key) { - int (*fhfn)(struct dentry *, __u32 *fh, int *, int); + int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); int len = 0, maxlen = CLEANCACHE_KEY_MAX; struct super_block *sb = inode->i_sb; @@ -88,9 +88,7 @@ static int cleancache_get_key(struct inode *inode, if (sb->s_export_op != NULL) { fhfn = sb->s_export_op->encode_fh; if (fhfn) { - struct dentry d; - d.d_inode = inode; - len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); + len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); if (len <= 0 || len == 255) return -1; if (maxlen > CLEANCACHE_KEY_MAX) diff --git a/mm/shmem.c b/mm/shmem.c index be5af34a070dcd..3711422c31726b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2033,11 +2033,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, return dentry; } -static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, - int connectable) +static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; - if (*len < 3) { *len = 3; return 255; From c862868bb455694704c255481369c40d7185eb25 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 5 Apr 2012 12:07:36 -0700 Subject: [PATCH 12/95] ceph: move encode_fh to new API Use parent_inode has a flag for whether nfsd wants a connectable fh, but generate one opportunistically so that we can take advantage of the additional info in there. Signed-off-by: Sage Weil Signed-off-by: Al Viro --- fs/ceph/export.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4f9234c6da613a..8e1b60e557b65b 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -40,38 +40,49 @@ struct ceph_nfs_confh { u32 parent_name_hash; } __attribute__ ((packed)); -static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, - int connectable) +/* + * The presence of @parent_inode here tells us whether NFS wants a + * connectable file handle. However, we want to make a connectionable + * file handle unconditionally so that the MDS gets as much of a hint + * as possible. That means we only use @parent_dentry to indicate + * whether nfsd wants a connectable fh, and whether we should indicate + * failure from a too-small @max_len. + */ +static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, + struct inode *parent_inode) { int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; - struct dentry *parent; - struct inode *inode = dentry->d_inode; int connected_handle_length = sizeof(*cfh)/4; int handle_length = sizeof(*fh)/4; + struct dentry *dentry = d_find_alias(inode); + struct dentry *parent; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; - if (*max_len >= connected_handle_length) { + /* if we found an alias, generate a connectable fh */ + if (*max_len >= connected_handle_length && dentry) { dout("encode_fh %p connectable\n", dentry); - cfh->ino = ceph_ino(dentry->d_inode); + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, dentry); *max_len = connected_handle_length; type = 2; + spin_unlock(&dentry->d_lock); } else if (*max_len >= handle_length) { - if (connectable) { + if (parent_inode) { + /* nfsd wants connectable */ *max_len = connected_handle_length; type = 255; } else { dout("encode_fh %p\n", dentry); - fh->ino = ceph_ino(dentry->d_inode); + fh->ino = ceph_ino(inode); *max_len = handle_length; type = 1; } @@ -79,7 +90,6 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } - spin_unlock(&dentry->d_lock); return type; } @@ -247,9 +257,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, } const struct export_operations ceph_export_ops = { -#ifdef CEPH_BREAKAGE_FIXED .encode_fh = ceph_encode_fh, -#endif .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent = ceph_fh_to_parent, }; From cc1dad7183e4cb7f5d313b6942f2059fc0eabab6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 19:40:47 -0400 Subject: [PATCH 13/95] selinuxfs snprintf() misuses a) %d does _not_ produce a page worth of output b) snprintf() doesn't return negatives - it used to in old glibc, but that's the kernel... Signed-off-by: Al Viro --- security/selinux/selinuxfs.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 4e93f9ef970b25..3ad29025128882 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1259,12 +1259,8 @@ static int sel_make_bools(void) if (!inode) goto out; - ret = -EINVAL; - len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); - if (len < 0) - goto out; - ret = -ENAMETOOLONG; + len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); if (len >= PAGE_SIZE) goto out; @@ -1557,19 +1553,10 @@ static inline u32 sel_ino_to_perm(unsigned long ino) static ssize_t sel_read_class(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t rc, len; - char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino)); - rc = simple_read_from_buffer(buf, count, ppos, page, len); - free_page((unsigned long)page); - - return rc; + char res[TMPBUFLEN]; + ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino)); + return simple_read_from_buffer(buf, count, ppos, res, len); } static const struct file_operations sel_class_ops = { @@ -1580,19 +1567,10 @@ static const struct file_operations sel_class_ops = { static ssize_t sel_read_perm(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t rc, len; - char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino)); - rc = simple_read_from_buffer(buf, count, ppos, page, len); - free_page((unsigned long)page); - - return rc; + char res[TMPBUFLEN]; + ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino)); + return simple_read_from_buffer(buf, count, ppos, res, len); } static const struct file_operations sel_perm_ops = { From af569596a9b85626564149c5c4c0c17d05baa2da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 20:02:53 -0400 Subject: [PATCH 14/95] kill v9fs_dentry_from_dir_inode() In *all* callers we have a dentry of child of that directory. Just use ->d_parent of that one, for fsck sake... Signed-off-by: Al Viro --- fs/9p/vfs_inode_dotl.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a1e6c990cd410e..e3dd2a1e2bfc18 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -68,24 +68,6 @@ static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -/** - * v9fs_dentry_from_dir_inode - helper function to get the dentry from - * dir inode. - * - */ - -static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) -{ - struct dentry *dentry; - - spin_lock(&inode->i_lock); - /* Directory should have only one entry. */ - BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); - dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); - spin_unlock(&inode->i_lock); - return dentry; -} - static int v9fs_test_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); @@ -415,7 +397,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, if (dir->i_mode & S_ISGID) omode |= S_ISGID; - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -793,7 +775,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); v9ses = v9fs_inode2v9ses(dir); - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); @@ -858,7 +840,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, return -EINVAL; v9ses = v9fs_inode2v9ses(dir); - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); From 66f8f50920472f9b6d0a797a29dc8a8ada0b24c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 01:40:50 -0400 Subject: [PATCH 15/95] affs: bury unused macros ... unused since 2.4.4. Signed-off-by: Al Viro --- fs/affs/affs.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 45a0ce45d7b46a..1fceb320d2f22c 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -18,14 +18,6 @@ #define AFFS_GET_HASHENTRY(data,hashkey) be32_to_cpu(((struct dir_front *)data)->hashtable[hashkey]) #define AFFS_BLOCK(sb, bh, blk) (AFFS_HEAD(bh)->table[AFFS_SB(sb)->s_hashsize-1-(blk)]) -#ifdef __LITTLE_ENDIAN -#define BO_EXBITS 0x18UL -#elif defined(__BIG_ENDIAN) -#define BO_EXBITS 0x00UL -#else -#error Endianness must be known for affs to work. -#endif - #define AFFS_HEAD(bh) ((struct affs_head *)(bh)->b_data) #define AFFS_TAIL(sb, bh) ((struct affs_tail *)((bh)->b_data+(sb)->s_blocksize-sizeof(struct affs_tail))) #define AFFS_ROOT_HEAD(bh) ((struct affs_root_head *)(bh)->b_data) From 8515841086d14594b24cdc8febdcc7fd1bbc313e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 18:47:13 -0400 Subject: [PATCH 16/95] ocfs2: trivial endianness misannotations Signed-off-by: Al Viro --- fs/ocfs2/dlm/dlmast.c | 2 +- fs/ocfs2/dlm/dlmcommon.h | 6 +++--- fs/ocfs2/dlm/dlmdomain.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 3a3ed4bb794b0d..fbec0be6232622 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -293,7 +293,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; struct list_head *iter, *head=NULL; - u64 cookie; + __be64 cookie; u32 flags; u8 node; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a5952ceecba5a8..de854cca12a2d2 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -679,7 +679,7 @@ struct dlm_query_join_packet { }; union dlm_query_join_response { - u32 intval; + __be32 intval; struct dlm_query_join_packet packet; }; @@ -755,8 +755,8 @@ struct dlm_query_region { struct dlm_node_info { u8 ni_nodenum; u8 pad1; - u16 ni_ipv4_port; - u32 ni_ipv4_address; + __be16 ni_ipv4_port; + __be32 ni_ipv4_address; }; struct dlm_query_nodeinfo { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 92f2ead0fab6de..9e89d70df337fc 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -818,7 +818,7 @@ static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, union dlm_query_join_response response; response.packet = *packet; - *wire = cpu_to_be32(response.intval); + *wire = be32_to_cpu(response.intval); } static void dlm_query_join_wire_to_packet(u32 wire, From f6a5690324d5ab9c33bbc0a6b4cc59c7fa34eeec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 19:52:19 -0400 Subject: [PATCH 17/95] ocfs2: deal with __user misannotations Signed-off-by: Al Viro --- fs/ocfs2/ioctl.c | 31 ++++++++++++++----------------- fs/ocfs2/move_extents.c | 6 ++---- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index a1a1bfd652c90d..d96f7f81d8dd32 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -864,7 +864,7 @@ int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, if (status) break; - reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; + reqp = (struct ocfs2_info_request __user *)(unsigned long)req_addr; if (!reqp) { status = -EINVAL; goto bail; @@ -888,9 +888,11 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ocfs2_space_resv sr; struct ocfs2_new_group_input input; struct reflink_arguments args; - const char *old_path, *new_path; + const char __user *old_path; + const char __user *new_path; bool preserve; struct ocfs2_info info; + void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC_GETFLAGS: @@ -937,17 +939,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ocfs2_group_add(inode, &input); case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, (struct reflink_arguments *)arg, - sizeof(args))) + if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - old_path = (const char *)(unsigned long)args.old_path; - new_path = (const char *)(unsigned long)args.new_path; + old_path = (const char __user *)(unsigned long)args.old_path; + new_path = (const char __user *)(unsigned long)args.new_path; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); case OCFS2_IOC_INFO: - if (copy_from_user(&info, (struct ocfs2_info __user *)arg, - sizeof(struct ocfs2_info))) + if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); @@ -960,22 +960,20 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&range, (struct fstrim_range *)arg, - sizeof(range))) + if (copy_from_user(&range, argp, sizeof(range))) return -EFAULT; ret = ocfs2_trim_fs(sb, &range); if (ret < 0) return ret; - if (copy_to_user((struct fstrim_range *)arg, &range, - sizeof(range))) + if (copy_to_user(argp, &range, sizeof(range))) return -EFAULT; return 0; } case OCFS2_IOC_MOVE_EXT: - return ocfs2_ioctl_move_extents(filp, (void __user *)arg); + return ocfs2_ioctl_move_extents(filp, argp); default: return -ENOTTY; } @@ -988,6 +986,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct reflink_arguments args; struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_info info; + void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC32_GETFLAGS: @@ -1006,16 +1005,14 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case FITRIM: break; case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, (struct reflink_arguments *)arg, - sizeof(args))) + if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), compat_ptr(args.new_path), preserve); case OCFS2_IOC_INFO: - if (copy_from_user(&info, (struct ocfs2_info __user *)arg, - sizeof(struct ocfs2_info))) + if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 1); diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index b1e3fce72ea476..6083432f667e30 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1082,8 +1082,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->file = filp; if (argp) { - if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, - sizeof(range))) { + if (copy_from_user(&range, argp, sizeof(range))) { status = -EFAULT; goto out; } @@ -1138,8 +1137,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) * length and new_offset even if failure happens somewhere. */ if (argp) { - if (copy_to_user((struct ocfs2_move_extents *)argp, &range, - sizeof(range))) + if (copy_to_user(argp, &range, sizeof(range))) status = -EFAULT; } From 1db5df98faaf7aa6c25bc7d9703342d13678452a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 19:58:53 -0400 Subject: [PATCH 18/95] ocfs2: kill endianness abuses in blockcheck.c ocfs2_block_check is for little-endian contents; if we just want to its fields converted to host-endian in a couple of functions, just put those values into local u32 and u16... Signed-off-by: Al Viro --- fs/ocfs2/blockcheck.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index c7ee03c2222625..0725e605465040 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -422,45 +422,46 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, struct ocfs2_blockcheck_stats *stats) { int rc = 0; - struct ocfs2_block_check check; + u32 bc_crc32e; + u16 bc_ecc; u32 crc, ecc; ocfs2_blockcheck_inc_check(stats); - check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); - check.bc_ecc = le16_to_cpu(bc->bc_ecc); + bc_crc32e = le32_to_cpu(bc->bc_crc32e); + bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) + if (crc == bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ ecc = ocfs2_hamming_encode_block(data, blocksize); - ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); + ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc); /* And check the crc32 again */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) { + if (crc == bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); - bc->bc_ecc = cpu_to_le16(check.bc_ecc); + bc->bc_crc32e = cpu_to_le32(bc_crc32e); + bc->bc_ecc = cpu_to_le16(bc_ecc); return rc; } @@ -528,7 +529,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, struct ocfs2_blockcheck_stats *stats) { int i, rc = 0; - struct ocfs2_block_check check; + u32 bc_crc32e; + u16 bc_ecc; u32 crc, ecc, fix; BUG_ON(nr < 0); @@ -538,21 +540,21 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, ocfs2_blockcheck_inc_check(stats); - check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); - check.bc_ecc = le16_to_cpu(bc->bc_ecc); + bc_crc32e = le32_to_cpu(bc->bc_crc32e); + bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) + if (crc == bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ for (i = 0, ecc = 0; i < nr; i++) { @@ -565,7 +567,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, bhs[i]->b_size * 8, bhs[i]->b_size * 8 * i); } - fix = ecc ^ check.bc_ecc; + fix = ecc ^ bc_ecc; for (i = 0; i < nr; i++) { /* * Try the fix against each buffer. It will only affect @@ -578,19 +580,19 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, /* And check the crc32 again */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) { + if (crc == bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); - bc->bc_ecc = cpu_to_le16(check.bc_ecc); + bc->bc_crc32e = cpu_to_le32(bc_crc32e); + bc->bc_ecc = cpu_to_le16(bc_ecc); return rc; } From 528c032764f4d3c6cb5f5ece090d9d5882655982 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Apr 2012 11:03:55 -0400 Subject: [PATCH 19/95] btrfs: trivial endianness annotations Signed-off-by: Al Viro --- fs/btrfs/free-space-cache.c | 7 ++++--- fs/btrfs/ulist.c | 4 ++-- fs/btrfs/ulist.h | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008ec367d4c..eb453506facd09 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, return ERR_PTR(-ENOENT); } - inode->i_mapping->flags &= ~__GFP_FS; + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); return inode; } @@ -365,7 +366,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) { - u64 *val; + __le64 *val; io_ctl_map_page(io_ctl, 1); @@ -388,7 +389,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) { - u64 *gen; + __le64 *gen; /* * Skip the crc area. If we don't check crcs then we just have a 64bit diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 12f5147bd2b1ae..ad993bc2df93eb 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); * * The allocated ulist will be returned in an initialized state. */ -struct ulist *ulist_alloc(unsigned long gfp_mask) +struct ulist *ulist_alloc(gfp_t gfp_mask) { struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); @@ -144,7 +144,7 @@ EXPORT_SYMBOL(ulist_free); * unaltered. */ int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long gfp_mask) + gfp_t gfp_mask) { int i; diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 2e25dec58ec0e5..ad85b0e60af276 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -59,10 +59,10 @@ struct ulist { void ulist_init(struct ulist *ulist); void ulist_fini(struct ulist *ulist); void ulist_reinit(struct ulist *ulist); -struct ulist *ulist_alloc(unsigned long gfp_mask); +struct ulist *ulist_alloc(gfp_t gfp_mask); void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long gfp_mask); + gfp_t gfp_mask); struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); #endif From de5e2b36289e6c81c3f7dcb9eef38d78de1f8b5c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Apr 2012 01:24:37 -0400 Subject: [PATCH 20/95] hpfs: endianness bugs a couple of le32 and le16 used with wrong le..._to_cpu(), plus idiotic use of le32_to_cpu() on 1-bit bitfield Signed-off-by: Al Viro --- fs/hpfs/ea.c | 2 +- fs/hpfs/super.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index d8b84d113c891b..cd098e3eb0e645 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -246,7 +246,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", (unsigned long)inode->i_ino, - le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); + le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); return; } if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 54f6eccb79d9ed..08e85b019131e1 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -572,7 +572,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) mark_buffer_dirty(bh2); } - if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) { + if (spareblock->hotfixes_used || le32_to_cpu(spareblock->n_spares_used)) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); @@ -645,7 +645,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_mtime.tv_nsec = 0; root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); root->i_ctime.tv_nsec = 0; - hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size); + hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) root->i_size = 2048; From 185553b22436fe754f4ae8ec11344e822bb83717 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Apr 2012 17:03:25 -0700 Subject: [PATCH 21/95] fs: fix inode.c kernel-doc warnings Fix kernel-doc warnings in fs/inode.c: Warning(fs/inode.c:1493): No description found for parameter 'path' Warning(fs/inode.c:1493): Excess function parameter 'mnt' description in 'touch_atime' Warning(fs/inode.c:1493): Excess function parameter 'dentry' description in 'touch_atime' Signed-off-by: Randy Dunlap Signed-off-by: Al Viro --- fs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 6bc8761cc33335..183ddd6cda7115 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1489,8 +1489,7 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, /** * touch_atime - update the access time - * @mnt: mount the inode is accessed on - * @dentry: dentry accessed + * @path: the &struct path to update * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, From 4085e155b14a89ee36f7bfc5bd07294b0c34b0e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 13:21:09 -0400 Subject: [PATCH 22/95] hpfs: get rid of bitfields endianness wanking in extended_attribute Signed-off-by: Al Viro --- fs/hpfs/anode.c | 4 ++-- fs/hpfs/ea.c | 34 +++++++++++++++++----------------- fs/hpfs/hpfs.h | 31 ++++++++++++++++--------------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 08b503e8ed29ec..3cb4d9c3eca4ea 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -483,8 +483,8 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) - if (ea->indirect) - hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); + if (ea_indirect(ea)) + hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index cd098e3eb0e645..7cd0092771f4b6 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -23,15 +23,15 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_valuelen(ea) != 8) { - hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", + hpfs_error(s, "ea_indirect(ea) set while ea->valuelen!=8, %s %08x, pos %08x", ano ? "anode" : "sectors", a, pos); return; } if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 9, ex+4)) return; - hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); + hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); } pos += ea->namelen + ea_valuelen(ea) + 5; } @@ -81,7 +81,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) + if (ea_indirect(ea)) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -101,10 +101,10 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, return -EIO; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return -EIO; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return -EIO; if (!strcmp(ea->name, key)) { - if (ea->indirect) + if (ea_indirect(ea)) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -119,7 +119,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, indirect: if (ea_len(ea) >= size) return -EINVAL; - if (hpfs_ea_read(s, ea_sec(ea), ea->anode, 0, ea_len(ea), buf)) + if (hpfs_ea_read(s, ea_sec(ea), ea_in_anode(ea), 0, ea_len(ea), buf)) return -EIO; buf[ea_len(ea)] = 0; return 0; @@ -136,8 +136,8 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) - return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); + if (ea_indirect(ea)) + return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -159,11 +159,11 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si return NULL; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return NULL; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return NULL; if (!strcmp(ea->name, key)) { - if (ea->indirect) - return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); + if (ea_indirect(ea)) + return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -199,9 +199,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_len(ea) == size) - set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); + set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); } else if (ea_valuelen(ea) == size) { memcpy(ea_data(ea), data, size); } @@ -220,12 +220,12 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return; if (!strcmp(ea->name, key)) { - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_len(ea) == size) - set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); + set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); } else { if (ea_valuelen(ea) == size) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 8b0650aae32812..ca90bde6564c0e 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -528,32 +528,23 @@ struct anode run, or in multiple runs. Flags in the fnode tell whether the EA list is immediate, in a single run, or in multiple runs. */ +enum {EA_indirect = 1, EA_anode = 2, EA_needea = 128 }; struct extended_attribute { -#ifdef __LITTLE_ENDIAN - u8 indirect: 1; /* 1 -> value gives sector number + u8 flags; /* bit 0 set -> value gives sector number where real value starts */ - u8 anode: 1; /* 1 -> sector is an anode - that points to fragmented value */ - u8 flag23456: 5; - u8 needea: 1; /* required ea */ -#else - u8 needea: 1; /* required ea */ - u8 flag23456: 5; - u8 anode: 1; /* 1 -> sector is an anode + /* bit 1 set -> sector is an anode that points to fragmented value */ - u8 indirect: 1; /* 1 -> value gives sector number - where real value starts */ -#endif + /* bit 7 set -> required ea */ u8 namelen; /* length of name, bytes */ u8 valuelen_lo; /* length of value, bytes */ u8 valuelen_hi; /* length of value, bytes */ - u8 name[0]; + u8 name[]; /* u8 name[namelen]; ascii attrib name u8 nul; terminating '\0', not counted u8 value[valuelen]; value, arbitrary - if this.indirect, valuelen is 8 and the value is + if this.flags & 1, valuelen is 8 and the value is u32 length; real length of value, bytes secno secno; sector address where it starts if this.anode, the above sector number is the root of an anode tree @@ -561,6 +552,16 @@ struct extended_attribute */ }; +static inline bool ea_indirect(struct extended_attribute *ea) +{ + return ea->flags & EA_indirect; +} + +static inline bool ea_in_anode(struct extended_attribute *ea) +{ + return ea->flags & EA_anode; +} + /* Local Variables: comment-column: 40 From c4c995430a94e7d94526fcb347c4ba4b2ae82500 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 14:30:07 -0400 Subject: [PATCH 23/95] hpfs: get rid of bitfields in struct fnode Signed-off-by: Al Viro --- fs/hpfs/anode.c | 4 ++-- fs/hpfs/dir.c | 2 +- fs/hpfs/dnode.c | 2 +- fs/hpfs/ea.c | 24 ++++++++++++------------ fs/hpfs/hpfs.h | 32 +++++++++++++------------------- fs/hpfs/inode.c | 2 +- fs/hpfs/map.c | 2 +- fs/hpfs/namei.c | 2 +- 8 files changed, 32 insertions(+), 38 deletions(-) diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 3cb4d9c3eca4ea..ec5f8b9e5c2a7e 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -479,13 +479,13 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) struct extended_attribute *ea; struct extended_attribute *ea_end; if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; - if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); + if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, &fnode->btree); else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (ea_indirect(ea)) hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); - hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); + hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); } diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2fa0089a02a8ec..b8472f803f4e54 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -87,7 +87,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = -EIOERROR; goto out; } - if (!fno->dirflag) { + if (!fnode_is_dir(fno)) { e = 1; hpfs_error(inode->i_sb, "not a directory, fnode %08lx", (unsigned long)inode->i_ino); diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 1e0e2ac30fd3be..6bf9fdeba6f2a9 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -1015,7 +1015,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, kfree(name2); return NULL; } - if (!upf->dirflag) { + if (!fnode_is_dir(upf)) { brelse(bh); hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); kfree(name2); diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index 7cd0092771f4b6..bcaafcd2666ac2 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -91,7 +91,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { ea = (struct extended_attribute *)ex; @@ -148,7 +148,7 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -209,7 +209,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -276,7 +276,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); fnode->ea_size_s = cpu_to_le16(0); fnode->ea_secno = cpu_to_le32(n); - fnode->ea_anode = cpu_to_le32(0); + fnode->flags &= ~FNODE_anode; mark_buffer_dirty(bh); brelse(bh); } @@ -288,9 +288,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, secno q = hpfs_alloc_sector(s, fno, 1, 0); if (!q) goto bail; fnode->ea_secno = cpu_to_le32(q); - fnode->ea_anode = 0; + fnode->flags &= ~FNODE_anode; len++; - } else if (!fnode->ea_anode) { + } else if (!fnode_in_anode(fnode)) { if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { len++; } else { @@ -310,7 +310,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, anode->u.external[0].length = cpu_to_le32(len); mark_buffer_dirty(bh); brelse(bh); - fnode->ea_anode = 1; + fnode->flags |= FNODE_anode; fnode->ea_secno = cpu_to_le32(a_s);*/ secno new_sec; int i; @@ -338,7 +338,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, len = (pos + 511) >> 9; } } - if (fnode->ea_anode) { + if (fnode_in_anode(fnode)) { if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), 0, len) != -1) { len++; @@ -351,16 +351,16 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, h[1] = strlen(key); h[2] = size & 0xff; h[3] = size >> 8; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; fnode->ea_size_l = cpu_to_le32(pos); ret: hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; return; bail: if (le32_to_cpu(fnode->ea_secno)) - if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); + if (fnode_in_anode(fnode)) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); } diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index ca90bde6564c0e..37cc4483fbd19d 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -436,6 +436,7 @@ struct bplus_header #define FNODE_MAGIC 0xf7e40aae +enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; struct fnode { u32 magic; /* f7e4 0aae */ @@ -451,26 +452,9 @@ struct fnode secno ea_secno; /* first sector of disk-resident ea's*/ u16 ea_size_s; /* length of fnode-resident ea's */ -#ifdef __LITTLE_ENDIAN - u8 flag0: 1; - u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ - u8 flag234567: 6; -#else - u8 flag234567: 6; - u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ - u8 flag0: 1; -#endif - -#ifdef __LITTLE_ENDIAN - u8 dirflag: 1; /* 1 -> directory. first & only extent - points to dnode. */ - u8 flag9012345: 7; -#else - u8 flag9012345: 7; - u8 dirflag: 1; /* 1 -> directory. first & only extent + __le16 flags; /* bit 1 set -> ea_secno is an anode */ + /* bit 8 set -> directory. first & only extent points to dnode. */ -#endif - struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ union { struct bplus_leaf_node external[8]; @@ -492,6 +476,16 @@ struct fnode via fnode + ea_offs. I think.) */ }; +static inline bool fnode_in_anode(struct fnode *p) +{ + return (p->flags & FNODE_anode) != 0; +} + +static inline bool fnode_is_dir(struct fnode *p) +{ + return (p->flags & FNODE_dir) != 0; +} + /* anode: 99.44% pure allocation tree */ diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index b43066cbdc6a7c..ed671e0ea78443 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -110,7 +110,7 @@ void hpfs_read_inode(struct inode *i) } } } - if (fnode->dirflag) { + if (fnode_is_dir(fnode)) { int n_dnodes, n_subdirs; i->i_mode |= S_IFDIR; i->i_op = &hpfs_dir_iops; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821366a7f0..fffcb330694e34 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -130,7 +130,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea (unsigned long)ino); goto bail; } - if (!fnode->dirflag) { + if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != (fnode->btree.internal ? 12 : 8)) { hpfs_error(s, diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 30dd7b10b507a0..9083ef8af58c16 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -70,7 +70,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fnode->len = len; memcpy(fnode->name, name, len > 15 ? 15 : len); fnode->up = cpu_to_le32(dir->i_ino); - fnode->dirflag = 1; + fnode->flags |= FNODE_dir; fnode->btree.n_free_nodes = 7; fnode->btree.n_used_nodes = 1; fnode->btree.first_free = cpu_to_le16(0x14); From 52576da3545e78c534d901a39f6f2391665c641b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:28:51 -0400 Subject: [PATCH 24/95] hpfs: bitmaps are little-endian annotate properly... Signed-off-by: Al Viro --- fs/hpfs/alloc.c | 14 +++++++------- fs/hpfs/hpfs_fn.h | 6 +++--- fs/hpfs/map.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 7a5eb2c718c854..cdb84a8380682b 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -16,9 +16,9 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; - if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { + if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); goto fail1; } @@ -62,7 +62,7 @@ int hpfs_chk_sectors(struct super_block *s, secno start, int len, char *msg) static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigned forward) { struct quad_buffer_head qbh; - unsigned *bmp; + __le32 *bmp; unsigned bs = near & ~0x3fff; unsigned nr = (near & 0x3fff) & ~(n - 1); /*unsigned mnr;*/ @@ -236,7 +236,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near) int hpfs_alloc_if_possible(struct super_block *s, secno sec) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); @@ -254,7 +254,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec) void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; struct hpfs_sb_info *sbi = hpfs_sb(s); /*printk("2 - ");*/ if (!n) return; @@ -299,7 +299,7 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; int i, j; - u32 *bmp; + __le32 *bmp; struct quad_buffer_head qbh; if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { for (j = 0; j < 512; j++) { @@ -351,7 +351,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) hpfs_free_sectors(s, dno, 4); } else { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { return; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index de946170ebb109..88f096d9dbee6c 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -178,7 +178,7 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) dst->not_8x3 = n; } -static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n) +static inline unsigned tstbits(__le32 *bmp, unsigned b, unsigned n) { int i; if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; @@ -275,8 +275,8 @@ void hpfs_evict_inode(struct inode *); /* map.c */ -unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); -unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); +__le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); +__le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); unsigned char *hpfs_load_code_page(struct super_block *, secno); secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index fffcb330694e34..bbb174df7f95b4 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -8,12 +8,12 @@ #include "hpfs_fn.h" -unsigned *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) +__le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) { return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); } -unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, +__le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; From 39413c6046de282a92739110cfafb8f1e862680d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:32:22 -0400 Subject: [PATCH 25/95] hpfs: annotate struct dnode little-endians... Signed-off-by: Al Viro --- fs/hpfs/hpfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 37cc4483fbd19d..b4e035ce65dbf1 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -278,8 +278,8 @@ struct code_page_data #define DNODE_MAGIC 0x77e40aae struct dnode { - u32 magic; /* 77e4 0aae */ - u32 first_free; /* offset from start of dnode to + __le32 magic; /* 77e4 0aae */ + __le32 first_free; /* offset from start of dnode to first free dir entry */ #ifdef __LITTLE_ENDIAN u8 root_dnode: 1; /* Is it root dnode? */ @@ -293,9 +293,9 @@ struct dnode { u8 root_dnode: 1; /* Is it root dnode? */ #endif u8 increment_me2[3]; - secno up; /* (root dnode) directory's fnode + __le32 up; /* (root dnode) directory's fnode (nonroot) parent dnode */ - dnode_secno self; /* pointer to this dnode */ + __le32 self; /* pointer to this dnode */ u8 dirent[2028]; /* one or more dirents */ }; From ddc19e6e04c1131a48f5b9a25aa433bbd8430cdd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:59:35 -0400 Subject: [PATCH 26/95] hpfs: annotate btree nodes, get rid of bitfields mess Signed-off-by: Al Viro --- fs/hpfs/anode.c | 35 ++++++++++++++++------------- fs/hpfs/hpfs.h | 60 +++++++++++++++++++++++-------------------------- fs/hpfs/map.c | 8 +++---- 3 files changed, 52 insertions(+), 51 deletions(-) diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index ec5f8b9e5c2a7e..4bae4a4a60b193 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -20,7 +20,7 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, int c1, c2 = 0; go_down: if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; - if (btree->internal) { + if (bp_internal(btree)) { for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { a = le32_to_cpu(btree->u.internal[i].down); @@ -82,7 +82,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi brelse(bh); return -1; } - if (btree->internal) { + if (bp_internal(btree)) { a = le32_to_cpu(btree->u.internal[n].down); btree->u.internal[n].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); @@ -129,12 +129,12 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if (a == node && fnod) { anode->up = cpu_to_le32(node); - anode->btree.fnode_parent = 1; + anode->btree.flags |= BP_fnode_parent; anode->btree.n_used_nodes = btree->n_used_nodes; anode->btree.first_free = btree->first_free; anode->btree.n_free_nodes = 40 - anode->btree.n_used_nodes; memcpy(&anode->u, &btree->u, btree->n_used_nodes * 12); - btree->internal = 1; + btree->flags |= BP_internal; btree->n_free_nodes = 11; btree->n_used_nodes = 1; btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); @@ -184,7 +184,10 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi hpfs_free_sectors(s, ra, 1); if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(up); - anode->btree.fnode_parent = up == node && fnod; + if (up == node && fnod) + anode->btree.flags |= BP_fnode_parent; + else + anode->btree.flags &= ~BP_fnode_parent; mark_buffer_dirty(bh); brelse(bh); } @@ -198,7 +201,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { anode = new_anode; /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ - anode->btree.internal = 1; + anode->btree.flags |= BP_internal; anode->btree.n_used_nodes = 1; anode->btree.n_free_nodes = 59; anode->btree.first_free = cpu_to_le16(16); @@ -215,7 +218,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(node); - if (fnod) anode->btree.fnode_parent = 1; + if (fnod) + anode->btree.flags |= BP_fnode_parent; mark_buffer_dirty(bh); brelse(bh); } @@ -234,18 +238,19 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } ranode->up = cpu_to_le32(node); memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); - if (fnod) ranode->btree.fnode_parent = 1; - ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; - if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { + if (fnod) + ranode->btree.flags |= BP_fnode_parent; + ranode->btree.n_free_nodes = (bp_internal(&ranode->btree) ? 60 : 40) - ranode->btree.n_used_nodes; + if (bp_internal(&ranode->btree)) for (n = 0; n < ranode->btree.n_used_nodes; n++) { struct anode *unode; if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { unode->up = cpu_to_le32(ra); - unode->btree.fnode_parent = 0; + unode->btree.flags &= ~BP_fnode_parent; mark_buffer_dirty(bh1); brelse(bh1); } } - btree->internal = 1; + btree->flags |= BP_internal; btree->n_free_nodes = fnod ? 10 : 58; btree->n_used_nodes = 2; btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); @@ -278,7 +283,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) int d1, d2; go_down: d2 = 0; - while (btree1->internal) { + while (bp_internal(btree1)) { ano = le32_to_cpu(btree1->u.internal[pos].down); if (level) brelse(bh); if (hpfs_sb(s)->sb_chk) @@ -412,13 +417,13 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) btree->n_free_nodes = 8; btree->n_used_nodes = 0; btree->first_free = cpu_to_le16(8); - btree->internal = 0; + btree->flags &= ~BP_internal; mark_buffer_dirty(bh); } else hpfs_free_sectors(s, f, 1); brelse(bh); return; } - while (btree->internal) { + while (bp_internal(btree)) { nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index b4e035ce65dbf1..49d9315492d8c8 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -375,50 +375,36 @@ struct hpfs_dirent { struct bplus_leaf_node { - u32 file_secno; /* first file sector in extent */ - u32 length; /* length, sectors */ - secno disk_secno; /* first corresponding disk sector */ + __le32 file_secno; /* first file sector in extent */ + __le32 length; /* length, sectors */ + __le32 disk_secno; /* first corresponding disk sector */ }; struct bplus_internal_node { - u32 file_secno; /* subtree maps sectors < this */ - anode_secno down; /* pointer to subtree */ + __le32 file_secno; /* subtree maps sectors < this */ + __le32 down; /* pointer to subtree */ }; +enum { + BP_hbff = 1, + BP_fnode_parent = 0x20, + BP_binary_search = 0x40, + BP_internal = 0x80 +}; struct bplus_header { -#ifdef __LITTLE_ENDIAN - u8 hbff: 1; /* high bit of first free entry offset */ - u8 flag1234: 4; - u8 fnode_parent: 1; /* ? we're pointed to by an fnode, - the data btree or some ea or the - main ea bootage pointer ea_secno */ - /* also can get set in fnodes, which - may be a chkdsk glitch or may mean - this bit is irrelevant in fnodes, - or this interpretation is all wet */ - u8 binary_search: 1; /* suggest binary search (unused) */ - u8 internal: 1; /* 1 -> (internal) tree of anodes - 0 -> (leaf) list of extents */ -#else - u8 internal: 1; /* 1 -> (internal) tree of anodes - 0 -> (leaf) list of extents */ - u8 binary_search: 1; /* suggest binary search (unused) */ - u8 fnode_parent: 1; /* ? we're pointed to by an fnode, + u8 flags; /* bit 0 - high bit of first free entry offset + bit 5 - we're pointed to by an fnode, the data btree or some ea or the - main ea bootage pointer ea_secno */ - /* also can get set in fnodes, which - may be a chkdsk glitch or may mean - this bit is irrelevant in fnodes, - or this interpretation is all wet */ - u8 flag1234: 4; - u8 hbff: 1; /* high bit of first free entry offset */ -#endif + main ea bootage pointer ea_secno + bit 6 - suggest binary search (unused) + bit 7 - 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ u8 fill[3]; u8 n_free_nodes; /* free nodes in following array */ u8 n_used_nodes; /* used nodes in following array */ - u16 first_free; /* offset from start of header to + __le16 first_free; /* offset from start of header to first free node in array */ union { struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving @@ -428,6 +414,16 @@ struct bplus_header } u; }; +static inline bool bp_internal(struct bplus_header *bp) +{ + return bp->flags & BP_internal; +} + +static inline bool bp_fnode_parent(struct bplus_header *bp) +{ + return bp->flags & BP_fnode_parent; +} + /* fnode: root of allocation b+ tree, and EA's */ /* Every file and every directory has one fnode, pointed to by the directory diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index bbb174df7f95b4..d8bed6da053cfa 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -132,14 +132,14 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea } if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != - (fnode->btree.internal ? 12 : 8)) { + (bp_internal(&fnode->btree) ? 12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != - 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { + 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); @@ -187,12 +187,12 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != - (anode->btree.internal ? 60 : 40)) { + (bp_internal(&anode->btree) ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != - 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { + 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } From 2b9f1cc29ba0e56089fe04501ec6d3b49eee3c3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:09:25 -0400 Subject: [PATCH 27/95] hpfs: annotate struct fnode Signed-off-by: Al Viro --- fs/hpfs/hpfs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 49d9315492d8c8..b66c8b21dcec5e 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -435,18 +435,18 @@ static inline bool bp_fnode_parent(struct bplus_header *bp) enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; struct fnode { - u32 magic; /* f7e4 0aae */ - u32 zero1[2]; /* read history */ + __le32 magic; /* f7e4 0aae */ + __le32 zero1[2]; /* read history */ u8 len, name[15]; /* true length, truncated name */ - fnode_secno up; /* pointer to file's directory fnode */ - secno acl_size_l; - secno acl_secno; - u16 acl_size_s; + __le32 up; /* pointer to file's directory fnode */ + __le32 acl_size_l; + __le32 acl_secno; + __le16 acl_size_s; u8 acl_anode; u8 zero2; /* history bit count */ - u32 ea_size_l; /* length of disk-resident ea's */ - secno ea_secno; /* first sector of disk-resident ea's*/ - u16 ea_size_s; /* length of fnode-resident ea's */ + __le32 ea_size_l; /* length of disk-resident ea's */ + __le32 ea_secno; /* first sector of disk-resident ea's*/ + __le16 ea_size_s; /* length of fnode-resident ea's */ __le16 flags; /* bit 1 set -> ea_secno is an anode */ /* bit 8 set -> directory. first & only extent @@ -457,15 +457,15 @@ struct fnode struct bplus_internal_node internal[12]; } u; - u32 file_size; /* file length, bytes */ - u32 n_needea; /* number of EA's with NEEDEA set */ + __le32 file_size; /* file length, bytes */ + __le32 n_needea; /* number of EA's with NEEDEA set */ u8 user_id[16]; /* unused */ - u16 ea_offs; /* offset from start of fnode + __le16 ea_offs; /* offset from start of fnode to first fnode-resident ea */ u8 dasd_limit_treshhold; u8 dasd_limit_delta; - u32 dasd_limit; - u32 dasd_usage; + __le32 dasd_limit; + __le32 dasd_usage; u8 ea[316]; /* zero or more EA's, packed together with no alignment padding. (Do not use this name, get here From 6ce2bbba5266c1dd5c27dd8af1887ed8ca564919 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:11:25 -0400 Subject: [PATCH 28/95] hpfs: annotate struct anode Signed-off-by: Al Viro --- fs/hpfs/hpfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index b66c8b21dcec5e..f0dc109303323e 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -489,9 +489,9 @@ static inline bool fnode_is_dir(struct fnode *p) struct anode { - u32 magic; /* 37e4 0aae */ - anode_secno self; /* pointer to this anode */ - secno up; /* parent anode or fnode */ + __le32 magic; /* 37e4 0aae */ + __le32 self; /* pointer to this anode */ + __le32 up; /* parent anode or fnode */ struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ union { @@ -499,7 +499,7 @@ struct anode struct bplus_internal_node internal[60]; } u; - u32 fill[3]; /* unused */ + __le32 fill[3]; /* unused */ }; From 46287aa652fa8ea1edac41817ddc63332495ffc3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:20:49 -0400 Subject: [PATCH 29/95] hpfs: annotate struct hpfs_dirent Signed-off-by: Al Viro --- fs/hpfs/dnode.c | 8 ++++---- fs/hpfs/hpfs.h | 14 +++++++------- fs/hpfs/hpfs_fn.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 6bf9fdeba6f2a9..3228c524ebe56f 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -153,7 +153,7 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno } de->length = cpu_to_le16(36); de->down = 1; - *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr); + *(__le32 *)((char *)de + 32) = cpu_to_le32(ptr); } } @@ -177,7 +177,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, memmove((char *)de + d_size, de, (char *)de_end - (char *)de); memset(de, 0, d_size); if (down_ptr) { - *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); + *(__le32 *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); de->down = 1; } de->length = cpu_to_le16(d_size); @@ -656,7 +656,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) del->down = 0; d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); } else if (down) - *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); + *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { printk("HPFS: out of memory for dtree balancing\n"); @@ -672,7 +672,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) de_prev->down = 1; dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); } - *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); + *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index f0dc109303323e..051ff455b26da2 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -300,7 +300,7 @@ struct dnode { }; struct hpfs_dirent { - u16 length; /* offset to next dirent */ + __le16 length; /* offset to next dirent */ #ifdef __LITTLE_ENDIAN u8 first: 1; /* set on phony ^A^A (".") entry */ @@ -346,12 +346,12 @@ struct hpfs_dirent { u8 read_only: 1; /* dos attrib */ #endif - fnode_secno fnode; /* fnode giving allocation info */ - time32_t write_date; /* mtime */ - u32 file_size; /* file length, bytes */ - time32_t read_date; /* atime */ - time32_t creation_date; /* ctime */ - u32 ea_size; /* total EA length, bytes */ + __le32 fnode; /* fnode giving allocation info */ + __le32 write_date; /* mtime */ + __le32 file_size; /* file length, bytes */ + __le32 read_date; /* atime */ + __le32 creation_date; /* ctime */ + __le32 ea_size; /* total EA length, bytes */ u8 no_of_acls; /* number of ACL's (low 3 bits) */ u8 ix; /* code page index (of filename), see struct code_page_data */ diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 88f096d9dbee6c..067269143543c2 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -100,7 +100,7 @@ struct quad_buffer_head { static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) { CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); - return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4)); + return le32_to_cpu(*(__le32 *) ((void *) de + le16_to_cpu(de->length) - 4)); } /* The first dir entry in a dnode */ From 77ee26e44c28823a29bc09091950544566ae7cea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:26:46 -0400 Subject: [PATCH 30/95] hpfs: annotate ea Signed-off-by: Al Viro --- fs/hpfs/hpfs_fn.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 067269143543c2..1acb40f55a3a8b 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -148,12 +148,12 @@ static inline struct extended_attribute *next_ea(struct extended_attribute *ea) static inline secno ea_sec(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen))); + return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 9 + ea->namelen))); } static inline secno ea_len(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen))); + return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 5 + ea->namelen))); } static inline char *ea_data(struct extended_attribute *ea) From 28fe3c1963b0bafa56ec92df1987828090151d87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:41:13 -0400 Subject: [PATCH 31/95] hpfs: assorted endianness annotations Signed-off-by: Al Viro --- fs/hpfs/hpfs.h | 108 +++++++++++++++++++++++----------------------- fs/hpfs/hpfs_fn.h | 4 +- fs/hpfs/map.c | 6 +-- fs/hpfs/super.c | 2 +- 4 files changed, 60 insertions(+), 60 deletions(-) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 051ff455b26da2..cce025aff1b19b 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -51,11 +51,11 @@ struct hpfs_boot_block u8 n_rootdir_entries[2]; u8 n_sectors_s[2]; u8 media_byte; - u16 sectors_per_fat; - u16 sectors_per_track; - u16 heads_per_cyl; - u32 n_hidden_sectors; - u32 n_sectors_l; /* size of partition */ + __le16 sectors_per_fat; + __le16 sectors_per_track; + __le16 heads_per_cyl; + __le32 n_hidden_sectors; + __le32 n_sectors_l; /* size of partition */ u8 drive_number; u8 mbz; u8 sig_28h; /* 28h */ @@ -63,7 +63,7 @@ struct hpfs_boot_block u8 vol_label[11]; u8 sig_hpfs[8]; /* "HPFS " */ u8 pad[448]; - u16 magic; /* aa55 */ + __le16 magic; /* aa55 */ }; @@ -75,28 +75,28 @@ struct hpfs_boot_block struct hpfs_super_block { - u32 magic; /* f995 e849 */ - u32 magic1; /* fa53 e9c5, more magic? */ + __le32 magic; /* f995 e849 */ + __le32 magic1; /* fa53 e9c5, more magic? */ u8 version; /* version of a filesystem usually 2 */ u8 funcversion; /* functional version - oldest version of filesystem that can understand this disk */ - u16 zero; /* 0 */ - fnode_secno root; /* fnode of root directory */ - secno n_sectors; /* size of filesystem */ - u32 n_badblocks; /* number of bad blocks */ - secno bitmaps; /* pointers to free space bit maps */ - u32 zero1; /* 0 */ - secno badblocks; /* bad block list */ - u32 zero3; /* 0 */ - time32_t last_chkdsk; /* date last checked, 0 if never */ - time32_t last_optimize; /* date last optimized, 0 if never */ - secno n_dir_band; /* number of sectors in dir band */ - secno dir_band_start; /* first sector in dir band */ - secno dir_band_end; /* last sector in dir band */ - secno dir_band_bitmap; /* free space map, 1 dnode per bit */ + __le16 zero; /* 0 */ + __le32 root; /* fnode of root directory */ + __le32 n_sectors; /* size of filesystem */ + __le32 n_badblocks; /* number of bad blocks */ + __le32 bitmaps; /* pointers to free space bit maps */ + __le32 zero1; /* 0 */ + __le32 badblocks; /* bad block list */ + __le32 zero3; /* 0 */ + __le32 last_chkdsk; /* date last checked, 0 if never */ + __le32 last_optimize; /* date last optimized, 0 if never */ + __le32 n_dir_band; /* number of sectors in dir band */ + __le32 dir_band_start; /* first sector in dir band */ + __le32 dir_band_end; /* last sector in dir band */ + __le32 dir_band_bitmap; /* free space map, 1 dnode per bit */ u8 volume_name[32]; /* not used */ - secno user_id_table; /* 8 preallocated sectors - user id */ + __le32 user_id_table; /* 8 preallocated sectors - user id */ u32 zero6[103]; /* 0 */ }; @@ -109,8 +109,8 @@ struct hpfs_super_block struct hpfs_spare_block { - u32 magic; /* f991 1849 */ - u32 magic1; /* fa52 29c5, more magic? */ + __le32 magic; /* f991 1849 */ + __le32 magic1; /* fa52 29c5, more magic? */ #ifdef __LITTLE_ENDIAN u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ @@ -153,21 +153,21 @@ struct hpfs_spare_block u8 mm_contlgulty; u8 unused; - secno hotfix_map; /* info about remapped bad sectors */ - u32 n_spares_used; /* number of hotfixes */ - u32 n_spares; /* number of spares in hotfix map */ - u32 n_dnode_spares_free; /* spare dnodes unused */ - u32 n_dnode_spares; /* length of spare_dnodes[] list, + __le32 hotfix_map; /* info about remapped bad sectors */ + __le32 n_spares_used; /* number of hotfixes */ + __le32 n_spares; /* number of spares in hotfix map */ + __le32 n_dnode_spares_free; /* spare dnodes unused */ + __le32 n_dnode_spares; /* length of spare_dnodes[] list, follows in this block*/ - secno code_page_dir; /* code page directory block */ - u32 n_code_pages; /* number of code pages */ - u32 super_crc; /* on HPFS386 and LAN Server this is + __le32 code_page_dir; /* code page directory block */ + __le32 n_code_pages; /* number of code pages */ + __le32 super_crc; /* on HPFS386 and LAN Server this is checksum of superblock, on normal OS/2 unused */ - u32 spare_crc; /* on HPFS386 checksum of spareblock */ - u32 zero1[15]; /* unused */ - dnode_secno spare_dnodes[100]; /* emergency free dnode list */ - u32 zero2[1]; /* room for more? */ + __le32 spare_crc; /* on HPFS386 checksum of spareblock */ + __le32 zero1[15]; /* unused */ + __le32 spare_dnodes[100]; /* emergency free dnode list */ + __le32 zero2[1]; /* room for more? */ }; /* The bad block list is 4 sectors long. The first word must be zero, @@ -202,18 +202,18 @@ struct hpfs_spare_block struct code_page_directory { - u32 magic; /* 4945 21f7 */ - u32 n_code_pages; /* number of pointers following */ - u32 zero1[2]; + __le32 magic; /* 4945 21f7 */ + __le32 n_code_pages; /* number of pointers following */ + __le32 zero1[2]; struct { - u16 ix; /* index */ - u16 code_page_number; /* code page number */ - u32 bounds; /* matches corresponding word + __le16 ix; /* index */ + __le16 code_page_number; /* code page number */ + __le32 bounds; /* matches corresponding word in data block */ - secno code_page_data; /* sector number of a code_page_data + __le32 code_page_data; /* sector number of a code_page_data containing c.p. array */ - u16 index; /* index in c.p. array in that sector*/ - u16 unknown; /* some unknown value; usually 0; + __le16 index; /* index in c.p. array in that sector*/ + __le16 unknown; /* some unknown value; usually 0; 2 in Japanese version */ } array[31]; /* unknown length */ }; @@ -224,19 +224,19 @@ struct code_page_directory struct code_page_data { - u32 magic; /* 8945 21f7 */ - u32 n_used; /* # elements used in c_p_data[] */ - u32 bounds[3]; /* looks a bit like + __le32 magic; /* 8945 21f7 */ + __le32 n_used; /* # elements used in c_p_data[] */ + __le32 bounds[3]; /* looks a bit like (beg1,end1), (beg2,end2) one byte each */ - u16 offs[3]; /* offsets from start of sector + __le16 offs[3]; /* offsets from start of sector to start of c_p_data[ix] */ struct { - u16 ix; /* index */ - u16 code_page_number; /* code page number */ - u16 unknown; /* the same as in cp directory */ + __le16 ix; /* index */ + __le16 code_page_number; /* code page number */ + __le16 unknown; /* the same as in cp directory */ u8 map[128]; /* upcase table for chars 80..ff */ - u16 zero2; + __le16 zero2; } code_page[3]; u8 incognita[78]; }; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 1acb40f55a3a8b..8515bbbc9e2e91 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -82,7 +82,7 @@ struct hpfs_sb_info { unsigned char *sb_cp_table; /* code page tables: */ /* 128 bytes uppercasing table & */ /* 128 bytes lowercasing table */ - unsigned *sb_bmp_dir; /* main bitmap directory */ + __le32 *sb_bmp_dir; /* main bitmap directory */ unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ int sb_timeshift; @@ -278,7 +278,7 @@ void hpfs_evict_inode(struct inode *); __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); unsigned char *hpfs_load_code_page(struct super_block *, secno); -secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); +__le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index d8bed6da053cfa..4acb19d78359d4 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -89,18 +89,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) return cp_table; } -secno *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) +__le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) { struct buffer_head *bh; int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; int i; - secno *b; + __le32 *b; if (!(b = kmalloc(n * 512, GFP_KERNEL))) { printk("HPFS: can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;ihotfixes_used || le32_to_cpu(spareblock->n_spares_used)) { + if (spareblock->hotfixes_used || spareblock->n_spares_used) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); From 408bd629badbd4353b238ab6f58001529b274d73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 May 2012 09:34:20 -0400 Subject: [PATCH 32/95] get rid of pointless allocations and copying in ecryptfs_follow_link() switch to generic_readlink(), while we are at it Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 48 +++++++++------------------------------------ 1 file changed, 9 insertions(+), 39 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ab35b113003b90..a07441a0a8789a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -660,11 +660,10 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); char *lower_buf; - size_t lower_bufsiz = PATH_MAX; mm_segment_t old_fs; int rc; - lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); + lower_buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!lower_buf) { rc = -ENOMEM; goto out; @@ -673,58 +672,29 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, set_fs(get_ds()); rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, (char __user *)lower_buf, - lower_bufsiz); + PATH_MAX); set_fs(old_fs); if (rc < 0) goto out; - lower_bufsiz = rc; rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, - lower_buf, lower_bufsiz); + lower_buf, rc); out: kfree(lower_buf); return rc; } -static int -ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - char *kbuf; - size_t kbufsiz, copied; + char *buf; + size_t len = PATH_MAX; int rc; - rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); + rc = ecryptfs_readlink_lower(dentry, &buf, &len); if (rc) goto out; - copied = min_t(size_t, bufsiz, kbufsiz); - rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied; - kfree(kbuf); fsstack_copy_attr_atime(dentry->d_inode, ecryptfs_dentry_to_lower(dentry)->d_inode); -out: - return rc; -} - -static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *buf; - int len = PAGE_SIZE, rc; - mm_segment_t old_fs; - - /* Released in ecryptfs_put_link(); only release here on error */ - buf = kmalloc(len, GFP_KERNEL); - if (!buf) { - buf = ERR_PTR(-ENOMEM); - goto out; - } - old_fs = get_fs(); - set_fs(get_ds()); - rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); - set_fs(old_fs); - if (rc < 0) { - kfree(buf); - buf = ERR_PTR(rc); - } else - buf[rc] = '\0'; + buf[len] = '\0'; out: nd_set_link(nd, buf); return NULL; @@ -1153,7 +1123,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name) } const struct inode_operations ecryptfs_symlink_iops = { - .readlink = ecryptfs_readlink, + .readlink = generic_readlink, .follow_link = ecryptfs_follow_link, .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, From ea022dfb3c2a4680483b00eb2fecc9fc4f6091d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 May 2012 10:14:29 -0400 Subject: [PATCH 33/95] ocfs: simplify symlink handling seeing that "fast" symlinks still get allocation + copy, we might as well simply switch them to pagecache-based variant of ->follow_link(); just need an appropriate ->readpage() for them... Signed-off-by: Al Viro --- fs/ocfs2/inode.c | 13 +++-- fs/ocfs2/namei.c | 5 +- fs/ocfs2/symlink.c | 115 +++++++++------------------------------------ fs/ocfs2/symlink.h | 2 +- 4 files changed, 31 insertions(+), 104 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 735514ca400f79..d89e08a81eda88 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -273,11 +273,13 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_gid = le32_to_cpu(fe->i_gid); /* Fast symlinks will have i_size but no allocated clusters. */ - if (S_ISLNK(inode->i_mode) && !fe->i_clusters) + if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { inode->i_blocks = 0; - else + inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; + } else { inode->i_blocks = ocfs2_inode_sector_count(inode); - inode->i_mapping->a_ops = &ocfs2_aops; + inode->i_mapping->a_ops = &ocfs2_aops; + } inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); @@ -331,10 +333,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_dir_lock_gen = 1; break; case S_IFLNK: - if (ocfs2_inode_is_fast_symlink(inode)) - inode->i_op = &ocfs2_fast_symlink_inode_operations; - else - inode->i_op = &ocfs2_symlink_inode_operations; + inode->i_op = &ocfs2_symlink_inode_operations; i_size_write(inode, le64_to_cpu(fe->i_size)); break; default: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a9856e3eaaf097..9f39c640cddf20 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1724,15 +1724,16 @@ static int ocfs2_symlink(struct inode *dir, fe = (struct ocfs2_dinode *) new_fe_bh->b_data; inode->i_rdev = 0; newsize = l - 1; + inode->i_op = &ocfs2_symlink_inode_operations; if (l > ocfs2_fast_symlink_chars(sb)) { u32 offset = 0; - inode->i_op = &ocfs2_symlink_inode_operations; status = dquot_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status) goto bail; did_quota = 1; + inode->i_mapping->a_ops = &ocfs2_aops; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, @@ -1750,7 +1751,7 @@ static int ocfs2_symlink(struct inode *dir, i_size_write(inode, newsize); inode->i_blocks = ocfs2_inode_sector_count(inode); } else { - inode->i_op = &ocfs2_fast_symlink_inode_operations; + inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; memcpy((char *) fe->id2.i_symlink, symname, l); i_size_write(inode, newsize); inode->i_blocks = 0; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 5d22872e2bb360..f1fbb4b552ad36 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -54,101 +54,40 @@ #include "buffer_head_io.h" -static char *ocfs2_fast_symlink_getlink(struct inode *inode, - struct buffer_head **bh) +static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) { - int status; - char *link = NULL; + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + int status = ocfs2_read_inode_block(inode, &bh); struct ocfs2_dinode *fe; + const char *link; + void *kaddr; + size_t len; - status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); - link = ERR_PTR(status); - goto bail; + return status; } - fe = (struct ocfs2_dinode *) (*bh)->b_data; + fe = (struct ocfs2_dinode *) bh->b_data; link = (char *) fe->id2.i_symlink; -bail: - - return link; -} - -static int ocfs2_readlink(struct dentry *dentry, - char __user *buffer, - int buflen) -{ - int ret; - char *link; - struct buffer_head *bh = NULL; - struct inode *inode = dentry->d_inode; - - link = ocfs2_fast_symlink_getlink(inode, &bh); - if (IS_ERR(link)) { - ret = PTR_ERR(link); - goto out; - } - - /* - * Without vfsmount we can't update atime now, - * but we will update atime here ultimately. - */ - ret = vfs_readlink(dentry, buffer, buflen, link); - + /* will be less than a page size */ + len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb)); + kaddr = kmap_atomic(page); + memcpy(kaddr, link, len + 1); + kunmap_atomic(kaddr); + SetPageUptodate(page); + unlock_page(page); brelse(bh); -out: - if (ret < 0) - mlog_errno(ret); - return ret; + return 0; } -static void *ocfs2_fast_follow_link(struct dentry *dentry, - struct nameidata *nd) -{ - int status = 0; - int len; - char *target, *link = ERR_PTR(-ENOMEM); - struct inode *inode = dentry->d_inode; - struct buffer_head *bh = NULL; - - BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); - target = ocfs2_fast_symlink_getlink(inode, &bh); - if (IS_ERR(target)) { - status = PTR_ERR(target); - mlog_errno(status); - goto bail; - } - - /* Fast symlinks can't be large */ - len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); - link = kzalloc(len + 1, GFP_NOFS); - if (!link) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - memcpy(link, target, len); - -bail: - nd_set_link(nd, status ? ERR_PTR(status) : link); - brelse(bh); - - if (status) - mlog_errno(status); - return NULL; -} - -static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) -{ - char *link = nd_get_link(nd); - if (!IS_ERR(link)) - kfree(link); -} +const struct address_space_operations ocfs2_fast_symlink_aops = { + .readpage = ocfs2_fast_symlink_readpage, +}; const struct inode_operations ocfs2_symlink_inode_operations = { - .readlink = page_readlink, + .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, .getattr = ocfs2_getattr, @@ -159,15 +98,3 @@ const struct inode_operations ocfs2_symlink_inode_operations = { .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, }; -const struct inode_operations ocfs2_fast_symlink_inode_operations = { - .readlink = ocfs2_readlink, - .follow_link = ocfs2_fast_follow_link, - .put_link = ocfs2_fast_put_link, - .getattr = ocfs2_getattr, - .setattr = ocfs2_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ocfs2_listxattr, - .removexattr = generic_removexattr, - .fiemap = ocfs2_fiemap, -}; diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h index 65a6c9c6ad51d1..71ee4245e91922 100644 --- a/fs/ocfs2/symlink.h +++ b/fs/ocfs2/symlink.h @@ -27,7 +27,7 @@ #define OCFS2_SYMLINK_H extern const struct inode_operations ocfs2_symlink_inode_operations; -extern const struct inode_operations ocfs2_fast_symlink_inode_operations; +extern const struct address_space_operations ocfs2_fast_symlink_aops; /* * Test whether an inode is a fast symlink. From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 May 2012 13:29:45 +0930 Subject: [PATCH 34/95] lglock: remove online variants of lock Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. Signed-off-by: Rusty Russell Acked-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/lglock.h | 58 ++---------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402ccec5556..0fdd821e77b702 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:24 +0930 Subject: [PATCH 35/95] brlocks/lglocks: turn into functions lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro --- fs/file_table.c | 1 - fs/internal.h | 2 +- include/linux/lglock.h | 125 +++++++++-------------------------------- kernel/Makefile | 2 +- kernel/lglock.c | 89 +++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 102 deletions(-) create mode 100644 kernel/lglock.c diff --git a/fs/file_table.c b/fs/file_table.c index 70f2a0fd6aec62..f5c67c59ec10ce 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h index 9962c59ba280b1..8040af489c7829 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0fdd821e77b702..f01e5f6d1f07a4 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include #include #include +#include /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b7e6..296132c19a5786 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 index 00000000000000..6535a667a5a797 --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include +#include +#include +#include + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); From 962830df366b66e71849040770ae6ba55a8b4aec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:02 +0930 Subject: [PATCH 36/95] brlocks/lglocks: API cleanups lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. In preparation, this patch changes the API to look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro --- fs/dcache.c | 4 +- fs/file_table.c | 16 ++--- fs/namei.c | 24 ++++---- fs/namespace.c | 139 ++++++++++++++++++++++---------------------- fs/pnode.c | 4 +- fs/proc_namespace.c | 4 +- 6 files changed, 96 insertions(+), 95 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4435d8b329044d..40469044088def 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2575,7 +2575,7 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2606,7 +2606,7 @@ static int prepend_path(const struct path *path, error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff --git a/fs/file_table.c b/fs/file_table.c index f5c67c59ec10ce..a305d9e2d1b2aa 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -420,9 +420,9 @@ static inline void __file_sb_list_add(struct file *file, struct super_block *sb) */ void file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -435,9 +435,9 @@ void file_sb_list_add(struct file *file, struct super_block *sb) void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -484,7 +484,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -501,12 +501,12 @@ void mark_files_ro(struct super_block *sb) file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -524,6 +524,6 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/namei.c b/fs/namei.c index c651f02c9fecb9..93ac9323b1f7c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -449,7 +449,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -507,14 +507,14 @@ static int complete_walk(struct nameidata *nd) if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -681,15 +681,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -947,7 +947,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1265,7 +1265,7 @@ static void terminate_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1620,7 +1620,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1633,7 +1633,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1646,7 +1646,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1682,7 +1682,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff --git a/fs/namespace.c b/fs/namespace.c index e6081996c9a2f9..224aff1c0dfda0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct mount *mnt) { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *head) struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, int flags) } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ int finish_automount(struct vfsmount *m, struct path *path) /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ int finish_automount(struct vfsmount *m, struct path *path) void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount *m) struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2250,9 +2251,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return ERR_PTR(-ENOMEM); } new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2417,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2506,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2514,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2577,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2597,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c index ab5fa9e1a79ac8..bed378db075813 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 12412852d88a94..5e289a7cbad17d 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } From 16767652380074fd2a87ae7486e0fe0b609daf00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 09:54:29 -0400 Subject: [PATCH 37/95] get rid of idiotic misplaced __kernel_mode_t in ncfps kernel-private data structure Signed-off-by: Al Viro --- fs/ncpfs/ncp_fs_sb.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index 4af803f13516c9..54cc0cdb3dcbda 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -23,17 +23,17 @@ struct ncp_mount_data_kernel { unsigned long flags; /* NCP_MOUNT_* flags */ unsigned int int_flags; /* internal flags */ #define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 - __kernel_uid32_t mounted_uid; /* Who may umount() this filesystem? */ + uid_t mounted_uid; /* Who may umount() this filesystem? */ struct pid *wdog_pid; /* Who cares for our watchdog packets? */ unsigned int ncp_fd; /* The socket to the ncp port */ unsigned int time_out; /* How long should I wait after sending a NCP request? */ unsigned int retry_count; /* And how often should I retry? */ unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_mode_t file_mode; - __kernel_mode_t dir_mode; + uid_t uid; + gid_t gid; + umode_t file_mode; + umode_t dir_mode; int info_fd; }; From 726592a9be0bdf919399d3dfa633f8e2d69cbf13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:00:52 -0400 Subject: [PATCH 38/95] mode_t whack-a-mole: ->is_visible() returns umode_t... Signed-off-by: Al Viro --- drivers/base/soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/soc.c b/drivers/base/soc.c index ba29b2e73d4893..72b5e7280d1479 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -42,7 +42,7 @@ struct device *soc_device_to_device(struct soc_device *soc_dev) return &soc_dev->dev; } -static mode_t soc_attribute_mode(struct kobject *kobj, +static umode_t soc_attribute_mode(struct kobject *kobj, struct attribute *attr, int index) { From dcc62b6b38334075271eaffb1dc42cd47ceb5692 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:13:52 -0400 Subject: [PATCH 39/95] mips: get rid of nlink_t, use explictly-sized type (__u32 in all cases) Signed-off-by: Al Viro --- arch/mips/include/asm/stat.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/stat.h b/arch/mips/include/asm/stat.h index 6e00f751ab6dc6..fe9a4c3ec5a1f2 100644 --- a/arch/mips/include/asm/stat.h +++ b/arch/mips/include/asm/stat.h @@ -20,7 +20,7 @@ struct stat { long st_pad1[3]; /* Reserved for network id */ ino_t st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; unsigned st_rdev; @@ -55,7 +55,7 @@ struct stat64 { unsigned long long st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; @@ -96,7 +96,7 @@ struct stat { unsigned long st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; From e57f93cc53b772b2049222410cf6a141a724529a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:16:30 -0400 Subject: [PATCH 40/95] powerpc: get rid of nlink_t uses, switch to explicitly-sized type Signed-off-by: Al Viro --- arch/powerpc/include/asm/stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/stat.h b/arch/powerpc/include/asm/stat.h index e4edc510b530cf..10cfb558e0fd7d 100644 --- a/arch/powerpc/include/asm/stat.h +++ b/arch/powerpc/include/asm/stat.h @@ -30,11 +30,11 @@ struct stat { unsigned long st_dev; ino_t st_ino; #ifdef __powerpc64__ - nlink_t st_nlink; + unsigned short st_nlink; mode_t st_mode; #else mode_t st_mode; - nlink_t st_nlink; + unsigned short st_nlink; #endif uid_t st_uid; gid_t st_gid; From 1dfb5751a4de7c6a57a5602e8e2b87267cfc8c81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:17:45 -0400 Subject: [PATCH 41/95] parisc: get rid of nlink_t, switch to explicitly-sized type Signed-off-by: Al Viro --- arch/parisc/include/asm/stat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/stat.h b/arch/parisc/include/asm/stat.h index 9d5fbbc5c31f14..d76fbda5d62c04 100644 --- a/arch/parisc/include/asm/stat.h +++ b/arch/parisc/include/asm/stat.h @@ -7,7 +7,7 @@ struct stat { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - nlink_t st_nlink; /* 16 bits */ + unsigned short st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; @@ -42,7 +42,7 @@ struct hpux_stat64 { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - nlink_t st_nlink; /* 16 bits */ + unsigned short st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: [PATCH 42/95] bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro --- arch/alpha/include/asm/posix_types.h | 3 --- arch/arm/include/asm/posix_types.h | 3 --- arch/avr32/include/asm/posix_types.h | 3 --- arch/blackfin/include/asm/posix_types.h | 3 --- arch/cris/include/asm/posix_types.h | 3 --- arch/frv/include/asm/posix_types.h | 3 --- arch/h8300/include/asm/posix_types.h | 3 --- arch/ia64/include/asm/posix_types.h | 3 --- arch/m32r/include/asm/posix_types.h | 3 --- arch/m68k/include/asm/posix_types.h | 3 --- arch/mips/include/asm/posix_types.h | 5 ----- arch/mn10300/include/asm/posix_types.h | 3 --- arch/parisc/include/asm/posix_types.h | 3 --- arch/powerpc/include/asm/posix_types.h | 3 --- arch/s390/include/asm/posix_types.h | 3 --- arch/sh/include/asm/posix_types_32.h | 2 -- arch/sh/include/asm/posix_types_64.h | 2 -- arch/sparc/include/asm/posix_types.h | 5 ----- arch/tile/include/asm/compat.h | 1 - arch/x86/include/asm/posix_types_32.h | 3 --- include/asm-generic/posix_types.h | 4 ---- include/linux/types.h | 2 +- 22 files changed, 1 insertion(+), 65 deletions(-) diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h index 24779fc95994ef..5a8a48320efe9f 100644 --- a/arch/alpha/include/asm/posix_types.h +++ b/arch/alpha/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned int __kernel_ino_t; #define __kernel_ino_t __kernel_ino_t -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h index efdf99045d879e..d2de9cbbcd9bca 100644 --- a/arch/arm/include/asm/posix_types.h +++ b/arch/arm/include/asm/posix_types.h @@ -22,9 +22,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h index 74667bfc88cc76..9ba9e749b3f34d 100644 --- a/arch/avr32/include/asm/posix_types.h +++ b/arch/avr32/include/asm/posix_types.h @@ -17,9 +17,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h index 41bc1875c4d7fd..1bd3436db6a7b7 100644 --- a/arch/blackfin/include/asm/posix_types.h +++ b/arch/blackfin/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned int __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h index 234891c74e2bbe..ce4e517931514f 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/asm/posix_types.h @@ -15,9 +15,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h index 3f34cb45fbb3fa..fe512af74a5afb 100644 --- a/arch/frv/include/asm/posix_types.h +++ b/arch/frv/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h index bc4c34efb1ad16..91e62ba4c7b02e 100644 --- a/arch/h8300/include/asm/posix_types.h +++ b/arch/h8300/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h index 7323ab9467ebae..99ee1d6510cfc9 100644 --- a/arch/ia64/include/asm/posix_types.h +++ b/arch/ia64/include/asm/posix_types.h @@ -1,9 +1,6 @@ #ifndef _ASM_IA64_POSIX_TYPES_H #define _ASM_IA64_POSIX_TYPES_H -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h index 0195850e1f8869..236de26a409b3f 100644 --- a/arch/m32r/include/asm/posix_types.h +++ b/arch/m32r/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h index 6373093be72bb0..cf4dbf70fdc73f 100644 --- a/arch/m68k/include/asm/posix_types.h +++ b/arch/m68k/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h index e0308dcca1358f..fa03ec3fbf897a 100644 --- a/arch/mips/include/asm/posix_types.h +++ b/arch/mips/include/asm/posix_types.h @@ -17,11 +17,6 @@ * assume GCC is being used. */ -#if (_MIPS_SZLONG == 64) -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t -#endif - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h index ab506181ec3108..d31eeea480cfdd 100644 --- a/arch/mn10300/include/asm/posix_types.h +++ b/arch/mn10300/include/asm/posix_types.h @@ -20,9 +20,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h index 5212b0357daf15..b9344256f76b36 100644 --- a/arch/parisc/include/asm/posix_types.h +++ b/arch/parisc/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h index f1393252bbdad8..2958c5b97b2dd4 100644 --- a/arch/powerpc/include/asm/posix_types.h +++ b/arch/powerpc/include/asm/posix_types.h @@ -16,9 +16,6 @@ typedef int __kernel_ssize_t; typedef long __kernel_ptrdiff_t; #define __kernel_size_t __kernel_size_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h index edf8527ff08d9b..7be104c0f19230 100644 --- a/arch/s390/include/asm/posix_types.h +++ b/arch/s390/include/asm/posix_types.h @@ -24,7 +24,6 @@ typedef unsigned short __kernel_old_dev_t; typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; @@ -35,7 +34,6 @@ typedef int __kernel_ptrdiff_t; typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; -typedef unsigned int __kernel_nlink_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; typedef unsigned int __kernel_gid_t; @@ -47,7 +45,6 @@ typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t -#define __kernel_nlink_t __kernel_nlink_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t #define __kernel_uid_t __kernel_uid_t #define __kernel_gid_t __kernel_gid_t diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h index abda58467ece9e..ba0bdc423b072f 100644 --- a/arch/sh/include/asm/posix_types_32.h +++ b/arch/sh/include/asm/posix_types_32.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h index fcda07b4a616be..244f7e950e176b 100644 --- a/arch/sh/include/asm/posix_types_64.h +++ b/arch/sh/include/asm/posix_types_64.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h index 3070f25ae90a3e..156220ed99eb7d 100644 --- a/arch/sparc/include/asm/posix_types.h +++ b/arch/sparc/include/asm/posix_types.h @@ -9,8 +9,6 @@ #if defined(__sparc__) && defined(__arch64__) /* sparc 64 bit */ -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; @@ -38,9 +36,6 @@ typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 69adc08d36a525..6e74450ff0a110 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index 99f262e04b91b6..8e525059e7d81c 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd4dde325..fe74fccf18db75 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef __kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db60231a7..9c1bd539ea70e7 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __kernel_nlink_t nlink_t; +typedef __u32 nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; From 3a0c0e26b64505522b8bce8578a6e61609c31318 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:50 +0300 Subject: [PATCH 43/95] jffs2: remove lock_super We do not need 'lock_super()'/'unlock_super()' in JFFS2 - kill them. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/jffs2/super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f9916f312bd81e..3422a2db2ba444 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -67,15 +67,12 @@ static void jffs2_write_super(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); } - - unlock_super(sb); } static const char *jffs2_compr_name(unsigned int compr) From d0490eea14cc9221cb8343091c216fb862d19958 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:51 +0300 Subject: [PATCH 44/95] jffs2: remove unnecessary GC pass on umount We do not need to call 'jffs2_write_super()' on unmount. This function causes a GC pass to make sure the current contents is pushed out with the data which we already have on the media. But this is not needed on unmount and only slows unmount down unnecessarily. It is enough to just sync the write-buffer. This call was added by one of the generic VFS rework patch-sets, see 8c85e125124a473d6f3e9bb187b0b84207f81d91. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/jffs2/super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 3422a2db2ba444..d3dc9d8d3a10a6 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -316,9 +316,6 @@ static void jffs2_put_super (struct super_block *sb) jffs2_dbg(2, "%s()\n", __func__); - if (sb->s_dirt) - jffs2_write_super(sb); - mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); From 06688905cc36b86c700f376e9bc9bb68bc67d801 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:52 +0300 Subject: [PATCH 45/95] jffs2: remove unnecessary GC pass on sync We do not need to call 'jffs2_write_super()' on sync. This function causes a GC pass to make sure the current contents is pushed out with the data which we already have on the media. But this is not needed on unmount and only slows sync down unnecessarily. It is enough to just sync the write-buffer. This call was added by one of the generic VFS rework patch-sets, see d579ed00aa96a7f7486978540a0d7cecaff742ae. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/jffs2/super.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d3dc9d8d3a10a6..dc366c0c9304e0 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -110,8 +110,6 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - jffs2_write_super(sb); - mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); From 8bdc81c5069e43755d6e59e5e990e21ca200e8e2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:53 +0300 Subject: [PATCH 46/95] jffs2: get rid of jffs2_sync_super Currently JFFS2 file-system maps the VFS "superblock" abstraction to the write-buffer. Namely, it uses VFS services to synchronize the write-buffer periodically. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the JFFS2 write-buffer management from '->write_super()'/'->s_dirt' to a delayed work. Instead of setting the 's_dirt' flag we just schedule a delayed work for synchronizing the write-buffer. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/jffs2/jffs2_fs_sb.h | 4 +++ fs/jffs2/os-linux.h | 7 ++---- fs/jffs2/super.c | 13 ---------- fs/jffs2/wbuf.c | 55 +++++++++++++++++++++++++++++++++++++++--- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 55a0c1dceadfdd..44dca1f041c5cb 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -126,6 +126,10 @@ struct jffs2_sb_info { struct jffs2_inodirty *wbuf_inodes; struct rw_semaphore wbuf_sem; /* Protects the write buffer */ + struct delayed_work wbuf_dwork; /* write-buffer write-out work */ + int wbuf_queued; /* non-zero delayed work is queued */ + spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ + unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ #endif diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 1cd3aec9d9ae28..bcd983d7e7f99e 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -95,6 +95,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #define jffs2_ubivol(c) (0) #define jffs2_ubivol_setup(c) (0) #define jffs2_ubivol_cleanup(c) do {} while (0) +#define jffs2_dirty_trigger(c) do {} while (0) #else /* NAND and/or ECC'd NOR support present */ @@ -135,14 +136,10 @@ void jffs2_ubivol_cleanup(struct jffs2_sb_info *c); #define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE)) int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); +void jffs2_dirty_trigger(struct jffs2_sb_info *c); #endif /* WRITEBUFFER */ -static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c) -{ - OFNI_BS_2SFFJ(c)->s_dirt = 1; -} - /* background.c */ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c); void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index dc366c0c9304e0..bc586f20422863 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -63,18 +63,6 @@ static void jffs2_i_init_once(void *foo) inode_init_once(&f->vfs_inode); } -static void jffs2_write_super(struct super_block *sb) -{ - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - - sb->s_dirt = 0; - - if (!(sb->s_flags & MS_RDONLY)) { - jffs2_dbg(1, "%s()\n", __func__); - jffs2_flush_wbuf_gc(c, 0); - } -} - static const char *jffs2_compr_name(unsigned int compr) { switch (compr) { @@ -246,7 +234,6 @@ static const struct super_operations jffs2_super_operations = .alloc_inode = jffs2_alloc_inode, .destroy_inode =jffs2_destroy_inode, .put_super = jffs2_put_super, - .write_super = jffs2_write_super, .statfs = jffs2_statfs, .remount_fs = jffs2_remount_fs, .evict_inode = jffs2_evict_inode, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 74d9be19df3f1f..6f4529d3697fd3 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nodelist.h" @@ -85,7 +86,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino) { struct jffs2_inodirty *new; - /* Mark the superblock dirty so that kupdated will flush... */ + /* Schedule delayed write-buffer write-out */ jffs2_dirty_trigger(c); if (jffs2_wbuf_pending_for_ino(c, ino)) @@ -1148,6 +1149,47 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * return 1; } +static struct jffs2_sb_info *work_to_sb(struct work_struct *work) +{ + struct delayed_work *dwork; + + dwork = container_of(work, struct delayed_work, work); + return container_of(dwork, struct jffs2_sb_info, wbuf_dwork); +} + +static void delayed_wbuf_sync(struct work_struct *work) +{ + struct jffs2_sb_info *c = work_to_sb(work); + struct super_block *sb = OFNI_BS_2SFFJ(c); + + spin_lock(&c->wbuf_dwork_lock); + c->wbuf_queued = 0; + spin_unlock(&c->wbuf_dwork_lock); + + if (!(sb->s_flags & MS_RDONLY)) { + jffs2_dbg(1, "%s()\n", __func__); + jffs2_flush_wbuf_gc(c, 0); + } +} + +void jffs2_dirty_trigger(struct jffs2_sb_info *c) +{ + struct super_block *sb = OFNI_BS_2SFFJ(c); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&c->wbuf_dwork_lock); + if (!c->wbuf_queued) { + jffs2_dbg(1, "%s()\n", __func__); + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); + c->wbuf_queued = 1; + } + spin_unlock(&c->wbuf_dwork_lock); +} + int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { struct nand_ecclayout *oinfo = c->mtd->ecclayout; @@ -1169,6 +1211,8 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1207,8 +1251,8 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - - + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->erasesize; /* Find a suitable c->sector_size @@ -1267,6 +1311,9 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); + c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1299,6 +1346,8 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; From 3ed37648e1cbf1bbebc200c6ea8fd8daf8325843 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 15 May 2012 14:57:33 +0800 Subject: [PATCH 47/95] fs: move file_remove_suid() to fs/inode.c file_remove_suid() is a generic function operates on struct file, it almost has no relations with file mapping, so move it to fs/inode.c. Cc: Alexander Viro Signed-off-by: Cong Wang Signed-off-by: Al Viro --- fs/inode.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/filemap.c | 65 ---------------------------------------------------- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 183ddd6cda7115..a79555e492e075 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1530,6 +1530,71 @@ void touch_atime(struct path *path) } EXPORT_SYMBOL(touch_atime); +/* + * The logic we want is + * + * if suid or (sgid and xgrp) + * remove privs + */ +int should_remove_suid(struct dentry *dentry) +{ + umode_t mode = dentry->d_inode->i_mode; + int kill = 0; + + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. + */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) + return kill; + + return 0; +} +EXPORT_SYMBOL(should_remove_suid); + +static int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return notify_change(dentry, &newattrs); +} + +int file_remove_suid(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; + int error = 0; + + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + + if (killpriv < 0) + return killpriv; + if (killpriv) + error = security_inode_killpriv(dentry); + if (!error && killsuid) + error = __remove_suid(dentry, killsuid); + if (!error && (inode->i_sb->s_flags & MS_NOSEC)) + inode->i_flags |= S_NOSEC; + + return error; +} +EXPORT_SYMBOL(file_remove_suid); + /** * file_update_time - update mtime and ctime time * @file: file accessed diff --git a/mm/filemap.c b/mm/filemap.c index 79c4b2b0b14eec..21e5abfbcdf661 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1938,71 +1938,6 @@ struct page *read_cache_page(struct address_space *mapping, } EXPORT_SYMBOL(read_cache_page); -/* - * The logic we want is - * - * if suid or (sgid and xgrp) - * remove privs - */ -int should_remove_suid(struct dentry *dentry) -{ - umode_t mode = dentry->d_inode->i_mode; - int kill = 0; - - /* suid always must be killed */ - if (unlikely(mode & S_ISUID)) - kill = ATTR_KILL_SUID; - - /* - * sgid without any exec bits is just a mandatory locking mark; leave - * it alone. If some exec bits are set, it's a real sgid; kill it. - */ - if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) - kill |= ATTR_KILL_SGID; - - if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) - return kill; - - return 0; -} -EXPORT_SYMBOL(should_remove_suid); - -static int __remove_suid(struct dentry *dentry, int kill) -{ - struct iattr newattrs; - - newattrs.ia_valid = ATTR_FORCE | kill; - return notify_change(dentry, &newattrs); -} - -int file_remove_suid(struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - int killsuid; - int killpriv; - int error = 0; - - /* Fast path for nothing security related */ - if (IS_NOSEC(inode)) - return 0; - - killsuid = should_remove_suid(dentry); - killpriv = security_inode_need_killpriv(dentry); - - if (killpriv < 0) - return killpriv; - if (killpriv) - error = security_inode_killpriv(dentry); - if (!error && killsuid) - error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; - - return error; -} -EXPORT_SYMBOL(file_remove_suid); - static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { From a0a9b0433767713342f9cc70f563483c38e26f30 Mon Sep 17 00:00:00 2001 From: Shai Fultheim Date: Tue, 15 May 2012 12:29:52 +0300 Subject: [PATCH 48/95] fs: Move bh_cachep to the __read_mostly section bh_cachep is only written to once on initialization, so move it to the __read_mostly section. Signed-off-by: Shai Fultheim Signed-off-by: Vlad Zolotarov Signed-off-by: Al Viro --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index ad5938ca357c27..838a9cf246bd0f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3152,7 +3152,7 @@ SYSCALL_DEFINE2(bdflush, int, func, long, data) /* * Buffer-head allocation */ -static struct kmem_cache *bh_cachep; +static struct kmem_cache *bh_cachep __read_mostly; /* * Once the number of bh's in the machine exceeds this level, we start From 799243a389bde0de10fa21ca1ca453d2fe538b85 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 29 May 2012 11:02:21 -0700 Subject: [PATCH 49/95] vfs: increment iversion when a file is truncated When a file is truncated with truncate()/ftruncate() and then closed, iversion is not updated. This patch uses ATTR_SIZE flag as an indication to increment iversion. Mimi said: On fput(), i_version is used to detect and flag files that have changed and need to be re-measured in the IMA measurement policy. When a file is truncated with truncate()/ftruncate() and then closed, i_version is not updated. As a result, although the file has changed, it will not be re-measured and added to the IMA measurement list on subsequent access. Signed-off-by: Dmitry Kasatkin Acked-by: Mimi Zohar Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/attr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/attr.c b/fs/attr.c index 584620e5dee52b..0da90951d2776f 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -176,6 +176,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { + if (attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + } + if ((ia_valid & ATTR_MODE)) { umode_t amode = attr->ia_mode; /* Flag setting protected by i_mutex */ From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 May 2012 11:02:24 -0700 Subject: [PATCH 50/95] fsnotify: handle subfiles' perm events Recently I'm working on fanotify and found the following strange behaviors. I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY all events notified. fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is blocked as expected. But, fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo aaa It's not blocked anymore. This is confusing behavior. Also reading commit "fsnotify: call fsnotify_parent in perm events", it seems like fsnotify should handle subfiles' perm events as well as the other notify events. With this patch, regardless of FAN_ALL_EVENTS set or not: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is now blocked properly. FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD. Due to fsnotify_inode_watches_children() check, if you only specify only these events as fsnotify_mask, you don't get subfiles' perm events notified. This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified even if only these events are specified to fsnotify_mask. Signed-off-by: Naohiro Aota Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a34ef318..63d966d5c2ea7a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) From fd657170c039a918c0b46f51db8005317d4c83fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 May 2012 11:02:24 -0700 Subject: [PATCH 51/95] fsnotify: remove unused parameter from send_to_group() We don't use "mnt" anymore in send_to_group() after 1968f5eed5 ("fanotify: use both marks when possible") was applied. Signed-off-by: Dan Carpenter Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/notify/fsnotify.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ccb14d3fc0de99..b39c5c161adb64 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -123,7 +123,7 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) } EXPORT_SYMBOL_GPL(__fsnotify_parent); -static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, +static int send_to_group(struct inode *to_tell, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, __u32 mask, void *data, @@ -168,10 +168,10 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, vfsmount_test_mask &= ~inode_mark->ignored_mask; } - pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" + pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" " data=%p data_is=%d cookie=%d event=%p\n", - __func__, group, to_tell, mnt, mask, inode_mark, + __func__, group, to_tell, mask, inode_mark, inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, data_is, cookie, *event); @@ -258,16 +258,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, + ret = send_to_group(to_tell, inode_mark, NULL, mask, data, data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data, + ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); inode_group = NULL; } else { - ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark, + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); } From 244ca2b4d0b5e500681e52ad9e6d7f3f2b9362a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 May 2012 21:24:36 -0400 Subject: [PATCH 52/95] i810: switch to vm_mmap() Weirdness around do_mmap() in there does not rely on ->mmap_sem for exclusion, so no need to keep it under that. As the result, we can turn that do_mmap() into vm_mmap(). Signed-off-by: Al Viro --- drivers/gpu/drm/i810/i810_dma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index f920fb5e42b638..fa9439159ebd6b 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -130,11 +130,10 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) return -EINVAL; /* This is all entirely broken */ - down_write(¤t->mm->mmap_sem); old_fops = file_priv->filp->f_op; file_priv->filp->f_op = &i810_buffer_fops; dev_priv->mmap_buffer = buf; - buf_priv->virtual = (void *)do_mmap(file_priv->filp, 0, buf->total, + buf_priv->virtual = (void *)vm_mmap(file_priv->filp, 0, buf->total, PROT_READ | PROT_WRITE, MAP_SHARED, buf->bus_address); dev_priv->mmap_buffer = NULL; @@ -145,7 +144,6 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) retcode = PTR_ERR(buf_priv->virtual); buf_priv->virtual = NULL; } - up_write(¤t->mm->mmap_sem); return retcode; } From 7732a557b1342c6e6966efb5f07effcf99f56167 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 May 2012 17:18:05 -0400 Subject: [PATCH 53/95] vfs: stop d_splice_alias creating directory aliases A directory should never have more than one dentry pointing to it. But d_splice_alias() will add one if it finds a directory with an already-existing non-DISCONNECTED dentry. I can't find an obvious reproducer, but I also can't see what prevents d_splice_alias() from encountering such a case. It therefore seems safest to allow d_splice_alias to use any dentry it finds. (Prior to the removal of dentry_unhash() from vfs_rmdir(), around v3.0, this could cause an nfsd deadlock like this: - Somebody attempts to remove a non-empty directory. - The dentry_unhash() in vfs_rmdir() unhashes the dentry pointing to the non-empty directory. - ->rmdir() then fails with -ENOTEMPTY - Before the vfs_rmdir() caller reaches dput(), an nfsd process in rename looks up the directory by filehandle; at the end of that lookup, this dentry is found by d_alloc_anon(), and a reference is taken on it, preventing dput() from removing it. - A regular lookup of the directory calls d_splice_alias(), finds only an unhashed (not a DISCONNECTED) dentry, and insteads adds a new one, so the directory now has two dentries. - The nfsd process in rename, which was previously looking up the source directory of the rename, now looks up the target directory (which is the same), and gets the dentry newly created by the previous lookup. - The rename, seeing two different dentries, assumes this is a cross-directory rename and attempts to take the i_mutex on the directory twice. That reproducer no longer exists, but I don't think there was anything fundamentally incorrect about the vfs_rmdir() behavior there, so I think the real fault was here in d_splice_alias().) Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 40469044088def..3d4be6f8e49eaf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1650,9 +1650,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_alias(inode, 1); + new = __d_find_any_alias(inode); if (new) { - BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); From 3f50fff4dace23d3cfeb195d5cd4ee813cee68b7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 May 2012 17:18:06 -0400 Subject: [PATCH 54/95] vfs: remove unused __d_splice_alias argument Nobody sets want_disconn any more. Reported-by: Peng Tao Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 3d4be6f8e49eaf..85c9e2bff8e651 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -683,8 +683,6 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question - * @want_discon: flag, used by d_splice_alias, to request - * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -693,10 +691,9 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one unless @want_discon is set, - * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. + * any other hashed alias over that. */ -static struct dentry *__d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias, *discon_alias; @@ -708,7 +705,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon) if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else if (!want_discon) { + } else { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -739,7 +736,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode, 0); + de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; @@ -2481,7 +2478,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode, 0); + alias = __d_find_alias(inode); if (alias) { actual = alias; write_seqlock(&rename_lock); From 46ce341b2f176c2611f12ac390adf862e932eb02 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 May 2012 11:39:13 +0100 Subject: [PATCH 55/95] pipe: return -ENOIOCTLCMD instead of -EINVAL on unknown ioctl command As described in commit 07d106d0a ("vfs: fix up ENOIOCTLCMD error handling"), drivers should return -ENOIOCTLCMD if they receive an ioctl command which they don't understand. Doing so will result in -ENOTTY being returned to userspace, which matches the behaviour of the compat layer if it fails to translate an ioctl command. This patch fixes the pipe ioctl to return -ENOIOCTLCMD instead of -EINVAL when passed an unknown ioctl command. Cc: Al Viro Cc: Andrew Morton Signed-off-by: Will Deacon Signed-off-by: Al Viro --- fs/pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pipe.c b/fs/pipe.c index fec5e4ad071a36..95ebb56de494de 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -693,7 +693,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(count, (int __user *)arg); default: - return -EINVAL; + return -ENOIOCTLCMD; } } From 63d37a84ab6004c235314ffd7a76c5eb28c2fae0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 May 2012 22:03:48 -0400 Subject: [PATCH 56/95] vfs: umount_tree() might be called on subtree that had never made it __mnt_make_shortterm() in there undoes the effect of __mnt_make_longterm() we'd done back when we set ->mnt_ns non-NULL; it should not be done to vfsmounts that had never gone through commit_tree() and friends. Kudos to lczerner for catching that one... Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 224aff1c0dfda0..1e4a5fe3d7b7f7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1074,8 +1074,9 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); + if (p->mnt_ns) + __mnt_make_shortterm(p); p->mnt_ns = NULL; - __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { p->mnt_parent->mnt_ghosts++; From 5a5e4c2eca0307deeb438c97dbdc608663515c0a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 01:49:38 -0400 Subject: [PATCH 57/95] binfmt_elf: switch elf_map() to vm_mmap/vm_munmap No reason to hold ->mmap_sem over the sequence Signed-off-by: Al Viro --- fs/binfmt_elf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e658dd134b95fb..1b52956afe33ab 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -329,7 +329,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, if (!size) return addr; - down_write(¤t->mm->mmap_sem); /* * total_size is the size of the ELF (interpreter) image. * The _first_ mmap needs to know the full size, otherwise @@ -340,13 +339,12 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, */ if (total_size) { total_size = ELF_PAGEALIGN(total_size); - map_addr = do_mmap(filep, addr, total_size, prot, type, off); + map_addr = vm_mmap(filep, addr, total_size, prot, type, off); if (!BAD_ADDR(map_addr)) - do_munmap(current->mm, map_addr+size, total_size-size); + vm_munmap(map_addr+size, total_size-size); } else - map_addr = do_mmap(filep, addr, size, prot, type, off); + map_addr = vm_mmap(filep, addr, size, prot, type, off); - up_write(¤t->mm->mmap_sem); return(map_addr); } From 7696e0c37f43187431388df7d8087a099b3e2f1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 01:56:23 -0400 Subject: [PATCH 58/95] binfmt_flat: use vm_munmap, we are missing ->mmap_sem there Signed-off-by: Al Viro --- fs/binfmt_flat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 6b2daf99fab8bc..178cb70acc26de 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -562,7 +562,7 @@ static int load_flat_file(struct linux_binprm * bprm, realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-realdatastart); - do_munmap(current->mm, textpos, text_len); + vm_munmap(textpos, text_len); ret = realdatastart; goto err; } @@ -586,8 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); - do_munmap(current->mm, textpos, text_len); - do_munmap(current->mm, realdatastart, len); + vm_munmap(textpos, text_len); + vm_munmap(realdatastart, len); ret = result; goto err; } @@ -654,7 +654,7 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); - do_munmap(current->mm, textpos, text_len + data_len + extra + + vm_munmap(textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); ret = result; goto err; From 657bec850ff98b82b3a617b588a6523487344ccc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 02:12:40 -0400 Subject: [PATCH 59/95] ia64, sparc64: convert wrappers around do_mremap() to sys_mremap() they contain open-coded sys_mremap() Signed-off-by: Al Viro --- arch/ia64/kernel/sys_ia64.c | 19 +++---------------- arch/sparc/kernel/sys_sparc_64.c | 11 ++--------- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 609d50056a6c7b..d9439ef2f66187 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -171,22 +171,9 @@ asmlinkage unsigned long ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { - extern unsigned long do_mremap (unsigned long addr, - unsigned long old_len, - unsigned long new_len, - unsigned long flags, - unsigned long new_addr); - - down_write(¤t->mm->mmap_sem); - { - addr = do_mremap(addr, old_len, new_len, flags, new_addr); - } - up_write(¤t->mm->mmap_sem); - - if (IS_ERR((void *) addr)) - return addr; - - force_successful_syscall_return(); + addr = sys_mremap(addr, old_len, new_len, flags, new_addr); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); return addr; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3ee51f189a5529..275f74fd6f6a3f 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -580,16 +580,9 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr) { - unsigned long ret = -EINVAL; - if (test_thread_flag(TIF_32BIT)) - goto out; - - down_write(¤t->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up_write(¤t->mm->mmap_sem); -out: - return ret; + return -EINVAL; + return sys_mremap(addr, old_len, new_len, flags, new_addr); } /* we come to here via sys_nis_syscall so it can setup the regs argument */ From 63a81db13205f21047a0799adfd3a2419f190577 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 11:32:04 -0400 Subject: [PATCH 60/95] merge do_mremap() into sys_mremap() Signed-off-by: Al Viro --- mm/mremap.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index db8d983b5a7d7a..169c53b87749ea 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -432,15 +432,17 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise * This option implies MREMAP_MAYMOVE. */ -unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) +SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + unsigned long, new_len, unsigned long, flags, + unsigned long, new_addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; + down_write(¤t->mm->mmap_sem); + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -538,17 +540,6 @@ unsigned long do_mremap(unsigned long addr, out: if (ret & ~PAGE_MASK) vm_unacct_memory(charged); - return ret; -} - -SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, - unsigned long, new_len, unsigned long, flags, - unsigned long, new_addr) -{ - unsigned long ret; - - down_write(¤t->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); up_write(¤t->mm->mmap_sem); return ret; } From 4ad310b836d5c61ac6e9b5fd7db12d0cd57136d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 11:55:49 -0400 Subject: [PATCH 61/95] ia64 perfmon: fix get_unmapped_area() use there get_unmapped_area() returns -E... on failure, not 0. Moreover, the wrapper around it is completely pointless. Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f00ba025375d56..d7f558c1e7117b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -604,12 +604,6 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) spin_unlock(&(x)->ctx_lock); } -static inline unsigned long -pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) -{ - return get_unmapped_area(file, addr, len, pgoff, flags); -} - /* forward declaration */ static const struct dentry_operations pfmfs_dentry_operations; @@ -2333,8 +2327,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t down_write(&task->mm->mmap_sem); /* find some free area in address space, must have mmap sem held */ - vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); - if (vma->vm_start == 0UL) { + vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); + if (IS_ERR_VALUE(vma->vm_start)) { DPRINT(("Cannot find unmapped area for size %ld\n", size)); up_write(&task->mm->mmap_sem); goto error; From cf74d14c4fbce9bcc9eb62f52d721d3399a2b87f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 12:09:53 -0400 Subject: [PATCH 62/95] unexport do_mmap() Signed-off-by: Al Viro --- mm/mmap.c | 1 - mm/nommu.c | 1 - 2 files changed, 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index e8dcfc7de866e2..83c56624f1f66a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1118,7 +1118,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EINVAL; return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); } -EXPORT_SYMBOL(do_mmap); unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/mm/nommu.c b/mm/nommu.c index bb8f4f004a82ce..de6084e3a04665 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1481,7 +1481,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EINVAL; return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); } -EXPORT_SYMBOL(do_mmap); unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:11:37 -0400 Subject: [PATCH 63/95] split cap_mmap_addr() out of cap_file_mmap() ... switch callers. Signed-off-by: Al Viro --- include/linux/security.h | 3 ++- security/apparmor/lsm.c | 2 +- security/commoncap.c | 32 +++++++++++++++++++++++--------- security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 2 +- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091ce5facf..4ad59c9fa731f6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_mmap_addr(unsigned long addr); extern int cap_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only); @@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + return cap_mmap_addr(addr); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 032daab449b0bb..8430d8937afb79 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -497,7 +497,7 @@ static int apparmor_file_mmap(struct file *file, unsigned long reqprot, int rc = 0; /* do DAC check */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/commoncap.c b/security/commoncap.c index e771cb1b2d7947..ebac3618896ee1 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) } /* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused + * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped - * @addr_only: unused * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. Returns 0 if this mapping should be allowed * -EPERM if not. */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_addr(unsigned long addr) { int ret = 0; @@ -986,3 +979,24 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, } return ret; } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below dac_mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + return cap_mmap_addr(addr); +} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fa2341b683314b..25c125eaa3d850 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3104,7 +3104,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d583c054580889..a6219771876885 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1199,7 +1199,7 @@ static int smack_file_mmap(struct file *file, int rc; /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:30:51 -0400 Subject: [PATCH 64/95] split ->file_mmap() into ->mmap_addr()/->mmap_file() ... i.e. file-dependent and address-dependent checks. Signed-off-by: Al Viro --- fs/exec.c | 4 ---- include/linux/security.h | 36 ++++++++++++++++++++---------------- mm/mmap.c | 12 ++++++++---- mm/mremap.c | 4 ++-- mm/nommu.c | 5 ++++- security/apparmor/lsm.c | 15 ++++----------- security/capability.c | 3 ++- security/commoncap.c | 21 +++------------------ security/security.c | 12 ++++++++---- security/selinux/hooks.c | 15 ++++++++------- security/smack/smack_lsm.c | 15 +++++---------- 11 files changed, 64 insertions(+), 78 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 52c9e2ff6e6bd8..a79786a8d2c88d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); - err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); - if (err) - goto err; - err = insert_vm_struct(mm, vma); if (err) goto err; diff --git a/include/linux/security.h b/include/linux/security.h index 4ad59c9fa731f6..f1bae0963ddc0b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_mmap_addr(unsigned long addr); -extern int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +extern int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @file_mmap : + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). * @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. - * @addr contains virtual address that will be used for the operation. - * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1482,10 +1483,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_mmap) (struct file *file, + int (*mmap_addr) (unsigned long addr); + int (*mmap_file) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only); + unsigned long flags); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); +int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_mmap(struct file *file, unsigned long reqprot, +static inline int security_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, - unsigned long addr, - unsigned long addr_only) + unsigned long flags) +{ + return 0; +} + +static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); } diff --git a/mm/mmap.c b/mm/mmap.c index 83c56624f1f66a..49283da9a2ae48 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1101,7 +1101,11 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + error = security_mmap_addr(addr); + if (error) + return error; + + error = security_mmap_file(file, reqprot, prot, flags); if (error) return error; @@ -1817,7 +1821,7 @@ int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_file_mmap(NULL, 0, 0, 0, address, 1); + error = security_mmap_addr(address); if (error) return error; @@ -2205,7 +2209,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_file_mmap(NULL, 0, 0, 0, addr, 1); + error = security_mmap_addr(addr); if (error) return error; @@ -2561,7 +2565,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + ret = security_mmap_addr(vma->vm_start); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index 169c53b87749ea..ebf10892b63df3 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,7 +371,7 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; @@ -532,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); diff --git a/mm/nommu.c b/mm/nommu.c index de6084e3a04665..acfe419785dbca 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1047,7 +1047,10 @@ static int validate_mmap_request(struct file *file, } /* allow the security API to have its say */ - ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); + ret = security_mmap_addr(addr); + if (ret < 0) + return ret; + ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8430d8937afb79..8ea39aabe94889 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot, return common_file_perm(op, file, mask); } -static int apparmor_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int apparmor_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - int rc = 0; - - /* do DAC check */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - return common_mmap(OP_FMMAP, file, prot, flags); } @@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = { .file_permission = apparmor_file_permission, .file_alloc_security = apparmor_file_alloc_security, .file_free_security = apparmor_file_free_security, - .file_mmap = apparmor_file_mmap, + .mmap_file = apparmor_mmap_file, + .mmap_addr = cap_mmap_addr, .file_mprotect = apparmor_file_mprotect, .file_lock = apparmor_file_lock, diff --git a/security/capability.c b/security/capability.c index fca889676c5e9e..61095df8b89ac4 100644 --- a/security/capability.c +++ b/security/capability.c @@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, file_mmap); + set_to_cap_if_null(ops, mmap_addr); + set_to_cap_if_null(ops, mmap_file); set_to_cap_if_null(ops, file_mprotect); set_to_cap_if_null(ops, file_lock); set_to_cap_if_null(ops, file_fcntl); diff --git a/security/commoncap.c b/security/commoncap.c index ebac3618896ee1..6dbae4650abe20 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -980,23 +980,8 @@ int cap_mmap_addr(unsigned long addr) return ret; } -/* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused - * @addr: address attempting to be mapped - * @addr_only: unused - * - * If the process is attempting to map memory below dac_mmap_min_addr they need - * CAP_SYS_RAWIO. The other parameters to this function are unused by the - * capability security module. Returns 0 if this mapping should be allowed - * -EPERM if not. - */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - return cap_mmap_addr(addr); + return 0; } diff --git a/security/security.c b/security/security.c index 5497a57fba0154..d91c66d3956bc6 100644 --- a/security/security.c +++ b/security/security.c @@ -657,18 +657,22 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { int ret; - ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; return ima_file_mmap(file, prot); } +int security_mmap_addr(unsigned long addr) +{ + return security_ops->mmap_addr(addr); +} + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 25c125eaa3d850..372ec6502aa875 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3083,9 +3083,7 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared return rc; } -static int selinux_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int selinux_mmap_addr(unsigned long addr) { int rc = 0; u32 sid = current_sid(); @@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; + return cap_mmap_addr(addr); +} +static int selinux_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +{ if (selinux_checkreqprot) prot = reqprot; @@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = { .file_alloc_security = selinux_file_alloc_security, .file_free_security = selinux_file_free_security, .file_ioctl = selinux_file_ioctl, - .file_mmap = selinux_file_mmap, + .mmap_file = selinux_mmap_file, + .mmap_addr = selinux_mmap_addr, .file_mprotect = selinux_file_mprotect, .file_lock = selinux_file_lock, .file_fcntl = selinux_file_fcntl, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a6219771876885..ee0bb5735f35c9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_file_mmap : + * smack_mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, * @flags contains the operational flags. * Return 0 if permission is granted. */ -static int smack_file_mmap(struct file *file, +static int smack_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only) + unsigned long flags) { struct smack_known *skp; struct smack_rule *srp; @@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file, int tmay; int rc; - /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - if (file == NULL || file->f_dentry == NULL) return 0; @@ -3482,7 +3476,8 @@ struct security_operations smack_ops = { .file_ioctl = smack_file_ioctl, .file_lock = smack_file_lock, .file_fcntl = smack_file_fcntl, - .file_mmap = smack_file_mmap, + .mmap_file = smack_mmap_file, + .mmap_addr = cap_mmap_addr, .file_set_fowner = smack_file_set_fowner, .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:11:23 -0400 Subject: [PATCH 65/95] take security_mmap_file() outside of ->mmap_sem Signed-off-by: Al Viro --- include/linux/security.h | 7 +++---- ipc/shm.c | 5 +++++ mm/mmap.c | 23 ++++++++++++----------- mm/nommu.c | 22 ++++++++++++---------- security/security.c | 33 ++++++++++++++++++++++++++++++--- 5 files changed, 62 insertions(+), 28 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index f1bae0963ddc0b..4e5a73cdbbef18 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, +static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 406c5b20819337..e3a8063b1768eb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) sfd->file = shp->shm_file; sfd->vm_ops = NULL; + err = security_mmap_file(file, prot, flags); + if (err) + goto out_fput; + down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; @@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) invalid: up_write(¤t->mm->mmap_sem); +out_fput: fput(file); out_nattch: diff --git a/mm/mmap.c b/mm/mmap.c index 49283da9a2ae48..34b280f4238daf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -979,7 +979,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct inode *inode; vm_flags_t vm_flags; int error; - unsigned long reqprot = prot; /* * Does the application expect PROT_READ to imply PROT_EXEC? @@ -1105,10 +1104,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (error) return error; - error = security_mmap_file(file, reqprot, prot, flags); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1130,9 +1125,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1168,9 +1166,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + retval = security_mmap_file(file, prot, flags); + if (!retval) { + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + } if (file) fput(file); diff --git a/mm/nommu.c b/mm/nommu.c index acfe419785dbca..8cbfd623b04a2b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file, unsigned long *_capabilities) { unsigned long capabilities, rlen; - unsigned long reqprot = prot; int ret; /* do the simple checks first */ @@ -1048,9 +1047,6 @@ static int validate_mmap_request(struct file *file, /* allow the security API to have its say */ ret = security_mmap_addr(addr); - if (ret < 0) - return ret; - ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; @@ -1492,9 +1488,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1515,9 +1514,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + ret = security_mmap_file(file, prot, flags); + if (!ret) { + down_write(¤t->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(¤t->mm->mmap_sem); + } if (file) fput(file); diff --git a/security/security.c b/security/security.c index d91c66d3956bc6..3b11b3b72fe2f0 100644 --- a/security/security.c +++ b/security/security.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -657,11 +660,35 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) { + unsigned long reqprot = prot; int ret; - + /* + * Does the application expect PROT_READ to imply PROT_EXEC? + * + * (the exception is when the underlying filesystem is noexec + * mounted, in which case we dont add PROT_EXEC.) + */ + if (!(reqprot & PROT_READ)) + goto out; + if (!(current->personality & READ_IMPLIES_EXEC)) + goto out; + if (!file) { + prot |= PROT_EXEC; + } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { +#ifndef CONFIG_MMU + unsigned long caps = 0; + struct address_space *mapping = file->f_mapping; + if (mapping && mapping->backing_dev_info) + caps = mapping->backing_dev_info->capabilities; + if (!(caps & BDI_CAP_EXEC_MAP)) + goto out; +#endif + prot |= PROT_EXEC; + } +out: ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; From 9ac4ed4bd0adec75db13a4b08a39a3918ec0e3c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:13:15 -0400 Subject: [PATCH 66/95] move security_mmap_addr() to saner place it really should be done by get_unmapped_area(); that cuts down on the amount of callers considerably and it's the right place for that stuff anyway. Signed-off-by: Al Viro --- mm/mmap.c | 17 +++-------------- mm/mremap.c | 7 ------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 34b280f4238daf..131521e12f1390 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -978,7 +978,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; - int error; /* * Does the application expect PROT_READ to imply PROT_EXEC? @@ -1100,10 +1099,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_mmap_addr(addr); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1633,7 +1628,9 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (addr & ~PAGE_MASK) return -EINVAL; - return arch_rebalance_pgtables(addr, len); + addr = arch_rebalance_pgtables(addr, len); + error = security_mmap_addr(addr); + return error ? error : addr; } EXPORT_SYMBOL(get_unmapped_area); @@ -2210,10 +2207,6 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_mmap_addr(addr); - if (error) - return error; - flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); @@ -2566,10 +2559,6 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_mmap_addr(vma->vm_start); - if (ret) - goto out; - ret = insert_vm_struct(mm, vma); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index ebf10892b63df3..21fed202ddad86 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,10 +371,6 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_mmap_addr(new_addr); - if (ret) - goto out; - ret = do_munmap(mm, new_addr, new_len); if (ret) goto out; @@ -532,9 +528,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_mmap_addr(new_addr); - if (ret) - goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); } out: From 98de59bfe4b2ff6344d9ad8e5296f80de5dcc5b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 19:58:30 -0400 Subject: [PATCH 67/95] take calculation of final prot in security_mmap_file() into a helper Signed-off-by: Al Viro --- security/security.c | 46 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/security/security.c b/security/security.c index 3b11b3b72fe2f0..3efc9b12aef440 100644 --- a/security/security.c +++ b/security/security.c @@ -660,36 +660,46 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long prot, - unsigned long flags) +static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { - unsigned long reqprot = prot; - int ret; /* - * Does the application expect PROT_READ to imply PROT_EXEC? - * - * (the exception is when the underlying filesystem is noexec - * mounted, in which case we dont add PROT_EXEC.) + * Does we have PROT_READ and does the application expect + * it to imply PROT_EXEC? If not, nothing to talk about... */ - if (!(reqprot & PROT_READ)) - goto out; + if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) + return prot; if (!(current->personality & READ_IMPLIES_EXEC)) - goto out; - if (!file) { - prot |= PROT_EXEC; - } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { + return prot; + /* + * if that's an anonymous mapping, let it. + */ + if (!file) + return prot | PROT_EXEC; + /* + * ditto if it's not on noexec mount, except that on !MMU we need + * BDI_CAP_EXEC_MMAP (== VM_MAYEXEC) in this case + */ + if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { #ifndef CONFIG_MMU unsigned long caps = 0; struct address_space *mapping = file->f_mapping; if (mapping && mapping->backing_dev_info) caps = mapping->backing_dev_info->capabilities; if (!(caps & BDI_CAP_EXEC_MAP)) - goto out; + return prot; #endif - prot |= PROT_EXEC; + return prot | PROT_EXEC; } -out: - ret = security_ops->mmap_file(file, reqprot, prot, flags); + /* anything on noexec mount won't get PROT_EXEC */ + return prot; +} + +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) +{ + int ret; + ret = security_ops->mmap_file(file, prot, + mmap_prot(file, prot), flags); if (ret) return ret; return ima_file_mmap(file, prot); From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:08:42 -0400 Subject: [PATCH 68/95] switch aio and shm to do_mmap_pgoff(), make do_mmap() static after all, 0 bytes and 0 pages is the same thing... Signed-off-by: Al Viro --- fs/aio.c | 6 +++--- include/linux/mm.h | 2 +- ipc/shm.c | 2 +- mm/mmap.c | 4 ++-- mm/nommu.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index e7f2fad7b4ce7c..07154d99cc676f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->mmap_size = nr_pages * PAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, - 0); + info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f24c63af..4189e0d0ac05ed 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap(struct file *, unsigned long, +extern unsigned long do_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); diff --git a/ipc/shm.c b/ipc/shm.c index e3a8063b1768eb..5e2cbfdab6fc0d 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1054,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = do_mmap (file, addr, size, prot, flags, 0); + user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); *raddr = user_addr; err = 0; if (IS_ERR_VALUE(user_addr)) diff --git a/mm/mmap.c b/mm/mmap.c index 131521e12f1390..f7786542c59da2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -971,7 +971,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) * The caller must hold down_write(¤t->mm->mmap_sem). */ -static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -1102,7 +1102,7 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { diff --git a/mm/nommu.c b/mm/nommu.c index 8cbfd623b04a2b..a1792ed2cb1a52 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1232,7 +1232,7 @@ static int do_mmap_private(struct vm_area_struct *vma, /* * handle mapping creation for uClinux */ -static unsigned long do_mmap_pgoff(struct file *file, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1470,7 +1470,7 @@ static unsigned long do_mmap_pgoff(struct file *file, return -ENOMEM; } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { From dc982501d9643ab0c117e7d87562857ce234652d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:11:57 -0400 Subject: [PATCH 69/95] kill do_mmap() completely just pull into vm_mmap() Signed-off-by: Al Viro --- mm/mmap.c | 16 +++++----------- mm/nommu.c | 16 +++++----------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index f7786542c59da2..538c905d6d9b76 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1102,28 +1102,22 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -static unsigned long do_mmap(struct file *file, unsigned long addr, +unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { + unsigned long ret; + struct mm_struct *mm = current->mm; + if (unlikely(offset + PAGE_ALIGN(len) < offset)) return -EINVAL; if (unlikely(offset & ~PAGE_MASK)) return -EINVAL; - return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -} - -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); up_write(&mm->mmap_sem); } return ret; diff --git a/mm/nommu.c b/mm/nommu.c index a1792ed2cb1a52..e6123a5b2cc14f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1470,28 +1470,22 @@ unsigned long do_mmap_pgoff(struct file *file, return -ENOMEM; } -static unsigned long do_mmap(struct file *file, unsigned long addr, +unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { + unsigned long ret; + struct mm_struct *mm = current->mm; + if (unlikely(offset + PAGE_ALIGN(len) < offset)) return -EINVAL; if (unlikely(offset & ~PAGE_MASK)) return -EINVAL; - return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -} - -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); up_write(&mm->mmap_sem); } return ret; From eb36c5873b96e8c7376768d3906da74aae6e3839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:17:35 -0400 Subject: [PATCH 70/95] new helper: vm_mmap_pgoff() take it to mm/util.c, convert vm_mmap() to use of that one and take it to mm/util.c as well, convert both sys_mmap_pgoff() to use of vm_mmap_pgoff() Signed-off-by: Al Viro --- mm/internal.h | 4 ++++ mm/mmap.c | 30 +----------------------------- mm/nommu.c | 29 +---------------------------- mm/util.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 57 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index aee4761cf9a92b..5307a022731762 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -342,3 +342,7 @@ extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; + +extern unsigned long vm_mmap_pgoff(struct file *, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); diff --git a/mm/mmap.c b/mm/mmap.c index 538c905d6d9b76..98ef8a6c2cd80e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1102,28 +1102,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; - - if (unlikely(offset + PAGE_ALIGN(len) < offset)) - return -EINVAL; - if (unlikely(offset & ~PAGE_MASK)) - return -EINVAL; - - ret = security_mmap_file(file, prot, flag); - if (!ret) { - down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); - up_write(&mm->mmap_sem); - } - return ret; -} -EXPORT_SYMBOL(vm_mmap); - SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1155,13 +1133,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - retval = security_mmap_file(file, prot, flags); - if (!retval) { - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - } - + retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); out: diff --git a/mm/nommu.c b/mm/nommu.c index e6123a5b2cc14f..c4acfbc099727b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1470,28 +1470,6 @@ unsigned long do_mmap_pgoff(struct file *file, return -ENOMEM; } -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; - - if (unlikely(offset + PAGE_ALIGN(len) < offset)) - return -EINVAL; - if (unlikely(offset & ~PAGE_MASK)) - return -EINVAL; - - ret = security_mmap_file(file, prot, flag); - if (!ret) { - down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); - up_write(&mm->mmap_sem); - } - return ret; -} -EXPORT_SYMBOL(vm_mmap); - SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1508,12 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - ret = security_mmap_file(file, prot, flags); - if (!ret) { - down_write(¤t->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - } + ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); diff --git a/mm/util.c b/mm/util.c index ae962b31de888a..8c7265afa29f21 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "internal.h" @@ -341,6 +342,35 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, } EXPORT_SYMBOL_GPL(get_user_pages_fast); +unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long pgoff) +{ + unsigned long ret; + struct mm_struct *mm = current->mm; + + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); + up_write(&mm->mmap_sem); + } + return ret; +} + +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + if (unlikely(offset + PAGE_ALIGN(len) < offset)) + return -EINVAL; + if (unlikely(offset & ~PAGE_MASK)) + return -EINVAL; + + return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +} +EXPORT_SYMBOL(vm_mmap); + /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); From 17d1587f553dbdc9a55fe253903437f7e3e3448e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:19:20 -0400 Subject: [PATCH 71/95] unexport do_munmap() Signed-off-by: Al Viro --- mm/mmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 98ef8a6c2cd80e..6cb3193cdaeedb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2125,7 +2125,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; } -EXPORT_SYMBOL(do_munmap); int vm_munmap(unsigned long start, size_t len) { From d58367515f47371f7202d8b258ee0614a8955a6a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2012 18:17:15 -0400 Subject: [PATCH 72/95] sch_atm.c: get rid of poinless extern sockfd_lookup() is declared in linux/net.h, which is pulled by linux/skbuff.h (and needed for a lot of other stuff in sch_atm.c anyway). Signed-off-by: Al Viro --- net/sched/sch_atm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 8522a479337413..ca8e0a57d945da 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -16,8 +16,6 @@ #include #include -extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ - /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. From efaa33eb1309d65528d5a54d87d69bdcbdae8c10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:04 +0300 Subject: [PATCH 73/95] reiserfs: cleanup reiserfs_fill_super a bit We have the reiserfs superblock pointer in the 'sbi' variable in this function, no need to use the 'REISERFS_SB(s)' macro which is the same. This is jut a small clean-up. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/reiserfs/super.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c07b7d709447de..60cddb7e6389fe 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1730,19 +1730,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return -ENOMEM; s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); + sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); + sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); + sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); /* no preallocation minimum, be smart in reiserfs_file_write instead */ - REISERFS_SB(s)->s_alloc_options.preallocmin = 0; + sbi->s_alloc_options.preallocmin = 0; /* Preallocate by 16 blocks (17-1) at once */ - REISERFS_SB(s)->s_alloc_options.preallocsize = 17; + sbi->s_alloc_options.preallocsize = 17; /* setup default block allocator options */ reiserfs_init_alloc_options(s); - mutex_init(&REISERFS_SB(s)->lock); - REISERFS_SB(s)->lock_depth = -1; + mutex_init(&sbi->lock); + sbi->lock_depth = -1; jdev_name = NULL; if (reiserfs_parse_options @@ -1751,8 +1751,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error_unlocked; } if (jdev_name && jdev_name[0]) { - REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); - if (!REISERFS_SB(s)->s_jdev) { + sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); + if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); goto error; @@ -1810,7 +1810,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* make data=ordered the default */ if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && !reiserfs_data_writeback(s)) { - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); + sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); } if (reiserfs_data_log(s)) { From 25729b0e94c2103a8d726eda843136a3775366cf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:05 +0300 Subject: [PATCH 74/95] reiserfs: clean-up function return type Turn 'reiserfs_flush_old_commits()' into a void function because the callers do not cares about what it returns anyway. We are going to remove the 'sb->s_dirt' field completely and this patch is a small step towards this direction. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/reiserfs/journal.c | 8 +++----- fs/reiserfs/reiserfs.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b1a08573fe1427..68aea62ea61d77 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -3492,7 +3492,7 @@ static void flush_async_commits(struct work_struct *work) ** flushes any old transactions to disk ** ends the current transaction if it is too old */ -int reiserfs_flush_old_commits(struct super_block *sb) +void reiserfs_flush_old_commits(struct super_block *sb) { time_t now; struct reiserfs_transaction_handle th; @@ -3502,9 +3502,8 @@ int reiserfs_flush_old_commits(struct super_block *sb) /* safety check so we don't flush while we are replaying the log during * mount */ - if (list_empty(&journal->j_journal_list)) { - return 0; - } + if (list_empty(&journal->j_journal_list)) + return; /* check the current transaction. If there are no writers, and it is * too old, finish it, and force the commit blocks to disk @@ -3526,7 +3525,6 @@ int reiserfs_flush_old_commits(struct super_block *sb) do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); } } - return sb->s_dirt; } /* diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 14a4f9dfb1714d..5c700550bb076b 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2452,7 +2452,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); -int reiserfs_flush_old_commits(struct super_block *); +void reiserfs_flush_old_commits(struct super_block *); int reiserfs_commit_for_inode(struct inode *); int reiserfs_inode_needs_commit(struct inode *); void reiserfs_update_inode_transaction(struct inode *); From 717f03c4d71677d2afb68d54628def3aae5d46ab Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:06 +0300 Subject: [PATCH 75/95] reiserfs: remove useless superblock dirtying The 'reiserfs_resize()' function marks the superblock as dirty by assigning 1 to 's_dirt' and then calls 'journal_mark_dirty()' which does the same. Thus, we can remove the assignment from 'reiserfs_resize()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/reiserfs/resize.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 9a17f63c3fd7f3..3ce02cff5e90bd 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -200,7 +200,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) (bmap_nr_new - bmap_nr))); PUT_SB_BLOCK_COUNT(s, block_count_new); PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); - s->s_dirt = 1; journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); From 5c5fd81962271d4ee2984837fef4ec37e689aa41 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:07 +0300 Subject: [PATCH 76/95] reiserfs: mark the superblock as dirty a bit later The 'journal_mark_dirty()' function currently first marks the superblock as dirty by setting 's_dirt' to 1, then does various sanity checks and returns, then actuall does all the magic with the journal. This is not an ideal order, though. It makes more sense to first do all the checks, then do all the internal stuff, and at the end notify the VFS that the superblock is now dirty. This patch moves the 's_dirt = 1' assignment from the very beginning of this function to the very end. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/reiserfs/journal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 68aea62ea61d77..e5e06dd8562f14 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -3231,8 +3231,6 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, th->t_trans_id, journal->j_trans_id); } - sb->s_dirt = 1; - prepared = test_clear_buffer_journal_prepared(bh); clear_buffer_journal_restore_dirty(bh); /* already in this transaction, we are done */ @@ -3316,6 +3314,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, journal->j_first = cn; journal->j_last = cn; } + sb->s_dirt = 1; return 0; } From 033369d1af1264abc23bea2e174aa47cdd212f6f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:08 +0300 Subject: [PATCH 77/95] reiserfs: get rid of resierfs_sync_super This patch stops reiserfs using the VFS 'write_super()' method along with the s_dirt flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no diry superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro --- fs/reiserfs/journal.c | 6 +++-- fs/reiserfs/reiserfs.h | 6 +++++ fs/reiserfs/super.c | 54 +++++++++++++++++++++++++++++++++++------- 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e5e06dd8562f14..afcadcc03e8ac8 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1923,6 +1923,8 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * the workqueue job (flush_async_commit) needs this lock */ reiserfs_write_unlock(sb); + + cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); flush_workqueue(commit_wq); if (!reiserfs_mounted_fs_count) { @@ -3314,7 +3316,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, journal->j_first = cn; journal->j_last = cn; } - sb->s_dirt = 1; + reiserfs_schedule_old_flush(sb); return 0; } @@ -3952,7 +3954,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ** it tells us if we should continue with the journal_end, or just return */ if (!check_journal_end(th, sb, nblocks, flags)) { - sb->s_dirt = 1; + reiserfs_schedule_old_flush(sb); wake_queued_writers(sb); reiserfs_async_progress_wait(sb); goto out; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 5c700550bb076b..33215f57ea06ce 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -480,6 +480,11 @@ struct reiserfs_sb_info { struct dentry *priv_root; /* root of /.reiserfs_priv */ struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; + + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work old_work; /* old transactions flush delayed work */ + spinlock_t old_work_lock; /* protects old_work and work_queued */ + #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; @@ -2487,6 +2492,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); +void reiserfs_schedule_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 60cddb7e6389fe..651ce767b55d82 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -72,20 +72,58 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th, s, 1)) reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ reiserfs_write_unlock(s); return 0; } -static void reiserfs_write_super(struct super_block *s) +static void flush_old_commits(struct work_struct *work) { + struct reiserfs_sb_info *sbi; + struct super_block *s; + + sbi = container_of(work, struct reiserfs_sb_info, old_work.work); + s = sbi->s_journal->j_work_sb; + + spin_lock(&sbi->old_work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); + reiserfs_sync_fs(s, 1); } +void reiserfs_schedule_old_flush(struct super_block *s) +{ + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + unsigned long delay; + + if (s->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->old_work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->old_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->old_work_lock); +} + +static void cancel_old_flush(struct super_block *s) +{ + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + spin_lock(&sbi->old_work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); +} + static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; + + cancel_old_flush(s); + reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { int err = journal_begin(&th, s, 1); @@ -99,7 +137,6 @@ static int reiserfs_freeze(struct super_block *s) journal_end_sync(&th, s, 1); } } - s->s_dirt = 0; reiserfs_write_unlock(s); return 0; } @@ -483,9 +520,6 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_lock(s); - if (s->s_dirt) - reiserfs_write_super(s); - /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -692,7 +726,6 @@ static const struct super_operations reiserfs_sops = { .dirty_inode = reiserfs_dirty_inode, .evict_inode = reiserfs_evict_inode, .put_super = reiserfs_put_super, - .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, .freeze_fs = reiserfs_freeze, .unfreeze_fs = reiserfs_unfreeze, @@ -1400,7 +1433,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_end(&th, s, 10); if (err) goto out_err; - s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { dquot_resume(s, -1); @@ -1741,6 +1773,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* setup default block allocator options */ reiserfs_init_alloc_options(s); + spin_lock_init(&sbi->old_work_lock); + INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); mutex_init(&sbi->lock); sbi->lock_depth = -1; @@ -2003,6 +2037,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_write_unlock(s); } + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:59:21 -0400 Subject: [PATCH 78/95] fs: introduce inode operation ->update_time Btrfs has to make sure we have space to allocate new blocks in order to modify the inode, so updating time can fail. We've gotten around this by having our own file_update_time but this is kind of a pain, and Christoph has indicated he would like to make xfs do something different with atime updates. So introduce ->update_time, where we will deal with i_version an a/m/c time updates and indicate which changes need to be made. The normal version just does what it has always done, updates the time and marks the inode dirty, and then filesystems can choose to do something different. I've gone through all of the users of file_update_time and made them check for errors with the exception of the fault code since it's complicated and I wasn't quite sure what to do there, also Jan is going to be pushing the file time updates into page_mkwrite for those who have it so that should satisfy btrfs and make it not a big deal to check the file_update_time() return code in the generic fault path. Thanks, Signed-off-by: Josef Bacik --- Documentation/filesystems/Locking | 3 ++ Documentation/filesystems/vfs.txt | 4 +++ fs/fuse/file.c | 4 ++- fs/inode.c | 56 ++++++++++++++++++++++--------- fs/ncpfs/file.c | 6 ++-- fs/ntfs/file.c | 4 ++- fs/pipe.c | 7 ++-- fs/splice.c | 6 ++-- fs/xfs/xfs_file.c | 7 ++-- include/linux/fs.h | 10 +++++- mm/filemap.c | 4 ++- mm/filemap_xip.c | 4 ++- 12 files changed, 86 insertions(+), 29 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4fca82e5276e71..d5a269a51a9efe 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + void (*update_time)(struct inode *, struct timespec *, int); locking rules: all may block @@ -89,6 +90,8 @@ listxattr: no removexattr: yes truncate_range: yes fiemap: no +update_time: no + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 0d0492028082c0..b2aa722e5ea24f 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + void (*update_time)(struct inode *, struct timespec *, int); }; Again, all methods are called without any locks being held, unless @@ -475,6 +476,9 @@ otherwise noted. truncate_range: a method provided by the underlying filesystem to truncate a range of blocks , i.e. punch a hole somewhere in a file. + update_time: called by the VFS to update a specific time or the i_version of + an inode. If this is not defined the VFS will update the inode itself + and call mark_inode_dirty_sync. The Address Space Object ======================== diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b7fd7515..9562109d3a879b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; if (file->f_flags & O_DIRECT) { written = generic_file_direct_write(iocb, iov, &nr_segs, diff --git a/fs/inode.c b/fs/inode.c index a79555e492e075..f0335fc315eddf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1487,6 +1487,27 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } +/* + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this. + */ +static int update_time(struct inode *inode, struct timespec *time, int flags) +{ + if (inode->i_op->update_time) + return inode->i_op->update_time(inode, time, flags); + + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_VERSION) + inode_inc_iversion(inode); + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + mark_inode_dirty_sync(inode); + return 0; +} + /** * touch_atime - update the access time * @path: the &struct path to update @@ -1524,8 +1545,14 @@ void touch_atime(struct path *path) if (mnt_want_write(mnt)) return; - inode->i_atime = now; - mark_inode_dirty_sync(inode); + /* + * File systems can error out when updating inodes if they need to + * allocate new space to modify an inode (such is the case for + * Btrfs), but since we touch atime while walking down the path we + * really don't care if we failed to update the atime of the file, + * so just ignore the return value. + */ + update_time(inode, &now, S_ATIME); mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1604,18 +1631,20 @@ EXPORT_SYMBOL(file_remove_suid); * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. + * timestamps are handled by the server. This can return an error for + * file systems who need to allocate space in order to update an inode. */ -void file_update_time(struct file *file) +int file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + int sync_it = 0; + int ret; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) - return; + return 0; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) @@ -1628,21 +1657,16 @@ void file_update_time(struct file *file) sync_it |= S_VERSION; if (!sync_it) - return; + return 0; /* Finally allowed to write? Takes lock. */ if (mnt_want_write_file(file)) - return; + return 0; - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) - inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - mark_inode_dirty_sync(inode); + ret = update_time(inode, &now, sync_it); mnt_drop_write_file(file); + + return ret; } EXPORT_SYMBOL(file_update_time); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3ff5fcc1528fd2..122e260247f53c 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * already_written = 0; + errno = file_update_time(file); + if (errno) + goto outrel; + bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ @@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - file_update_time(file); - *ppos = pos; if (pos > i_size_read(inode)) { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8639169221c7ae..7389d2d5e51d25 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = file_remove_suid(file); if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: diff --git a/fs/pipe.c b/fs/pipe.c index 95ebb56de494de..49c1065256fd10 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -654,8 +654,11 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) - file_update_time(filp); + if (ret > 0) { + int err = file_update_time(filp); + if (err) + ret = err; + } return ret; } diff --git a/fs/splice.c b/fs/splice.c index f8476841eb04e0..47c4c1ad0c0442 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1003,8 +1003,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) { - file_update_time(out); - ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + ret = file_update_time(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, + pipe_to_file); } mutex_unlock(&inode->i_mutex); } while (ret > 0); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d214b87f6bb06..9f7ec15a65222e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -586,8 +586,11 @@ xfs_file_aio_write_checks( * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); + if (likely(!(file->f_mode & FMODE_NOCMTIME))) { + error = file_update_time(file); + if (error) + return error; + } /* * If we're writing the file then make sure to clear the setuid and diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a9630948e1..57fc70574d2068 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1684,6 +1684,7 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } +enum file_time_flags { + S_ATIME = 1, + S_MTIME = 2, + S_CTIME = 4, + S_VERSION = 8, +}; + extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern void file_update_time(struct file *file); +extern int file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); diff --git a/mm/filemap.c b/mm/filemap.c index 21e5abfbcdf661..51070f1f1b524f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2463,7 +2463,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a4eb3113222912..213ca1f5340980 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - file_update_time(filp); + ret = file_update_time(filp); + if (ret) + goto out_backing; ret = __xip_file_write (filp, buf, count, pos, ppos); From e41f941a23115e84a8550b3d901a13a14b2edc2f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:46:47 -0400 Subject: [PATCH 79/95] Btrfs: move over to use ->update_time Btrfs had been doing it's own file_update_time so we could catch ENOSPC properly, so just update our btrfs_update_time to work with the new stuff and then we'll be fancy later. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 - fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 53 +++++++++++++----------------------------------- 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d6008c..ba8743b9a82542 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2922,7 +2922,6 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); int btrfs_dirty_inode(struct inode *inode); -int btrfs_update_time(struct file *file); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc4f..974beb84ed61f8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1404,7 +1404,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = btrfs_update_time(file); + err = file_update_time(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ceb7b9c9edcc14..3c1723a9ae6f55 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4431,46 +4431,18 @@ int btrfs_dirty_inode(struct inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -int btrfs_update_time(struct file *file) +static int btrfs_update_time(struct inode *inode, struct timespec *now, + int flags) { - struct inode *inode = file->f_path.dentry->d_inode; - struct timespec now; - int ret; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; - - /* First try to exhaust all avenues to not sync */ - if (IS_NOCMTIME(inode)) - return 0; - - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) - sync_it = S_MTIME; - - if (!timespec_equal(&inode->i_ctime, &now)) - sync_it |= S_CTIME; - - if (IS_I_VERSION(inode)) - sync_it |= S_VERSION; - - if (!sync_it) - return 0; - - /* Finally allowed to write? Takes lock. */ - if (mnt_want_write_file(file)) - return 0; - - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) + if (flags & S_VERSION) inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - ret = btrfs_dirty_inode(inode); - if (!ret) - mark_inode_dirty_sync(inode); - mnt_drop_write(file->f_path.mnt); - return ret; + if (flags & S_CTIME) + inode->i_ctime = *now; + if (flags & S_MTIME) + inode->i_mtime = *now; + if (flags & S_ATIME) + inode->i_atime = *now; + return btrfs_dirty_inode(inode); } /* @@ -6576,7 +6548,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { - ret = btrfs_update_time(vma->vm_file); + ret = file_update_time(vma->vm_file); reserved = 1; } if (ret) { @@ -7647,6 +7619,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .permission = btrfs_permission, .fiemap = btrfs_fiemap, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7657,6 +7630,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, @@ -7670,6 +7644,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; const struct dentry_operations btrfs_dentry_operations = { From 697f514df10b0f46bcd7596c1be18b7e2e9b28bb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:05 +0200 Subject: [PATCH 80/95] vfs: split do_lookup() Split do_lookup() into two functions: lookup_fast() - does cached lookup without i_mutex lookup_slow() - does lookup with i_mutex Both follow managed dentries. The new functions are needed by atomic_open. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 59 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 93ac9323b1f7c5..7f4ab820811a09 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1125,8 +1125,8 @@ static struct dentry *__lookup_hash(struct qstr *name, * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. */ -static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path, struct inode **inode) +static int lookup_fast(struct nameidata *nd, struct qstr *name, + struct path *path, struct inode **inode) { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; @@ -1208,7 +1208,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, goto need_lookup; } } -done: + path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); @@ -1222,6 +1222,17 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return 0; need_lookup: + return 1; +} + +/* Fast lookup failed, do it the slow way */ +static int lookup_slow(struct nameidata *nd, struct qstr *name, + struct path *path) +{ + struct dentry *dentry, *parent; + int err; + + parent = nd->path.dentry; BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); @@ -1229,7 +1240,16 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); - goto done; + path->mnt = nd->path.mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); + if (unlikely(err < 0)) { + path_put_conditional(path, nd); + return err; + } + if (err) + nd->flags |= LOOKUP_JUMPED; + return 0; } static inline int may_lookup(struct nameidata *nd) @@ -1301,21 +1321,26 @@ static inline int walk_component(struct nameidata *nd, struct path *path, */ if (unlikely(type != LAST_NORM)) return handle_dots(nd, type); - err = do_lookup(nd, name, path, &inode); + err = lookup_fast(nd, name, path, &inode); if (unlikely(err)) { - terminate_walk(nd); - return err; - } - if (!inode) { - path_to_nameidata(path, nd); - terminate_walk(nd); - return -ENOENT; + if (err < 0) + goto out_err; + + err = lookup_slow(nd, name, path); + if (err < 0) + goto out_err; + + inode = path->dentry->d_inode; } + err = -ENOENT; + if (!inode) + goto out_path_put; + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { - terminate_walk(nd); - return -ECHILD; + err = -ECHILD; + goto out_err; } } BUG_ON(inode != path->dentry->d_inode); @@ -1324,6 +1349,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); nd->inode = inode; return 0; + +out_path_put: + path_to_nameidata(path, nd); +out_err: + terminate_walk(nd); + return err; } /* From e276ae672fa2d727721b1a5a2508ff34bac85439 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:06 +0200 Subject: [PATCH 81/95] vfs: do_last(): make exit RCU safe Allow returning from do_last() with LOOKUP_RCU still set on the "out:" and "exit:" labels. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 7f4ab820811a09..edc18cd63a8884 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2383,7 +2383,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, out: if (want_write) mnt_drop_write(nd->path.mnt); - path_put(&nd->path); + terminate_walk(nd); return filp; exit_mutex_unlock: From a1eb33153090549e622ab42cb375af06614dd7a8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:07 +0200 Subject: [PATCH 82/95] vfs: do_last(): inline walk_component() Copy walk_component() into do_lookup(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index edc18cd63a8884..f6b31c94c1107b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2200,6 +2200,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; + struct inode *inode; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2237,12 +2238,36 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) symlink_ok = 1; /* we _can_ be in RCU mode here */ - error = walk_component(nd, path, &nd->last, LAST_NORM, - !symlink_ok); - if (error < 0) - return ERR_PTR(error); - if (error) /* symlink */ + error = lookup_fast(nd, &nd->last, path, &inode); + if (unlikely(error)) { + if (error < 0) + goto exit; + + error = lookup_slow(nd, &nd->last, path); + if (error < 0) + goto exit; + + inode = path->dentry->d_inode; + } + error = -ENOENT; + if (!inode) { + path_to_nameidata(path, nd); + goto exit; + } + + if (should_follow_link(inode, !symlink_ok)) { + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + error = -ECHILD; + goto exit; + } + } + BUG_ON(inode != path->dentry->d_inode); return NULL; + } + path_to_nameidata(path, nd); + nd->inode = inode; + /* sayonara */ error = complete_walk(nd); if (error) From decf3400879d02d0eafedea52c7f208587be062a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:08 +0200 Subject: [PATCH 83/95] vfs: do_last(): use inode variable Use helper variable instead of path->dentry->d_inode before complete_walk(). This will allow this code to be used in RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f6b31c94c1107b..41445e7fd3398d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2358,15 +2358,17 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) nd->flags |= LOOKUP_JUMPED; + BUG_ON(nd->flags & LOOKUP_RCU); + inode = path->dentry->d_inode; error = -ENOENT; - if (!path->dentry->d_inode) + if (!inode) goto exit_dput; - if (path->dentry->d_inode->i_op->follow_link) + if (inode->i_op->follow_link) return NULL; path_to_nameidata(path, nd); - nd->inode = path->dentry->d_inode; + nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); if (error) From d45ea86792db9679ed010b2c3df3db32b2ce5bde Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:09 +0200 Subject: [PATCH 84/95] vfs: make follow_link check RCU safe This will allow this code to be used in RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 41445e7fd3398d..c6b996817bb39c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2201,6 +2201,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; + int symlink_ok = 0; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2232,7 +2233,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } if (!(open_flag & O_CREAT)) { - int symlink_ok = 0; if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) @@ -2364,8 +2364,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (!inode) goto exit_dput; - if (inode->i_op->follow_link) + if (should_follow_link(inode, !symlink_ok)) { + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + error = -ECHILD; + goto exit; + } + } + BUG_ON(inode != path->dentry->d_inode); return NULL; + } path_to_nameidata(path, nd); nd->inode = inode; From 54c33e7f95284539e52ec2d99dcdf6efd29b247f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:10 +0200 Subject: [PATCH 85/95] vfs: do_last(): make ENOENT exit RCU safe This will allow this code to be used in RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index c6b996817bb39c..a7e994bb78c2b6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2361,8 +2361,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, BUG_ON(nd->flags & LOOKUP_RCU); inode = path->dentry->d_inode; error = -ENOENT; - if (!inode) - goto exit_dput; + if (!inode) { + path_to_nameidata(path, nd); + goto exit; + } if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { From af2f55426d1d888dcc0ba8dc9e9deb49fae38e38 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:11 +0200 Subject: [PATCH 86/95] vfs: do_last(): check LOOKUP_DIRECTORY Check for ENOTDIR before finishing open. This allows this code to be shared between O_CREAT and plain opens. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index a7e994bb78c2b6..4767c0588b6ed5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2386,6 +2386,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -EISDIR; if (S_ISDIR(nd->inode->i_mode)) goto exit; + error = -ENOTDIR; + if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) + goto exit; ok: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; From 050ac841ea90610067fec26150574be8c6077738 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:12 +0200 Subject: [PATCH 87/95] vfs: do_last(): only return EISDIR for O_CREAT This allows this code to be shared between O_CREAT and plain opens. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 4767c0588b6ed5..90210b46b4614b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2384,7 +2384,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) return ERR_PTR(error); error = -EISDIR; - if (S_ISDIR(nd->inode->i_mode)) + if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) From d7fdd7f6e1afbffda03aeacb90039c092e8cacf8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:13 +0200 Subject: [PATCH 88/95] vfs: do_last(): add audit_inode before open This allows this code to be shared between O_CREAT and plain opens. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namei.c b/fs/namei.c index 90210b46b4614b..125386c250bb8f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2389,6 +2389,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto exit; + audit_inode(pathname, nd->path.dentry); ok: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; From 5f5daac12a4cef568d1269be0215fec0667193c1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:14 +0200 Subject: [PATCH 89/95] vfs: do_last() common post lookup Now the post lookup code can be shared between O_CREAT and plain opens since they are essentially the same. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 125386c250bb8f..998d5316921a53 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2249,37 +2249,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } - error = -ENOENT; - if (!inode) { - path_to_nameidata(path, nd); - goto exit; - } - - if (should_follow_link(inode, !symlink_ok)) { - if (nd->flags & LOOKUP_RCU) { - if (unlikely(unlazy_walk(nd, path->dentry))) { - error = -ECHILD; - goto exit; - } - } - BUG_ON(inode != path->dentry->d_inode); - return NULL; - } - path_to_nameidata(path, nd); - nd->inode = inode; - - /* sayonara */ - error = complete_walk(nd); - if (error) - return ERR_PTR(error); - - error = -ENOTDIR; - if (nd->flags & LOOKUP_DIRECTORY) { - if (!nd->inode->i_op->lookup) - goto exit; - } - audit_inode(pathname, nd->path.dentry); - goto ok; + goto finish_lookup; } /* create side of things */ @@ -2360,6 +2330,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, BUG_ON(nd->flags & LOOKUP_RCU); inode = path->dentry->d_inode; +finish_lookup: + /* we _can_ be in RCU mode here */ error = -ENOENT; if (!inode) { path_to_nameidata(path, nd); From 90ad1a8ecb9bfd5ff4503ac42cd049a97643ee51 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:15 +0200 Subject: [PATCH 90/95] vfs: split __dentry_open() Split __dentry_open() into two functions: do_dentry_open() - does most of the actual work, doesn't put file on failure open_check_o_direct() - after a successful open, checks direct_IO method This will allow i_op->atomic_open to do just the file initialization and leave the direct_IO checking to the VFS. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/internal.h | 1 + fs/open.c | 47 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index 8040af489c7829..18bc216ea09d95 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -100,6 +100,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); +extern int open_check_o_direct(struct file *f); /* * inode.c diff --git a/fs/open.c b/fs/open.c index d54301219d04f1..9daa1cea52fcf6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -654,10 +654,23 @@ static inline int __get_file_write_access(struct inode *inode, return error; } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +int open_check_o_direct(struct file *f) +{ + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || + ((!f->f_mapping->a_ops->direct_IO) && + (!f->f_mapping->a_ops->get_xip_mem))) { + return -EINVAL; + } + } + return 0; +} + +static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -713,16 +726,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || - ((!f->f_mapping->a_ops->direct_IO) && - (!f->f_mapping->a_ops->get_xip_mem))) { - fput(f); - f = ERR_PTR(-EINVAL); - } - } - return f; cleanup_all: @@ -750,6 +753,22 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, return ERR_PTR(error); } +static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) +{ + struct file *res = do_dentry_open(dentry, mnt, f, open, cred); + if (!IS_ERR(res)) { + int error = open_check_o_direct(f); + if (error) { + fput(res); + res = ERR_PTR(error); + } + } + return res; +} + /** * lookup_instantiate_filp - instantiates the open intent filp * @nd: pointer to nameidata From 78f71eff3c274f3907f4aa1bbe3267281ba1c603 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:16 +0200 Subject: [PATCH 91/95] vfs: do_dentry_open(): don't put filp Move put_filp() out to __dentry_open(), the only caller now. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/open.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/open.c b/fs/open.c index 9daa1cea52fcf6..511c548b0997c8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -747,7 +747,6 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: - put_filp(f); dput(dentry); mntput(mnt); return ERR_PTR(error); @@ -765,6 +764,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, fput(res); res = ERR_PTR(error); } + } else { + put_filp(f); } return res; } From 91daee988db38b0207eec719a3160b163c077007 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:17 +0200 Subject: [PATCH 92/95] vfs: nameidata_to_filp(): inline __dentry_open() Copy __dentry_open() into nameidata_to_filp(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/open.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/open.c b/fs/open.c index 511c548b0997c8..9fd34b76b9593b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -828,9 +828,25 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Has the filesystem initialised the file for us? */ if (filp->f_path.dentry == NULL) { + struct file *res; + path_get(&nd->path); - filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, - NULL, cred); + res = do_dentry_open(nd->path.dentry, nd->path.mnt, + filp, NULL, cred); + if (!IS_ERR(res)) { + int error; + + BUG_ON(res != filp); + + error = open_check_o_direct(filp); + if (error) { + fput(filp); + filp = ERR_PTR(error); + } + } else { + put_filp(filp); + filp = res; + } } return filp; } From 50ee93afcaa970620d1fb5a9894109a2ab152868 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:18 +0200 Subject: [PATCH 93/95] vfs: nameidata_to_filp(): don't throw away file on error If open fails, don't put the file. This allows it to be reused if open needs to be retried. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/open.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/open.c b/fs/open.c index 9fd34b76b9593b..d6c79a0dffc7b0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -824,10 +824,11 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Pick up the filp from the open intent */ filp = nd->intent.open.file; - nd->intent.open.file = NULL; /* Has the filesystem initialised the file for us? */ - if (filp->f_path.dentry == NULL) { + if (filp->f_path.dentry != NULL) { + nd->intent.open.file = NULL; + } else { struct file *res; path_get(&nd->path); @@ -836,6 +837,7 @@ struct file *nameidata_to_filp(struct nameidata *nd) if (!IS_ERR(res)) { int error; + nd->intent.open.file = NULL; BUG_ON(res != filp); error = open_check_o_direct(filp); @@ -844,7 +846,7 @@ struct file *nameidata_to_filp(struct nameidata *nd) filp = ERR_PTR(error); } } else { - put_filp(filp); + /* Allow nd->intent.open.file to be recycled */ filp = res; } } From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:19 +0200 Subject: [PATCH 94/95] vfs: retry last component if opening stale dentry NFS optimizes away d_revalidates for last component of open. This means that open itself can find the dentry stale. This patch allows the filesystem to return EOPENSTALE and the VFS will retry the lookup on just the last component if possible. If the lookup was done using RCU mode, including the last component, then this is not possible since the parent dentry is lost. In this case fall back to non-RCU lookup. Currently this is not used since NFS will always leave RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namei.c | 37 +++++++++++++++++++++++++++++++++++-- include/linux/errno.h | 1 + 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 998d5316921a53..7d694194024ac4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2202,6 +2202,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, struct file *filp; struct inode *inode; int symlink_ok = 0; + struct path save_parent = { .dentry = NULL, .mnt = NULL }; + bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2267,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) goto exit; +retry_lookup: mutex_lock(&dir->d_inode->i_mutex); dentry = lookup_hash(nd); @@ -2349,12 +2352,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path, return NULL; } - path_to_nameidata(path, nd); + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { + path_to_nameidata(path, nd); + } else { + save_parent.dentry = nd->path.dentry; + save_parent.mnt = mntget(path->mnt); + nd->path.dentry = path->dentry; + + } nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); - if (error) + if (error) { + path_put(&save_parent); return ERR_PTR(error); + } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2377,6 +2389,20 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) goto exit; filp = nameidata_to_filp(nd); + if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + retried = true; + goto retry_lookup; + } if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { @@ -2396,6 +2422,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, out: if (want_write) mnt_drop_write(nd->path.mnt); + path_put(&save_parent); terminate_walk(nd); return filp; @@ -2459,6 +2486,12 @@ static struct file *path_openat(int dfd, const char *pathname, if (base) fput(base); release_open_intent(nd); + if (filp == ERR_PTR(-EOPENSTALE)) { + if (flags & LOOKUP_RCU) + filp = ERR_PTR(-ECHILD); + else + filp = ERR_PTR(-ESTALE); + } return filp; out_filp: diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa5c2628a..e0de516374da37 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -17,6 +17,7 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ From 0ef97dcfce4179a2eba046b855ee2f91d6f1b414 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:20 +0200 Subject: [PATCH 95/95] nfs: don't open in ->d_revalidate NFSv4 can't do reliable opens in d_revalidate, since it cannot know whether a mount needs to be followed or not. It does check d_mountpoint() on the dentry, which can result in a weird error if the VFS found that the mount does not in fact need to be followed, e.g.: # mount --bind /mnt/nfs /mnt/nfs-clone # echo something > /mnt/nfs/tmp/bar # echo x > /tmp/file # mount --bind /tmp/file /mnt/nfs-clone/tmp/bar # cat /mnt/nfs/tmp/bar cat: /mnt/nfs/tmp/bar: Not a directory Which should, by any sane filesystem, result in "something" being printed. So instead do the open in f_op->open() and in the unlikely case that the cached dentry turned out to be invalid, drop the dentry and return EOPENSTALE to let the VFS retry. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro --- fs/nfs/dir.c | 56 ++++--------------------------------- fs/nfs/file.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 55 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0989a2099688a3..f430057ff3b397 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1354,10 +1354,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru } #ifdef CONFIG_NFS_V4 -static int nfs_open_revalidate(struct dentry *, struct nameidata *); +static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); const struct dentry_operations nfs4_dentry_operations = { - .d_revalidate = nfs_open_revalidate, + .d_revalidate = nfs4_lookup_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -1519,13 +1519,11 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry return nfs_lookup(dir, dentry, nd); } -static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - struct nfs_open_context *ctx; - struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1554,57 +1552,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - ctx = create_nfs_open_context(dentry, openflags); - ret = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - /* - * Note: we're not holding inode->i_mutex and so may be racing with - * operations that change the directory. We therefore save the - * change attribute *before* we do the RPC call. - */ - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - switch (ret) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; + /* Let f_op->open() actually open (and revalidate) the file */ + ret = 1; - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - ret = nfs_intent_set_file(nd, ctx); - if (ret >= 0) - ret = 1; out: dput(parent); return ret; -out_drop: - d_drop(dentry); - ret = 0; -out_put_ctx: - put_nfs_open_context(ctx); - goto out; no_open_dput: dput(parent); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 56311ca5f9f818..a6708e6b438dd5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -879,12 +879,81 @@ const struct file_operations nfs_file_operations = { static int nfs4_file_open(struct inode *inode, struct file *filp) { + struct nfs_open_context *ctx; + struct dentry *dentry = filp->f_path.dentry; + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; + struct iattr attr; + int err; + + BUG_ON(inode != dentry->d_inode); /* - * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to - * this point, then something is very wrong + * If no cached dentry exists or if it's negative, NFSv4 handled the + * opens in ->lookup() or ->create(). + * + * We only get this far for a cached positive dentry. We skipped + * revalidation, so handle it here by dropping the dentry and returning + * -EOPENSTALE. The VFS will retry the lookup/create/open. */ - dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp); - return -ENOTDIR; + + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((openflags & O_ACCMODE) == 3) + openflags--; + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); + + parent = dget_parent(dentry); + dir = parent->d_inode; + + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + switch (err) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_file_set_open_context(filp, ctx); + err = 0; + +out_put_ctx: + put_nfs_open_context(ctx); +out: + dput(parent); + return err; + +out_drop: + d_drop(dentry); + err = -EOPENSTALE; + goto out_put_ctx; } const struct file_operations nfs4_file_operations = {