From 5549f7cdf84c02939fd368d0842aa2f472bb6e98 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 7 Jul 2009 10:28:23 -0400 Subject: inotify: drop user watch count when a watch is removed The inotify rewrite forgot to drop the inotify watch use cound when a watch was removed. This means that a single inotify fd can only ever register a maximum of /proc/sys/fs/max_user_watches even if some of those had been freed. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff27a29..1a870f9 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -404,6 +404,8 @@ skip_send_ignore: /* removed from idr, drop that reference */ fsnotify_put_mark(entry); + + atomic_dec(&group->inotify_data.user->inotify_watches); } /* ding dong the mark is dead */ -- cgit v1.1 From 75fe2b26394c59c8e16bd7b76f4be5d048103ad1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 7 Jul 2009 10:28:23 -0400 Subject: inotify: do not leak inode marks in inotify_add_watch inotify_add_watch had a couple of problems. The biggest being that if inotify_add_watch was called on the same inode twice (to update or change the event mask) a refence was taken on the original inode mark by fsnotify_find_mark_entry but was not being dropped at the end of the inotify_add_watch call. Thus if inotify_rm_watch was called although the mark was removed from the inode, the refcnt wouldn't hit zero and we would leak memory. Reported-by: Catalin Marinas Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1a870f9..aff4214 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -463,9 +463,6 @@ retry: goto out_err; spin_lock(&group->inotify_data.idr_lock); - /* if entry is added to the idr we keep the reference obtained - * through fsnotify_mark_add. remember to drop this reference - * when entry is removed from idr */ ret = idr_get_new_above(&group->inotify_data.idr, entry, ++group->inotify_data.last_wd, &ientry->wd); @@ -476,8 +473,13 @@ retry: goto out_err; } atomic_inc(&group->inotify_data.user->inotify_watches); + + /* we put the mark on the idr, take a reference */ + fsnotify_get_mark(entry); } + ret = ientry->wd; + spin_lock(&entry->lock); old_mask = entry->mask; @@ -508,7 +510,11 @@ retry: fsnotify_recalc_group_mask(group); } - return ientry->wd; + /* this either matches fsnotify_find_mark_entry, or init_mark_entry + * depending on which path we took... */ + fsnotify_put_mark(entry); + + return ret; out_err: /* see this isn't supposed to happen, just kill the watch */ -- cgit v1.1 From 7e790dd5fc937bc8d2400c30a05e32a9e9eef276 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 7 Jul 2009 10:28:24 -0400 Subject: inotify: fix error paths in inotify_update_watch inotify_update_watch could leave things in a horrid state on a number of error paths. We could try to remove idr entries that didn't exist, we could send an IN_IGNORED to userspace for watches that don't exist, and a bit of other stupidity. Clean these up by doing the idr addition before we put the mark on the inode since we can clean that up on error and getting off the inode's mark list is hard. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 79 +++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 30 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index aff4214..726118a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -365,6 +365,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns return error; } +static void inotify_remove_from_idr(struct fsnotify_group *group, + struct inotify_inode_mark_entry *ientry) +{ + struct idr *idr; + + spin_lock(&group->inotify_data.idr_lock); + idr = &group->inotify_data.idr; + idr_remove(idr, ientry->wd); + spin_unlock(&group->inotify_data.idr_lock); + ientry->wd = -1; +} /* * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the * internal reference help on the mark because it is in the idr. @@ -375,7 +386,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, struct inotify_inode_mark_entry *ientry; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; - struct idr *idr; ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); @@ -397,10 +407,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, skip_send_ignore: /* remove this entry from the idr */ - spin_lock(&group->inotify_data.idr_lock); - idr = &group->inotify_data.idr; - idr_remove(idr, ientry->wd); - spin_unlock(&group->inotify_data.idr_lock); + inotify_remove_from_idr(group, ientry); /* removed from idr, drop that reference */ fsnotify_put_mark(entry); @@ -420,6 +427,7 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { struct fsnotify_mark_entry *entry = NULL; struct inotify_inode_mark_entry *ientry; + struct inotify_inode_mark_entry *tmp_ientry; int ret = 0; int add = (arg & IN_MASK_ADD); __u32 mask; @@ -430,50 +438,60 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod if (unlikely(!mask)) return -EINVAL; - ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); - if (unlikely(!ientry)) + tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); + if (unlikely(!tmp_ientry)) return -ENOMEM; /* we set the mask at the end after attaching it */ - fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark); - ientry->wd = 0; + fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); + tmp_ientry->wd = -1; find_entry: spin_lock(&inode->i_lock); entry = fsnotify_find_mark_entry(group, inode); spin_unlock(&inode->i_lock); if (entry) { - kmem_cache_free(inotify_inode_mark_cachep, ientry); ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); } else { - if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) { - ret = -ENOSPC; - goto out_err; - } - - ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode); - if (ret == -EEXIST) - goto find_entry; - else if (ret) + ret = -ENOSPC; + if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) goto out_err; - - entry = &ientry->fsn_entry; retry: ret = -ENOMEM; if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) goto out_err; spin_lock(&group->inotify_data.idr_lock); - ret = idr_get_new_above(&group->inotify_data.idr, entry, - ++group->inotify_data.last_wd, - &ientry->wd); + ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, + group->inotify_data.last_wd, + &tmp_ientry->wd); spin_unlock(&group->inotify_data.idr_lock); if (ret) { if (ret == -EAGAIN) goto retry; goto out_err; } + + ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); + if (ret) { + inotify_remove_from_idr(group, tmp_ientry); + if (ret == -EEXIST) + goto find_entry; + goto out_err; + } + + /* tmp_ientry has been added to the inode, so we are all set up. + * now we just need to make sure tmp_ientry doesn't get freed and + * we need to set up entry and ientry so the generic code can + * do its thing. */ + ientry = tmp_ientry; + entry = &ientry->fsn_entry; + tmp_ientry = NULL; + atomic_inc(&group->inotify_data.user->inotify_watches); + /* update the idr hint */ + group->inotify_data.last_wd = ientry->wd; + /* we put the mark on the idr, take a reference */ fsnotify_get_mark(entry); } @@ -514,14 +532,15 @@ retry: * depending on which path we took... */ fsnotify_put_mark(entry); - return ret; - out_err: - /* see this isn't supposed to happen, just kill the watch */ - if (entry) { - fsnotify_destroy_mark_by_entry(entry); - fsnotify_put_mark(entry); + /* could be an error, could be that we found an existing mark */ + if (tmp_ientry) { + /* on the idr but didn't make it on the inode */ + if (tmp_ientry->wd != -1) + inotify_remove_from_idr(group, tmp_ientry); + kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); } + return ret; } -- cgit v1.1 From f44aebcc566d1d6275f7191867b9633dc11de2ee Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 15 Jul 2009 15:49:52 -0400 Subject: inotify: use GFP_NOFS under potential memory pressure inotify can have a watchs removed under filesystem reclaim. ================================= [ INFO: inconsistent lock state ] 2.6.31-rc2 #16 --------------------------------- inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage. khubd/217 [HC0[0]:SC0[0]:HE1:SE1] takes: (iprune_mutex){+.+.?.}, at: [] invalidate_inodes+0x20/0xe3 {IN-RECLAIM_FS-W} state was registered at: [] __lock_acquire+0x2c9/0xac4 [] lock_acquire+0x9f/0xc2 [] __mutex_lock_common+0x2d/0x323 [] mutex_lock_nested+0x2e/0x36 [] shrink_icache_memory+0x38/0x1b2 [] shrink_slab+0xe2/0x13c [] kswapd+0x3d1/0x55d [] kthread+0x66/0x6b [] kernel_thread_helper+0x7/0x10 [] 0xffffffff Two things are needed to fix this. First we need a method to tell fsnotify_create_event() to use GFP_NOFS and second we need to stop using one global IN_IGNORED event and allocate them one at a time. This solves current issues with multiple IN_IGNORED on a queue having tail drop problems and simplifies the allocations since we don't have to worry about two tasks opperating on the IGNORED event concurrently. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 726118a..f30d9bb 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -57,7 +57,6 @@ int inotify_max_user_watches __read_mostly; static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; struct kmem_cache *event_priv_cachep __read_mostly; -static struct fsnotify_event *inotify_ignored_event; /* * When inotify registers a new group it increments this and uses that @@ -384,12 +383,19 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) { struct inotify_inode_mark_entry *ientry; + struct fsnotify_event *ignored_event; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; + ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, + FSNOTIFY_EVENT_NONE, NULL, 0, + GFP_NOFS); + if (!ignored_event) + return; + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); - event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); + event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); if (unlikely(!event_priv)) goto skip_send_ignore; @@ -398,7 +404,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, fsn_event_priv->group = group; event_priv->wd = ientry->wd; - fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); + fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); /* did the private data get added? */ if (list_empty(&fsn_event_priv->event_list)) @@ -406,6 +412,9 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, skip_send_ignore: + /* matches the reference taken when the event was created */ + fsnotify_put_event(ignored_event); + /* remove this entry from the idr */ inotify_remove_from_idr(group, ientry); @@ -748,9 +757,6 @@ static int __init inotify_user_setup(void) inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); - inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0); - if (!inotify_ignored_event) - panic("unable to allocate the inotify ignored event\n"); inotify_max_queued_events = 16384; inotify_max_user_instances = 128; -- cgit v1.1 From eef3a116be11d35396efb2a8cc7345fd3221e294 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sun, 16 Aug 2009 21:51:44 -0400 Subject: notify: unused event private race inotify decides if private data it passed to get added to an event was used by checking list_empty(). But it's possible that the event may have been dequeued and the private event removed so it would look empty. The fix is to use the return code from fsnotify_add_notify_event rather than looking at the list. Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify_user.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index f30d9bb..c172a7a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -386,6 +386,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, struct fsnotify_event *ignored_event; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; + int ret; ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0, @@ -404,10 +405,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, fsn_event_priv->group = group; event_priv->wd = ientry->wd; - fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); - - /* did the private data get added? */ - if (list_empty(&fsn_event_priv->event_list)) + ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); + if (ret) inotify_free_event_priv(fsn_event_priv); skip_send_ignore: -- cgit v1.1 From 08e53fcb0db34baca3db84a457b6d67faabee4c6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sun, 16 Aug 2009 21:51:55 -0400 Subject: inotify: start watch descriptor count at 1 The inotify_add_watch man page specifies that inotify_add_watch() will return a non-negative integer. However, historically the inotify watches started at 1, not at 0. Turns out that the inotifywait program provided by the inotify-tools package doesn't properly handle a 0 watch descriptor. In 7e790dd5 we changed from starting at 1 to starting at 0. This patch starts at 1, just like in previous kernels, but also just like in previous kernels it's possible for it to wrap back to 0. This preserves the kernel functionality exactly like it was before the patch (neither method broke the spec) Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds --- fs/notify/inotify/inotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index c172a7a..dc32ed8 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -567,7 +567,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); - group->inotify_data.last_wd = 0; + group->inotify_data.last_wd = 1; group->inotify_data.user = user; group->inotify_data.fa = NULL; -- cgit v1.1 From 52cef7555adf5ca09b3b7283097466759120d901 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 24 Aug 2009 16:03:35 -0400 Subject: inotify: seperate new watch creation updating existing watches There is nothing known wrong with the inotify watch addition/modification but this patch seperates the two code paths to make them each easy to verify as correct. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 172 +++++++++++++++++++++++---------------- 1 file changed, 103 insertions(+), 69 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index dc32ed8..d8f73c2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -431,80 +431,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry) kmem_cache_free(inotify_inode_mark_cachep, ientry); } -static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) +static int inotify_update_existing_watch(struct fsnotify_group *group, + struct inode *inode, + u32 arg) { - struct fsnotify_mark_entry *entry = NULL; + struct fsnotify_mark_entry *entry; struct inotify_inode_mark_entry *ientry; - struct inotify_inode_mark_entry *tmp_ientry; - int ret = 0; - int add = (arg & IN_MASK_ADD); - __u32 mask; __u32 old_mask, new_mask; + __u32 mask; + int add = (arg & IN_MASK_ADD); + int ret; /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); if (unlikely(!mask)) return -EINVAL; - tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); - if (unlikely(!tmp_ientry)) - return -ENOMEM; - /* we set the mask at the end after attaching it */ - fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); - tmp_ientry->wd = -1; - -find_entry: spin_lock(&inode->i_lock); entry = fsnotify_find_mark_entry(group, inode); spin_unlock(&inode->i_lock); - if (entry) { - ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); - } else { - ret = -ENOSPC; - if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) - goto out_err; -retry: - ret = -ENOMEM; - if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) - goto out_err; - - spin_lock(&group->inotify_data.idr_lock); - ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, - group->inotify_data.last_wd, - &tmp_ientry->wd); - spin_unlock(&group->inotify_data.idr_lock); - if (ret) { - if (ret == -EAGAIN) - goto retry; - goto out_err; - } + if (!entry) + return -ENOENT; - ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); - if (ret) { - inotify_remove_from_idr(group, tmp_ientry); - if (ret == -EEXIST) - goto find_entry; - goto out_err; - } - - /* tmp_ientry has been added to the inode, so we are all set up. - * now we just need to make sure tmp_ientry doesn't get freed and - * we need to set up entry and ientry so the generic code can - * do its thing. */ - ientry = tmp_ientry; - entry = &ientry->fsn_entry; - tmp_ientry = NULL; - - atomic_inc(&group->inotify_data.user->inotify_watches); - - /* update the idr hint */ - group->inotify_data.last_wd = ientry->wd; - - /* we put the mark on the idr, take a reference */ - fsnotify_get_mark(entry); - } - - ret = ientry->wd; + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); spin_lock(&entry->lock); @@ -536,18 +485,103 @@ retry: fsnotify_recalc_group_mask(group); } - /* this either matches fsnotify_find_mark_entry, or init_mark_entry - * depending on which path we took... */ + /* return the wd */ + ret = ientry->wd; + + /* match the get from fsnotify_find_mark_entry() */ fsnotify_put_mark(entry); + return ret; +} + +static int inotify_new_watch(struct fsnotify_group *group, + struct inode *inode, + u32 arg) +{ + struct inotify_inode_mark_entry *tmp_ientry; + __u32 mask; + int ret; + + /* don't allow invalid bits: we don't want flags set */ + mask = inotify_arg_to_mask(arg); + if (unlikely(!mask)) + return -EINVAL; + + tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); + if (unlikely(!tmp_ientry)) + return -ENOMEM; + + fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); + tmp_ientry->fsn_entry.mask = mask; + tmp_ientry->wd = -1; + + ret = -ENOSPC; + if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) + goto out_err; +retry: + ret = -ENOMEM; + if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) + goto out_err; + + spin_lock(&group->inotify_data.idr_lock); + ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, + group->inotify_data.last_wd, + &tmp_ientry->wd); + spin_unlock(&group->inotify_data.idr_lock); + if (ret) { + /* idr was out of memory allocate and try again */ + if (ret == -EAGAIN) + goto retry; + goto out_err; + } + + /* we are on the idr, now get on the inode */ + ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); + if (ret) { + /* we failed to get on the inode, get off the idr */ + inotify_remove_from_idr(group, tmp_ientry); + goto out_err; + } + + /* we put the mark on the idr, take a reference */ + fsnotify_get_mark(&tmp_ientry->fsn_entry); + + /* update the idr hint, who cares about races, it's just a hint */ + group->inotify_data.last_wd = tmp_ientry->wd; + + /* increment the number of watches the user has */ + atomic_inc(&group->inotify_data.user->inotify_watches); + + /* return the watch descriptor for this new entry */ + ret = tmp_ientry->wd; + + /* match the ref from fsnotify_init_markentry() */ + fsnotify_put_mark(&tmp_ientry->fsn_entry); + out_err: - /* could be an error, could be that we found an existing mark */ - if (tmp_ientry) { - /* on the idr but didn't make it on the inode */ - if (tmp_ientry->wd != -1) - inotify_remove_from_idr(group, tmp_ientry); + if (ret < 0) kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); - } + + return ret; +} + +static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) +{ + int ret = 0; + +retry: + /* try to update and existing watch with the new arg */ + ret = inotify_update_existing_watch(group, inode, arg); + /* no mark present, try to add a new one */ + if (ret == -ENOENT) + ret = inotify_new_watch(group, inode, arg); + /* + * inotify_new_watch could race with another thread which did an + * inotify_new_watch between the update_existing and the add watch + * here, go back and try to update an existing mark again. + */ + if (ret == -EEXIST) + goto retry; return ret; } -- cgit v1.1 From dead537dd8a1c9495322c1d6f7c780697f474af0 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 24 Aug 2009 16:03:35 -0400 Subject: inotify: fix locking around inotify watching in the idr The are races around the idr storage of inotify watches. It's possible that a watch could be found from sys_inotify_rm_watch() in the idr, but it could be removed from the idr before that code does it's removal. Move the locking and the refcnt'ing so that these have to happen atomically. Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 50 ++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index d8f73c2..ce1f582 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -364,20 +364,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns return error; } +/* + * Remove the mark from the idr (if present) and drop the reference + * on the mark because it was in the idr. + */ static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark_entry *ientry) { struct idr *idr; + struct fsnotify_mark_entry *entry; + struct inotify_inode_mark_entry *found_ientry; + int wd; spin_lock(&group->inotify_data.idr_lock); idr = &group->inotify_data.idr; - idr_remove(idr, ientry->wd); - spin_unlock(&group->inotify_data.idr_lock); + wd = ientry->wd; + + if (wd == -1) + goto out; + + entry = idr_find(&group->inotify_data.idr, wd); + if (unlikely(!entry)) + goto out; + + found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); + if (unlikely(found_ientry != ientry)) { + /* We found an entry in the idr with the right wd, but it's + * not the entry we were told to remove. eparis seriously + * fucked up somewhere. */ + WARN_ON(1); + ientry->wd = -1; + goto out; + } + + /* One ref for being in the idr, one ref held by the caller */ + BUG_ON(atomic_read(&entry->refcnt) < 2); + + idr_remove(idr, wd); ientry->wd = -1; + + /* removed from the idr, drop that ref */ + fsnotify_put_mark(entry); +out: + spin_unlock(&group->inotify_data.idr_lock); } + /* - * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the - * internal reference help on the mark because it is in the idr. + * Send IN_IGNORED for this wd, remove this wd from the idr. */ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) @@ -417,9 +450,6 @@ skip_send_ignore: /* remove this entry from the idr */ inotify_remove_from_idr(group, ientry); - /* removed from idr, drop that reference */ - fsnotify_put_mark(entry); - atomic_dec(&group->inotify_data.user->inotify_watches); } @@ -535,6 +565,9 @@ retry: goto out_err; } + /* we put the mark on the idr, take a reference */ + fsnotify_get_mark(&tmp_ientry->fsn_entry); + /* we are on the idr, now get on the inode */ ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); if (ret) { @@ -543,9 +576,6 @@ retry: goto out_err; } - /* we put the mark on the idr, take a reference */ - fsnotify_get_mark(&tmp_ientry->fsn_entry); - /* update the idr hint, who cares about races, it's just a hint */ group->inotify_data.last_wd = tmp_ientry->wd; -- cgit v1.1 From 0db501bd0610ee0c0aca84d927f90bcccd09e2bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Aug 2009 03:20:04 -0700 Subject: inotify: Ensure we alwasy write the terminating NULL. Before the rewrite copy_event_to_user always wrote a terqminating '\0' byte to user space after the filename. Since the rewrite that terminating byte was skipped if your filename is exactly a multiple of event_size. Ouch! So add one byte to name_size before we round up and use clear_user to set userspace to zero like /dev/zero does instead of copying the strange nul_inotify_event. I can't quite convince myself len_to_zero will never exceed 16 and even if it doesn't clear_user should be more efficient and a more accurate reflection of what the code is trying to do. Signed-off-by: Eric W. Biederman Signed-off-by: Eric Paris --- fs/notify/inotify/inotify_user.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/notify/inotify/inotify_user.c') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ce1f582..0e781bc 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -47,9 +47,6 @@ static struct vfsmount *inotify_mnt __read_mostly; -/* this just sits here and wastes global memory. used to just pad userspace messages with zeros */ -static struct inotify_event nul_inotify_event; - /* these are configurable via /proc/sys/fs/inotify/ */ static int inotify_max_user_instances __read_mostly; static int inotify_max_queued_events __read_mostly; @@ -199,8 +196,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, inotify_free_event_priv(fsn_priv); } - /* round up event->name_len so it is a multiple of event_size */ - name_len = roundup(event->name_len, event_size); + /* round up event->name_len so it is a multiple of event_size + * plus an extra byte for the terminating '\0'. + */ + name_len = roundup(event->name_len + 1, event_size); inotify_event.len = name_len; inotify_event.mask = inotify_mask_to_arg(event->mask); @@ -224,8 +223,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, return -EFAULT; buf += event->name_len; - /* fill userspace with 0's from nul_inotify_event */ - if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) + /* fill userspace with 0's */ + if (clear_user(buf, len_to_zero)) return -EFAULT; buf += len_to_zero; event_size += name_len; -- cgit v1.1