aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/dir.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/dir.c')
-rw-r--r--fs/ceph/dir.c233
1 files changed, 161 insertions, 72 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ef8f08c..7903e62 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
- ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- d_set_d_op(dentry, &ceph_dentry_ops);
- else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
- else
- d_set_d_op(dentry, &ceph_snap_dentry_ops);
-
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
return -ENOMEM; /* oh well */
@@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock;
}
+
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ d_set_d_op(dentry, &ceph_dentry_ops);
+ else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+ d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
+ else
+ d_set_d_op(dentry, &ceph_snap_dentry_ops);
+
di->dentry = dentry;
di->lease_session = NULL;
- dentry->d_fsdata = di;
dentry->d_time = jiffies;
+ /* avoid reordering d_fsdata setup so that the check above is safe */
+ smp_mb();
+ dentry->d_fsdata = di;
ceph_dentry_lru_add(dentry);
out_unlock:
spin_unlock(&dentry->d_lock);
return 0;
}
+struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_parent) {
+ inode = dentry->d_parent->d_inode;
+ ihold(inode);
+ }
+ spin_unlock(&dentry->d_lock);
+ return inode;
+}
/*
@@ -86,11 +104,11 @@ static unsigned fpos_off(loff_t p)
/*
* When possible, we try to satisfy a readdir by peeking at the
* dcache. We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
+ * d_child when we initially get results back from the MDS, and
* falling back to a "normal" sync readdir if any dentries in the dir
* are dropped.
*
- * I_COMPLETE tells indicates we have all dentries in the dir. It is
+ * D_COMPLETE tells indicates we have all dentries in the dir. It is
* defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
* the MDS if/when the directory is modified).
*/
@@ -122,18 +140,18 @@ static int __dcache_readdir(struct file *filp,
p = parent->d_subdirs.prev;
dout(" initial p %p/%p\n", p->prev, p->next);
} else {
- p = last->d_u.d_child.prev;
+ p = last->d_child.prev;
}
more:
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
while (1) {
dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) {
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
goto out_unlock;
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -148,7 +166,7 @@ more:
!dentry->d_inode ? " null" : "");
spin_unlock(&dentry->d_lock);
p = p->prev;
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
}
@@ -181,8 +199,8 @@ more:
filp->f_pos++;
/* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
- dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
+ if (!ceph_dir_test_complete(dir)) {
+ dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
err = -EAGAIN;
goto out;
}
@@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
const int max_bytes = fsc->mount_options->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
- if (fi->at_end)
+ if (fi->flags & CEPH_F_ATEND)
return 0;
/* always start with . and .. */
@@ -252,7 +270,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
off = 1;
}
if (filp->f_pos == 1) {
- ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
+ ino_t ino = parent_ino(filp->f_dentry);
dout("readdir off 1 -> '..'\n");
if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
ceph_translate_ino(inode->i_sb, ino),
@@ -263,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
/* can we use the dcache? */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+ ceph_dir_test_complete(inode) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = __dcache_readdir(filp, dirent, filldir);
if (err != -EAGAIN)
return err;
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
if (fi->dentry) {
err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -333,7 +351,7 @@ more:
if (!req->r_did_prepopulate) {
dout("readdir !did_prepopulate");
- fi->dir_release_count--; /* preclude I_COMPLETE */
+ fi->dir_release_count--; /* preclude D_COMPLETE */
}
/* note next offset and last dentry name */
@@ -403,20 +421,19 @@ more:
dout("readdir next frag is %x\n", frag);
goto more;
}
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
/*
* if dir_release_count still matches the dir, no dentries
* were released during the whole readdir, and we should have
* the complete dir contents in our cache.
*/
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_release_count == fi->dir_release_count) {
- dout(" marking %p complete\n", inode);
- /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
+ ceph_dir_set_complete(inode);
ci->i_max_offset = filp->f_pos;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("readdir %p filp %p done.\n", inode, filp);
return 0;
@@ -435,7 +452,7 @@ static void reset_readdir(struct ceph_file_info *fi)
dput(fi->dentry);
fi->dentry = NULL;
}
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -446,19 +463,24 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
loff_t retval;
mutex_lock(&inode->i_mutex);
+ retval = -EINVAL;
switch (origin) {
case SEEK_END:
offset += inode->i_size + 2; /* FIXME */
break;
case SEEK_CUR:
offset += file->f_pos;
+ case SEEK_SET:
+ break;
+ default:
+ goto out;
}
- retval = -EINVAL;
+
if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
retval = offset;
@@ -477,26 +499,19 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
if (offset > old_offset)
fi->dir_release_count--;
}
+out:
mutex_unlock(&inode->i_mutex);
return retval;
}
/*
- * Process result of a lookup/open request.
- *
- * Mainly, make sure we return the final req->r_dentry (if it already
- * existed) in place of the original VFS-provided dentry when they
- * differ.
- *
- * Gracefully handle the case where the MDS replies with -ENOENT and
- * no trace (which it may do, at its discretion, e.g., if it doesn't
- * care to issue a lease on the negative dentry).
+ * Handle lookups for the hidden .snap directory.
*/
-struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
- struct dentry *dentry, int err)
+int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
- struct inode *parent = dentry->d_parent->d_inode;
+ struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
/* .snap dir? */
if (err == -ENOENT &&
@@ -510,7 +525,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
d_add(dentry, inode);
err = 0;
}
+ return err;
+}
+/*
+ * Figure out final result of a lookup/open request.
+ *
+ * Mainly, make sure we return the final req->r_dentry (if it already
+ * existed) in place of the original VFS-provided dentry when they
+ * differ.
+ *
+ * Gracefully handle the case where the MDS replies with -ENOENT and
+ * no trace (which it may do, at its discretion, e.g., if it doesn't
+ * care to issue a lease on the negative dentry).
+ */
+struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
if (err == -ENOENT) {
/* no trace? */
err = 0;
@@ -566,7 +597,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
/* open (but not create!) intent? */
if (nd &&
(nd->flags & LOOKUP_OPEN) &&
- (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
!(nd->intent.open.flags & O_CREAT)) {
int mode = nd->intent.open.create_mode & ~current->fs->umask;
return ceph_lookup_open(dir, dentry, nd, mode, 1);
@@ -577,21 +607,21 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_inode_info *ci = ceph_inode(dir);
struct ceph_dentry_info *di = ceph_dentry(dentry);
- spin_lock(&dir->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
if (strncmp(dentry->d_name.name,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+ ceph_dir_test_complete(dir) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL);
di->lease_shared_gen = ci->i_shared_gen;
return NULL;
}
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -605,6 +635,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);
@@ -784,6 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -809,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
struct ceph_inode_info *ci = ceph_inode(inode);
int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (inode->i_nlink == 1) {
drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
ci->i_ceph_flags |= CEPH_I_NODELAY;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return drop;
}
@@ -882,6 +914,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry = dget(new_dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = new_dir;
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -900,7 +933,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
/* d_move screws up d_subdirs order */
- ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+ ceph_dir_clear_complete(new_dir);
d_move(old_dentry, new_dentry);
@@ -982,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
struct ceph_dentry_info *di = ceph_dentry(dentry);
int valid = 0;
- spin_lock(&dir->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_shared_gen == di->lease_shared_gen)
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
dir, (unsigned)ci->i_shared_gen, dentry,
(unsigned)di->lease_shared_gen, valid);
@@ -997,36 +1030,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ int valid = 0;
struct inode *dir;
if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
- dir = dentry->d_parent->d_inode;
-
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
ceph_dentry(dentry)->offset);
+ dir = ceph_get_dentry_parent_inode(dentry);
+
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
- goto out_touch;
+ valid = 1;
+ } else if (dentry->d_inode &&
+ ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ valid = 1;
+ } else if (dentry_lease_is_valid(dentry) ||
+ dir_lease_is_valid(dir, dentry)) {
+ valid = 1;
}
- if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
- goto out_touch;
- if (dentry_lease_is_valid(dentry) ||
- dir_lease_is_valid(dir, dentry))
- goto out_touch;
-
- dout("d_revalidate %p invalid\n", dentry);
- d_drop(dentry);
- return 0;
-out_touch:
- ceph_dentry_lru_touch(dentry);
- return 1;
+ dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
+ if (valid)
+ ceph_dentry_lru_touch(dentry);
+ else
+ d_drop(dentry);
+ iput(dir);
+ return valid;
}
/*
@@ -1056,7 +1091,52 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
return 1;
}
+/*
+ * Set/clear/test dir complete flag on the dir's dentry.
+ */
+void ceph_dir_set_complete(struct inode *inode)
+{
+ /* not yet implemented */
+}
+
+void ceph_dir_clear_complete(struct inode *inode)
+{
+ /* not yet implemented */
+}
+bool ceph_dir_test_complete(struct inode *inode)
+{
+ /* not yet implemented */
+ return false;
+}
+
+/*
+ * When the VFS prunes a dentry from the cache, we need to clear the
+ * complete flag on the parent directory.
+ *
+ * Called under dentry->d_lock.
+ */
+static void ceph_d_prune(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+
+ dout("ceph_d_prune %p\n", dentry);
+
+ /* do we have a valid parent? */
+ if (!dentry->d_parent || IS_ROOT(dentry))
+ return;
+
+ /* if we are not hashed, we don't affect D_COMPLETE */
+ if (d_unhashed(dentry))
+ return;
+
+ /*
+ * we hold d_lock, so d_parent is stable, and d_fsdata is never
+ * cleared until d_release
+ */
+ di = ceph_dentry(dentry->d_parent);
+ clear_bit(CEPH_D_COMPLETE, &di->flags);
+}
/*
* read() on a dir. This weird interface hack only works if mounted
@@ -1113,7 +1193,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(struct file *file, int datasync)
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1123,6 +1204,11 @@ static int ceph_dir_fsync(struct file *file, int datasync)
int ret = 0;
dout("dir_fsync %p\n", inode);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
spin_lock(&ci->i_unsafe_lock);
if (list_empty(head))
goto out;
@@ -1156,6 +1242,8 @@ static int ceph_dir_fsync(struct file *file, int datasync)
} while (req->r_tid < last_tid);
out:
spin_unlock(&ci->i_unsafe_lock);
+ mutex_unlock(&inode->i_mutex);
+
return ret;
}
@@ -1215,9 +1303,8 @@ void ceph_dentry_lru_del(struct dentry *dn)
* Return name hash for a given dentry. This is dependent on
* the parent directory's hash function.
*/
-unsigned ceph_dentry_hash(struct dentry *dn)
+unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
- struct inode *dir = dn->d_parent->d_inode;
struct ceph_inode_info *dci = ceph_inode(dir);
switch (dci->i_dir_layout.dl_dir_hash) {
@@ -1263,6 +1350,7 @@ const struct inode_operations ceph_dir_iops = {
const struct dentry_operations ceph_dentry_ops = {
.d_revalidate = ceph_d_revalidate,
.d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
};
const struct dentry_operations ceph_snapdir_dentry_ops = {
@@ -1272,4 +1360,5 @@ const struct dentry_operations ceph_snapdir_dentry_ops = {
const struct dentry_operations ceph_snap_dentry_ops = {
.d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
};