aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs')
-rw-r--r--fs/exofs/Kbuild6
-rw-r--r--fs/exofs/exofs.h169
-rw-r--r--fs/exofs/file.c10
-rw-r--r--fs/exofs/inode.c357
-rw-r--r--fs/exofs/ios.c803
-rw-r--r--fs/exofs/namei.c7
-rw-r--r--fs/exofs/pnfs.h45
-rw-r--r--fs/exofs/super.c369
8 files changed, 511 insertions, 1255 deletions
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 2d0f757..352ba14 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,9 @@
# Kbuild - Gets included from the Kernels Makefile and build system
#
-exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
+# ore module library
+libore-y := ore.o ore_raid.o
+obj-$(CONFIG_ORE) += libore.o
+
+exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c965806..51f4b4c 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -36,12 +36,9 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/backing-dev.h>
-#include "common.h"
+#include <scsi/osd_ore.h>
-/* FIXME: Remove once pnfs hits mainline
- * #include <linux/exportfs/pnfs_osd_xdr.h>
- */
-#include "pnfs.h"
+#include "common.h"
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
@@ -56,27 +53,15 @@
/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)
-struct exofs_layout {
- osd_id s_pid; /* partition ID of file system*/
-
- /* Our way of looking at the data_map */
- unsigned stripe_unit;
- unsigned mirrors_p1;
-
- unsigned group_width;
- u64 group_depth;
- unsigned group_count;
-
- enum exofs_inode_layout_gen_functions lay_func;
-
- unsigned s_numdevs; /* Num of devices in array */
- struct osd_dev *s_ods[0]; /* Variable length */
+struct exofs_dev {
+ struct ore_dev ored;
+ unsigned did;
};
-
/*
* our extension to the in-memory superblock
*/
struct exofs_sb_info {
+ struct backing_dev_info bdi; /* register our bdi with VFS */
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */
@@ -84,17 +69,10 @@ struct exofs_sb_info {
spinlock_t s_next_gen_lock; /* spinlock for gen # update */
u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */
- uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
- struct backing_dev_info bdi; /* register our bdi with VFS */
-
- struct pnfs_osd_data_map data_map; /* Default raid to use
- * FIXME: Needed ?
- */
-/* struct exofs_layout dir_layout;*/ /* Default dir layout */
- struct exofs_layout layout; /* Default files layout,
- * contains the variable osd_dev
- * array. Keep last */
- struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
+
+ struct ore_layout layout; /* Default files layout */
+ struct ore_comp one_comp; /* id & cred of partition id=0*/
+ struct ore_components oc; /* comps for the partition */
};
/*
@@ -107,7 +85,8 @@ struct exofs_i_info {
uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
uint32_t i_dir_start_lookup; /* which page to start lookup */
uint64_t i_commit_size; /* the object's written length */
- uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */
+ struct ore_comp one_comp; /* same component for all devices */
+ struct ore_components oc; /* inode view of the device table */
};
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
}
-struct exofs_io_state;
-typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
-
-struct exofs_io_state {
- struct kref kref;
-
- void *private;
- exofs_io_done_fn done;
-
- struct exofs_layout *layout;
- struct osd_obj_id obj;
- u8 *cred;
-
- /* Global read/write IO*/
- loff_t offset;
- unsigned long length;
- void *kern_buff;
-
- struct page **pages;
- unsigned nr_pages;
- unsigned pgbase;
- unsigned pages_consumed;
-
- /* Attributes */
- unsigned in_attr_len;
- struct osd_attr *in_attr;
- unsigned out_attr_len;
- struct osd_attr *out_attr;
-
- /* Variable array of size numdevs */
- unsigned numdevs;
- struct exofs_per_dev_state {
- struct osd_request *or;
- struct bio *bio;
- loff_t offset;
- unsigned length;
- unsigned dev;
- } per_dev[];
-};
-
-static inline unsigned exofs_io_state_size(unsigned numdevs)
-{
- return sizeof(struct exofs_io_state) +
- sizeof(struct exofs_per_dev_state) * numdevs;
-}
-
/*
* our inode flags
*/
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
}
/*
- * Given a layout, object_number and stripe_index return the associated global
- * dev_index
- */
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
- osd_id obj_no, unsigned layout_index);
-/*
* Maximum count of links to a file
*/
#define EXOFS_LINK_MAX 32000
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
* function declarations *
*************************/
-/* ios.c */
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
- const struct osd_obj_id *obj);
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
- u64 offset, void *p, unsigned length);
-
-int exofs_get_io_state(struct exofs_layout *layout,
- struct exofs_io_state **ios);
-void exofs_put_io_state(struct exofs_io_state *ios);
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
-
-int exofs_sbi_create(struct exofs_io_state *ios);
-int exofs_sbi_remove(struct exofs_io_state *ios);
-int exofs_sbi_write(struct exofs_io_state *ios);
-int exofs_sbi_read(struct exofs_io_state *ios);
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
-static inline int exofs_oi_write(struct exofs_i_info *oi,
- struct exofs_io_state *ios)
-{
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
- return exofs_sbi_write(ios);
-}
-
-static inline int exofs_oi_read(struct exofs_i_info *oi,
- struct exofs_io_state *ios)
-{
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
- return exofs_sbi_read(ios);
-}
-
/* inode.c */
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages);
int exofs_setattr(struct dentry *, struct iattr *);
int exofs_write_begin(struct file *file, struct address_space *mapping,
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
struct inode *);
/* super.c */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
+ const struct osd_obj_id *obj);
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
/*********************
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
/* inode.c */
extern const struct address_space_operations exofs_aops;
-extern const struct osd_attr g_attr_logical_length;
/* namei.c */
extern const struct inode_operations exofs_dir_inode_operations;
@@ -305,4 +197,35 @@ extern const struct inode_operations exofs_special_inode_operations;
extern const struct inode_operations exofs_symlink_inode_operations;
extern const struct inode_operations exofs_fast_symlink_inode_operations;
+/* exofs_init_comps will initialize an ore_components device array
+ * pointing to a single ore_comp struct, and a round-robin view
+ * of the device table.
+ * The first device of each inode is the [inode->ino % num_devices]
+ * and the rest of the devices sequentially following where the
+ * first device is after the last device.
+ * It is assumed that the global device array at @sbi is twice
+ * bigger and that the device table repeats twice.
+ * See: exofs_read_lookup_dev_table()
+ */
+static inline void exofs_init_comps(struct ore_components *oc,
+ struct ore_comp *one_comp,
+ struct exofs_sb_info *sbi, osd_id oid)
+{
+ unsigned dev_mod = (unsigned)oid, first_dev;
+
+ one_comp->obj.partition = sbi->one_comp.obj.partition;
+ one_comp->obj.id = oid;
+ exofs_make_credential(one_comp->cred, &one_comp->obj);
+
+ oc->first_dev = 0;
+ oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
+ sbi->layout.group_count;
+ oc->single_comp = EC_SINGLE_COMP;
+ oc->comps = one_comp;
+
+ /* Round robin device view of the table */
+ first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
+ oc->ods = &sbi->oc.ods[first_dev];
+}
+
#endif
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 45ca323..491c6c0 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -42,11 +42,19 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
* Note, in exofs all metadata is written as part of inode, regardless.
* The writeout is synchronous
*/
-static int exofs_file_fsync(struct file *filp, int datasync)
+static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
+ struct inode *inode = filp->f_mapping->host;
int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c09..f6dbf77 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,20 +37,15 @@
#define EXOFS_DBGMSG2(M...) do {} while (0)
-enum { BIO_MAX_PAGES_KMALLOC =
- (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
- MAX_PAGES_KMALLOC =
- PAGE_SIZE / sizeof(struct page *),
-};
+enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), };
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages)
{
unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
/* TODO: easily support bio chaining */
- pages = min_t(unsigned, pages,
- layout->group_width * BIO_MAX_PAGES_KMALLOC);
+ pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE);
return pages;
}
@@ -58,7 +53,7 @@ struct page_collect {
struct exofs_sb_info *sbi;
struct inode *inode;
unsigned expected_pages;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct page **pages;
unsigned alloc_pages;
@@ -68,6 +63,7 @@ struct page_collect {
bool read_4_write; /* This means two things: that the read is sync
* And the pages should not be unlocked.
*/
+ struct page *that_locked_page;
};
static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
@@ -86,6 +82,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
pcol->length = 0;
pcol->pg_first = -1;
pcol->read_4_write = false;
+ pcol->that_locked_page = NULL;
}
static void _pcol_reset(struct page_collect *pcol)
@@ -98,6 +95,7 @@ static void _pcol_reset(struct page_collect *pcol)
pcol->length = 0;
pcol->pg_first = -1;
pcol->ios = NULL;
+ pcol->that_locked_page = NULL;
/* this is probably the end of the loop but in writes
* it might not end here. don't be left with nothing
@@ -110,13 +108,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
{
unsigned pages;
- if (!pcol->ios) { /* First time allocate io_state */
- int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
-
- if (ret)
- return ret;
- }
-
/* TODO: easily support bio chaining */
pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
@@ -140,7 +131,7 @@ static void pcol_free(struct page_collect *pcol)
pcol->pages = NULL;
if (pcol->ios) {
- exofs_put_io_state(pcol->ios);
+ ore_put_io_state(pcol->ios);
pcol->ios = NULL;
}
}
@@ -156,14 +147,17 @@ static int pcol_add_page(struct page_collect *pcol, struct page *page,
return 0;
}
+enum {PAGE_WAS_NOT_IN_IO = 17};
static int update_read_page(struct page *page, int ret)
{
- if (ret == 0) {
+ switch (ret) {
+ case 0:
/* Everything is OK */
SetPageUptodate(page);
if (PageError(page))
ClearPageError(page);
- } else if (ret == -EFAULT) {
+ break;
+ case -EFAULT:
/* In this case we were trying to read something that wasn't on
* disk yet - return a page full of zeroes. This should be OK,
* because the object should be empty (if there was a write
@@ -174,16 +168,22 @@ static int update_read_page(struct page *page, int ret)
SetPageUptodate(page);
if (PageError(page))
ClearPageError(page);
- ret = 0; /* recovered error */
EXOFS_DBGMSG("recovered read error\n");
- } else /* Error */
+ /* fall through */
+ case PAGE_WAS_NOT_IN_IO:
+ ret = 0; /* recovered error */
+ break;
+ default:
SetPageError(page);
-
+ }
return ret;
}
static void update_write_page(struct page *page, int ret)
{
+ if (unlikely(ret == PAGE_WAS_NOT_IN_IO))
+ return; /* don't pass start don't collect $200 */
+
if (ret) {
mapping_set_error(page->mapping, ret);
SetPageError(page);
@@ -197,15 +197,16 @@ static void update_write_page(struct page *page, int ret)
static int __readpages_done(struct page_collect *pcol)
{
int i;
- u64 resid;
u64 good_bytes;
u64 length = 0;
- int ret = exofs_check_io(pcol->ios, &resid);
+ int ret = ore_check_io(pcol->ios, NULL);
- if (likely(!ret))
+ if (likely(!ret)) {
good_bytes = pcol->length;
- else
- good_bytes = pcol->length - resid;
+ ret = PAGE_WAS_NOT_IN_IO;
+ } else {
+ good_bytes = 0;
+ }
EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
@@ -241,7 +242,7 @@ static int __readpages_done(struct page_collect *pcol)
}
/* callback of async reads */
-static void readpages_done(struct exofs_io_state *ios, void *p)
+static void readpages_done(struct ore_io_state *ios, void *p)
{
struct page_collect *pcol = p;
@@ -266,23 +267,70 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
}
}
+static int _maybe_not_all_in_one_io(struct ore_io_state *ios,
+ struct page_collect *pcol_src, struct page_collect *pcol)
+{
+ /* length was wrong or offset was not page aligned */
+ BUG_ON(pcol_src->nr_pages < ios->nr_pages);
+
+ if (pcol_src->nr_pages > ios->nr_pages) {
+ struct page **src_page;
+ unsigned pages_less = pcol_src->nr_pages - ios->nr_pages;
+ unsigned long len_less = pcol_src->length - ios->length;
+ unsigned i;
+ int ret;
+
+ /* This IO was trimmed */
+ pcol_src->nr_pages = ios->nr_pages;
+ pcol_src->length = ios->length;
+
+ /* Left over pages are passed to the next io */
+ pcol->expected_pages += pages_less;
+ pcol->nr_pages = pages_less;
+ pcol->length = len_less;
+ src_page = pcol_src->pages + pcol_src->nr_pages;
+ pcol->pg_first = (*src_page)->index;
+
+ ret = pcol_try_alloc(pcol);
+ if (unlikely(ret))
+ return ret;
+
+ for (i = 0; i < pages_less; ++i)
+ pcol->pages[i] = *src_page++;
+
+ EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x "
+ "pages_less=0x%x expected_pages=0x%x "
+ "next_offset=0x%llx next_len=0x%lx\n",
+ pcol_src->nr_pages, pages_less, pcol->expected_pages,
+ pcol->pg_first * PAGE_SIZE, pcol->length);
+ }
+ return 0;
+}
+
static int read_exec(struct page_collect *pcol)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct exofs_io_state *ios = pcol->ios;
+ struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL;
int ret;
if (!pcol->pages)
return 0;
+ if (!pcol->ios) {
+ int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true,
+ pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->length, &pcol->ios);
+
+ if (ret)
+ return ret;
+ }
+
+ ios = pcol->ios;
ios->pages = pcol->pages;
- ios->nr_pages = pcol->nr_pages;
- ios->length = pcol->length;
- ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
if (pcol->read_4_write) {
- exofs_oi_read(oi, pcol->ios);
+ ore_read(pcol->ios);
return __readpages_done(pcol);
}
@@ -295,17 +343,23 @@ static int read_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
ios->done = readpages_done;
ios->private = pcol_copy;
- ret = exofs_oi_read(oi, ios);
+
+ /* pages ownership was passed to pcol_copy */
+ _pcol_reset(pcol);
+
+ ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
if (unlikely(ret))
goto err;
- atomic_inc(&pcol->sbi->s_curr_pending);
+ EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n",
+ pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));
- EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
- ios->obj.id, _LLU(ios->offset), pcol->length);
+ ret = ore_read(ios);
+ if (unlikely(ret))
+ goto err;
+
+ atomic_inc(&pcol->sbi->s_curr_pending);
- /* pages ownership was passed to pcol_copy */
- _pcol_reset(pcol);
return 0;
err:
@@ -340,6 +394,8 @@ static int readpage_strip(void *data, struct page *page)
EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
page->index);
+ pcol->that_locked_page = page;
+
if (page->index < end_index)
len = PAGE_CACHE_SIZE;
else if (page->index == end_index)
@@ -428,6 +484,10 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
return ret;
}
+ ret = read_exec(&pcol);
+ if (unlikely(ret))
+ return ret;
+
return read_exec(&pcol);
}
@@ -457,21 +517,22 @@ static int exofs_readpage(struct file *file, struct page *page)
}
/* Callback for osd_write. All writes are asynchronous */
-static void writepages_done(struct exofs_io_state *ios, void *p)
+static void writepages_done(struct ore_io_state *ios, void *p)
{
struct page_collect *pcol = p;
int i;
- u64 resid;
u64 good_bytes;
u64 length = 0;
- int ret = exofs_check_io(ios, &resid);
+ int ret = ore_check_io(ios, NULL);
atomic_dec(&pcol->sbi->s_curr_pending);
- if (likely(!ret))
+ if (likely(!ret)) {
good_bytes = pcol->length;
- else
- good_bytes = pcol->length - resid;
+ ret = PAGE_WAS_NOT_IN_IO;
+ } else {
+ good_bytes = 0;
+ }
EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
@@ -504,16 +565,73 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
EXOFS_DBGMSG2("writepages_done END\n");
}
+static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
+{
+ struct page_collect *pcol = priv;
+ pgoff_t index = offset / PAGE_SIZE;
+
+ if (!pcol->that_locked_page ||
+ (pcol->that_locked_page->index != index)) {
+ struct page *page = find_get_page(pcol->inode->i_mapping, index);
+
+ if (!page) {
+ page = find_or_create_page(pcol->inode->i_mapping,
+ index, GFP_NOFS);
+ if (unlikely(!page)) {
+ EXOFS_DBGMSG("grab_cache_page Failed "
+ "index=0x%llx\n", _LLU(index));
+ return NULL;
+ }
+ unlock_page(page);
+ }
+ if (PageDirty(page) || PageWriteback(page))
+ *uptodate = true;
+ else
+ *uptodate = PageUptodate(page);
+ EXOFS_DBGMSG("index=0x%lx uptodate=%d\n", index, *uptodate);
+ return page;
+ } else {
+ EXOFS_DBGMSG("YES that_locked_page index=0x%lx\n",
+ pcol->that_locked_page->index);
+ *uptodate = true;
+ return pcol->that_locked_page;
+ }
+}
+
+static void __r4w_put_page(void *priv, struct page *page)
+{
+ struct page_collect *pcol = priv;
+
+ if (pcol->that_locked_page != page) {
+ EXOFS_DBGMSG("index=0x%lx\n", page->index);
+ page_cache_release(page);
+ return;
+ }
+ EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index);
+}
+
+static const struct _ore_r4w_op _r4w_op = {
+ .get_page = &__r4w_get_page,
+ .put_page = &__r4w_put_page,
+};
+
static int write_exec(struct page_collect *pcol)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct exofs_io_state *ios = pcol->ios;
+ struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL;
int ret;
if (!pcol->pages)
return 0;
+ BUG_ON(pcol->ios);
+ ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
+ pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->length, &pcol->ios);
+ if (unlikely(ret))
+ goto err;
+
pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
if (!pcol_copy) {
EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,25 +641,29 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
+ ios = pcol->ios;
ios->pages = pcol_copy->pages;
- ios->nr_pages = pcol_copy->nr_pages;
- ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
- ios->length = pcol_copy->length;
ios->done = writepages_done;
+ ios->r4w = &_r4w_op;
ios->private = pcol_copy;
- ret = exofs_oi_write(oi, ios);
+ /* pages ownership was passed to pcol_copy */
+ _pcol_reset(pcol);
+
+ ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
+ if (unlikely(ret))
+ goto err;
+
+ EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n",
+ pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));
+
+ ret = ore_write(ios);
if (unlikely(ret)) {
- EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
+ EXOFS_ERR("write_exec: ore_write() Failed\n");
goto err;
}
atomic_inc(&pcol->sbi->s_curr_pending);
- EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
- pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
- pcol->length);
- /* pages ownership was passed to pcol_copy */
- _pcol_reset(pcol);
return 0;
err:
@@ -681,14 +803,33 @@ static int exofs_writepages(struct address_space *mapping,
_pcol_init(&pcol, expected_pages, mapping->host);
ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
- if (ret) {
+ if (unlikely(ret)) {
EXOFS_ERR("write_cache_pages => %d\n", ret);
return ret;
}
- return write_exec(&pcol);
+ ret = write_exec(&pcol);
+ if (unlikely(ret))
+ return ret;
+
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ return write_exec(&pcol); /* pump the last reminder */
+ } else if (pcol.nr_pages) {
+ /* not SYNC let the reminder join the next writeout */
+ unsigned i;
+
+ for (i = 0; i < pcol.nr_pages; i++) {
+ struct page *page = pcol.pages[i];
+
+ end_page_writeback(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ }
+ }
+ return 0;
}
+/*
static int exofs_writepage(struct page *page, struct writeback_control *wbc)
{
struct page_collect pcol;
@@ -704,7 +845,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
return write_exec(&pcol);
}
-
+*/
/* i_mutex held using inode->i_size directly */
static void _write_failed(struct inode *inode, loff_t to)
{
@@ -810,7 +951,7 @@ static void exofs_invalidatepage(struct page *page, unsigned long offset)
const struct address_space_operations exofs_aops = {
.readpage = exofs_readpage,
.readpages = exofs_readpages,
- .writepage = exofs_writepage,
+ .writepage = NULL,
.writepages = exofs_writepages,
.write_begin = exofs_write_begin_export,
.write_end = exofs_write_end,
@@ -844,17 +985,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
}
-const struct osd_attr g_attr_logical_length = ATTR_DEF(
- OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
static int _do_truncate(struct inode *inode, loff_t newsize)
{
struct exofs_i_info *oi = exofs_i(inode);
+ struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- ret = exofs_oi_truncate(oi, (u64)newsize);
+ ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize);
if (likely(!ret))
truncate_setsize(inode, newsize);
@@ -917,30 +1056,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
[1] = g_attr_inode_file_layout,
[2] = g_attr_inode_dir_layout,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct exofs_on_disk_inode_layout *layout;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
- ios->obj.id = exofs_oi_objno(oi);
- exofs_make_credential(oi->i_cred, &ios->obj);
- ios->cred = oi->i_cred;
-
- attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
- attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+ attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);
+ attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret)) {
EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
- _LLU(ios->obj.id), ret);
+ _LLU(oi->one_comp.obj.id), ret);
memset(inode, 0, sizeof(*inode));
inode->i_mode = 0040000 | (0777 & ~022);
/* If object is lost on target we might as well enable it's
@@ -990,7 +1125,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
}
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
@@ -1016,6 +1151,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
return inode;
oi = exofs_i(inode);
__oi_init(oi);
+ exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
+ exofs_oi_objno(oi));
/* read the inode from the osd */
ret = exofs_get_inode(sb, oi, &fcb);
@@ -1028,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(fcb.i_mode);
inode->i_uid = le32_to_cpu(fcb.i_uid);
inode->i_gid = le32_to_cpu(fcb.i_gid);
- inode->i_nlink = le16_to_cpu(fcb.i_links_count);
+ set_nlink(inode, le16_to_cpu(fcb.i_links_count));
inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
@@ -1107,21 +1244,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
* set the obj_created flag so that other methods know that the object exists on
* the OSD.
*/
-static void create_done(struct exofs_io_state *ios, void *p)
+static void create_done(struct ore_io_state *ios, void *p)
{
struct inode *inode = p;
struct exofs_i_info *oi = exofs_i(inode);
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret;
- ret = exofs_check_io(ios, NULL);
- exofs_put_io_state(ios);
+ ret = ore_check_io(ios, NULL);
+ ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending);
if (unlikely(ret)) {
EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
- _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+ _LLU(exofs_oi_objno(oi)),
+ _LLU(oi->one_comp.obj.partition));
/*TODO: When FS is corrupted creation can fail, object already
* exist. Get rid of this asynchronous creation, if exist
* increment the obj counter and try the next object. Until we
@@ -1140,14 +1278,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
*/
struct inode *exofs_new_inode(struct inode *dir, int mode)
{
- struct super_block *sb;
+ struct super_block *sb = dir->i_sb;
+ struct exofs_sb_info *sbi = sb->s_fs_info;
struct inode *inode;
struct exofs_i_info *oi;
- struct exofs_sb_info *sbi;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- sb = dir->i_sb;
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -1157,8 +1294,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
set_obj_2bcreated(oi);
- sbi = sb->s_fs_info;
-
inode->i_mapping->backing_dev_info = sb->s_bdi;
inode_init_owner(inode, dir, mode);
inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1305,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
spin_unlock(&sbi->s_next_gen_lock);
insert_inode_hash(inode);
+ exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
+ exofs_oi_objno(oi));
exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
mark_inode_dirty(inode);
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+ EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
return ERR_PTR(ret);
}
- ios->obj.id = exofs_oi_objno(oi);
- exofs_make_credential(oi->i_cred, &ios->obj);
-
ios->done = create_done;
ios->private = inode;
- ios->cred = oi->i_cred;
- ret = exofs_sbi_create(ios);
+
+ ret = ore_create(ios);
if (ret) {
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ERR_PTR(ret);
}
atomic_inc(&sbi->s_curr_pending);
@@ -1207,11 +1341,11 @@ struct updatei_args {
/*
* Callback function from exofs_update_inode().
*/
-static void updatei_done(struct exofs_io_state *ios, void *p)
+static void updatei_done(struct ore_io_state *ios, void *p)
{
struct updatei_args *args = p;
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
atomic_dec(&args->sbi->s_curr_pending);
@@ -1227,7 +1361,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct osd_attr attr;
struct exofs_fcb *fcb;
struct updatei_args *args;
@@ -1266,9 +1400,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
} else
memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
goto free_args;
}
@@ -1285,13 +1419,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
ios->private = args;
}
- ret = exofs_oi_write(oi, ios);
+ ret = ore_write(ios);
if (!do_sync && !ret) {
atomic_inc(&sbi->s_curr_pending);
goto out; /* deallocation in updatei_done */
}
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
free_args:
kfree(args);
out:
@@ -1310,11 +1444,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
* Callback function from exofs_delete_inode() - don't have much cleaning up to
* do.
*/
-static void delete_done(struct exofs_io_state *ios, void *p)
+static void delete_done(struct ore_io_state *ios, void *p)
{
struct exofs_sb_info *sbi = p;
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending);
}
@@ -1329,7 +1463,7 @@ void exofs_evict_inode(struct inode *inode)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1483,19 @@ void exofs_evict_inode(struct inode *inode)
/* ignore the error, attempt a remove anyway */
/* Now Remove the OSD objects */
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
return;
}
- ios->obj.id = exofs_oi_objno(oi);
ios->done = delete_done;
ios->private = sbi;
- ios->cred = oi->i_cred;
- ret = exofs_sbi_remove(ios);
+
+ ret = ore_remove(ios);
if (ret) {
- EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
- exofs_put_io_state(ios);
+ EXOFS_ERR("%s: ore_remove failed\n", __func__);
+ ore_put_io_state(ios);
return;
}
atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
deleted file mode 100644
index f74a2ec..0000000
--- a/fs/exofs/ios.c
+++ /dev/null
@@ -1,803 +0,0 @@
-/*
- * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com)
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation. Since it is based on ext2, and the only
- * valid version of GPL for the Linux kernel is version 2, the only valid
- * version of GPL for exofs is version 2.
- *
- * exofs is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with exofs; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/slab.h>
-#include <scsi/scsi_device.h>
-#include <asm/div64.h>
-
-#include "exofs.h"
-
-#define EXOFS_DBGMSG2(M...) do {} while (0)
-/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
-
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
-{
- osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
-
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
- u64 offset, void *p, unsigned length)
-{
- struct osd_request *or = osd_start_request(od, GFP_KERNEL);
-/* struct osd_sense_info osi = {.key = 0};*/
- int ret;
-
- if (unlikely(!or)) {
- EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
- return -ENOMEM;
- }
- ret = osd_req_read_kern(or, obj, offset, p, length);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
- goto out;
- }
-
- ret = osd_finalize_request(or, 0, cred, NULL);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
- goto out;
- }
-
- ret = osd_execute_request(or);
- if (unlikely(ret))
- EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
- /* osd_req_decode_sense(or, ret); */
-
-out:
- osd_end_request(or);
- return ret;
-}
-
-int exofs_get_io_state(struct exofs_layout *layout,
- struct exofs_io_state **pios)
-{
- struct exofs_io_state *ios;
-
- /*TODO: Maybe use kmem_cach per sbi of size
- * exofs_io_state_size(layout->s_numdevs)
- */
- ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
- if (unlikely(!ios)) {
- EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
- exofs_io_state_size(layout->s_numdevs));
- *pios = NULL;
- return -ENOMEM;
- }
-
- ios->layout = layout;
- ios->obj.partition = layout->s_pid;
- *pios = ios;
- return 0;
-}
-
-void exofs_put_io_state(struct exofs_io_state *ios)
-{
- if (ios) {
- unsigned i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
-
- if (per_dev->or)
- osd_end_request(per_dev->or);
- if (per_dev->bio)
- bio_put(per_dev->bio);
- }
-
- kfree(ios);
- }
-}
-
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
- osd_id obj_no, unsigned layout_index)
-{
-/* switch (layout->lay_func) {
- case LAYOUT_MOVING_WINDOW:
- {*/
- unsigned dev_mod = obj_no;
-
- return (layout_index + dev_mod * layout->mirrors_p1) %
- layout->s_numdevs;
-/* }
- case LAYOUT_FUNC_IMPLICT:
- return layout->devs[layout_index];
- }*/
-}
-
-static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
- unsigned layout_index)
-{
- return ios->layout->s_ods[
- exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
-}
-
-static void _sync_done(struct exofs_io_state *ios, void *p)
-{
- struct completion *waiting = p;
-
- complete(waiting);
-}
-
-static void _last_io(struct kref *kref)
-{
- struct exofs_io_state *ios = container_of(
- kref, struct exofs_io_state, kref);
-
- ios->done(ios, ios->private);
-}
-
-static void _done_io(struct osd_request *or, void *p)
-{
- struct exofs_io_state *ios = p;
-
- kref_put(&ios->kref, _last_io);
-}
-
-static int exofs_io_execute(struct exofs_io_state *ios)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- bool sync = (ios->done == NULL);
- int i, ret;
-
- if (sync) {
- ios->done = _sync_done;
- ios->private = &wait;
- }
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_request *or = ios->per_dev[i].or;
- if (unlikely(!or))
- continue;
-
- ret = osd_finalize_request(or, 0, ios->cred, NULL);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
- ret);
- return ret;
- }
- }
-
- kref_init(&ios->kref);
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_request *or = ios->per_dev[i].or;
- if (unlikely(!or))
- continue;
-
- kref_get(&ios->kref);
- osd_execute_request_async(or, _done_io, ios);
- }
-
- kref_put(&ios->kref, _last_io);
- ret = 0;
-
- if (sync) {
- wait_for_completion(&wait);
- ret = exofs_check_io(ios, NULL);
- }
- return ret;
-}
-
-static void _clear_bio(struct bio *bio)
-{
- struct bio_vec *bv;
- unsigned i;
-
- __bio_for_each_segment(bv, bio, i, 0) {
- unsigned this_count = bv->bv_len;
-
- if (likely(PAGE_SIZE == this_count))
- clear_highpage(bv->bv_page);
- else
- zero_user(bv->bv_page, bv->bv_offset, this_count);
- }
-}
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
-{
- enum osd_err_priority acumulated_osd_err = 0;
- int acumulated_lin_err = 0;
- int i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_sense_info osi;
- struct osd_request *or = ios->per_dev[i].or;
- int ret;
-
- if (unlikely(!or))
- continue;
-
- ret = osd_req_decode_sense(or, &osi);
- if (likely(!ret))
- continue;
-
- if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
- /* start read offset passed endof file */
- _clear_bio(ios->per_dev[i].bio);
- EXOFS_DBGMSG("start read offset passed end of file "
- "offset=0x%llx, length=0x%llx\n",
- _LLU(ios->per_dev[i].offset),
- _LLU(ios->per_dev[i].length));
-
- continue; /* we recovered */
- }
-
- if (osi.osd_err_pri >= acumulated_osd_err) {
- acumulated_osd_err = osi.osd_err_pri;
- acumulated_lin_err = ret;
- }
- }
-
- /* TODO: raid specific residual calculations */
- if (resid) {
- if (likely(!acumulated_lin_err))
- *resid = 0;
- else
- *resid = ios->length;
- }
-
- return acumulated_lin_err;
-}
-
-/*
- * L - logical offset into the file
- *
- * U - The number of bytes in a stripe within a group
- *
- * U = stripe_unit * group_width
- *
- * T - The number of bytes striped within a group of component objects
- * (before advancing to the next group)
- *
- * T = stripe_unit * group_width * group_depth
- *
- * S - The number of bytes striped across all component objects
- * before the pattern repeats
- *
- * S = stripe_unit * group_width * group_depth * group_count
- *
- * M - The "major" (i.e., across all components) stripe number
- *
- * M = L / S
- *
- * G - Counts the groups from the beginning of the major stripe
- *
- * G = (L - (M * S)) / T [or (L % S) / T]
- *
- * H - The byte offset within the group
- *
- * H = (L - (M * S)) % T [or (L % S) % T]
- *
- * N - The "minor" (i.e., across the group) stripe number
- *
- * N = H / U
- *
- * C - The component index coresponding to L
- *
- * C = (H - (N * U)) / stripe_unit + G * group_width
- * [or (L % U) / stripe_unit + G * group_width]
- *
- * O - The component offset coresponding to L
- *
- * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
- */
-struct _striping_info {
- u64 obj_offset;
- u64 group_length;
- unsigned dev;
- unsigned unit_off;
-};
-
-static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
- struct _striping_info *si)
-{
- u32 stripe_unit = ios->layout->stripe_unit;
- u32 group_width = ios->layout->group_width;
- u64 group_depth = ios->layout->group_depth;
-
- u32 U = stripe_unit * group_width;
- u64 T = U * group_depth;
- u64 S = T * ios->layout->group_count;
- u64 M = div64_u64(file_offset, S);
-
- /*
- G = (L - (M * S)) / T
- H = (L - (M * S)) % T
- */
- u64 LmodS = file_offset - M * S;
- u32 G = div64_u64(LmodS, T);
- u64 H = LmodS - G * T;
-
- u32 N = div_u64(H, U);
-
- /* "H - (N * U)" is just "H % U" so it's bound to u32 */
- si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
- si->dev *= ios->layout->mirrors_p1;
-
- div_u64_rem(file_offset, stripe_unit, &si->unit_off);
-
- si->obj_offset = si->unit_off + (N * stripe_unit) +
- (M * group_depth * stripe_unit);
-
- si->group_length = T - H;
-}
-
-static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
- unsigned pgbase, struct exofs_per_dev_state *per_dev,
- int cur_len)
-{
- unsigned pg = *cur_pg;
- struct request_queue *q =
- osd_request_queue(exofs_ios_od(ios, per_dev->dev));
-
- per_dev->length += cur_len;
-
- if (per_dev->bio == NULL) {
- unsigned pages_in_stripe = ios->layout->group_width *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
- ios->layout->group_width;
-
- per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
- if (unlikely(!per_dev->bio)) {
- EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
- bio_size);
- return -ENOMEM;
- }
- }
-
- while (cur_len > 0) {
- unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
- unsigned added_len;
-
- BUG_ON(ios->nr_pages <= pg);
- cur_len -= pglen;
-
- added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
- pglen, pgbase);
- if (unlikely(pglen != added_len))
- return -ENOMEM;
- pgbase = 0;
- ++pg;
- }
- BUG_ON(cur_len);
-
- *cur_pg = pg;
- return 0;
-}
-
-static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
- struct _striping_info *si)
-{
- unsigned stripe_unit = ios->layout->stripe_unit;
- unsigned mirrors_p1 = ios->layout->mirrors_p1;
- unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
- unsigned dev = si->dev;
- unsigned first_dev = dev - (dev % devs_in_group);
- unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
- unsigned cur_pg = ios->pages_consumed;
- int ret = 0;
-
- while (length) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
- unsigned cur_len, page_off = 0;
-
- if (!per_dev->length) {
- per_dev->dev = dev;
- if (dev < si->dev) {
- per_dev->offset = si->obj_offset + stripe_unit -
- si->unit_off;
- cur_len = stripe_unit;
- } else if (dev == si->dev) {
- per_dev->offset = si->obj_offset;
- cur_len = stripe_unit - si->unit_off;
- page_off = si->unit_off & ~PAGE_MASK;
- BUG_ON(page_off && (page_off != ios->pgbase));
- } else { /* dev > si->dev */
- per_dev->offset = si->obj_offset - si->unit_off;
- cur_len = stripe_unit;
- }
-
- if (max_comp < dev)
- max_comp = dev;
- } else {
- cur_len = stripe_unit;
- }
- if (cur_len >= length)
- cur_len = length;
-
- ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
- cur_len);
- if (unlikely(ret))
- goto out;
-
- dev += mirrors_p1;
- dev = (dev % devs_in_group) + first_dev;
-
- length -= cur_len;
- }
-out:
- ios->numdevs = max_comp + mirrors_p1;
- ios->pages_consumed = cur_pg;
- return ret;
-}
-
-static int _prepare_for_striping(struct exofs_io_state *ios)
-{
- u64 length = ios->length;
- u64 offset = ios->offset;
- struct _striping_info si;
- int ret = 0;
-
- if (!ios->pages) {
- if (ios->kern_buff) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
-
- _calc_stripe_info(ios, ios->offset, &si);
- per_dev->offset = si.obj_offset;
- per_dev->dev = si.dev;
-
- /* no cross device without page array */
- BUG_ON((ios->layout->group_width > 1) &&
- (si.unit_off + ios->length >
- ios->layout->stripe_unit));
- }
- ios->numdevs = ios->layout->mirrors_p1;
- return 0;
- }
-
- while (length) {
- _calc_stripe_info(ios, offset, &si);
-
- if (length < si.group_length)
- si.group_length = length;
-
- ret = _prepare_one_group(ios, si.group_length, &si);
- if (unlikely(ret))
- goto out;
-
- offset += si.group_length;
- length -= si.group_length;
- }
-
-out:
- return ret;
-}
-
-int exofs_sbi_create(struct exofs_io_state *ios)
-{
- int i, ret;
-
- for (i = 0; i < ios->layout->s_numdevs; i++) {
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- ios->per_dev[i].or = or;
- ios->numdevs++;
-
- osd_req_create_object(or, &ios->obj);
- }
- ret = exofs_io_execute(ios);
-
-out:
- return ret;
-}
-
-int exofs_sbi_remove(struct exofs_io_state *ios)
-{
- int i, ret;
-
- for (i = 0; i < ios->layout->s_numdevs; i++) {
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- ios->per_dev[i].or = or;
- ios->numdevs++;
-
- osd_req_remove_object(or, &ios->obj);
- }
- ret = exofs_io_execute(ios);
-
-out:
- return ret;
-}
-
-static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
-{
- struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
- unsigned dev = ios->per_dev[cur_comp].dev;
- unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
- int ret = 0;
-
- if (ios->pages && !master_dev->length)
- return 0; /* Just an empty slot */
-
- for (; cur_comp < last_comp; ++cur_comp, ++dev) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- per_dev->or = or;
- per_dev->offset = master_dev->offset;
-
- if (ios->pages) {
- struct bio *bio;
-
- if (per_dev != master_dev) {
- bio = bio_kmalloc(GFP_KERNEL,
- master_dev->bio->bi_max_vecs);
- if (unlikely(!bio)) {
- EXOFS_DBGMSG(
- "Failed to allocate BIO size=%u\n",
- master_dev->bio->bi_max_vecs);
- ret = -ENOMEM;
- goto out;
- }
-
- __bio_clone(bio, master_dev->bio);
- bio->bi_bdev = NULL;
- bio->bi_next = NULL;
- per_dev->length = master_dev->length;
- per_dev->bio = bio;
- per_dev->dev = dev;
- } else {
- bio = master_dev->bio;
- /* FIXME: bio_set_dir() */
- bio->bi_rw |= REQ_WRITE;
- }
-
- osd_req_write(or, &ios->obj, per_dev->offset, bio,
- per_dev->length);
- EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(per_dev->length), dev);
- } else if (ios->kern_buff) {
- ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
- ios->kern_buff, ios->length);
- if (unlikely(ret))
- goto out;
- EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(ios->length), dev);
- } else {
- osd_req_set_attributes(or, &ios->obj);
- EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
- _LLU(ios->obj.id), ios->out_attr_len, dev);
- }
-
- if (ios->out_attr)
- osd_req_add_set_attr_list(or, ios->out_attr,
- ios->out_attr_len);
-
- if (ios->in_attr)
- osd_req_add_get_attr_list(or, ios->in_attr,
- ios->in_attr_len);
- }
-
-out:
- return ret;
-}
-
-int exofs_sbi_write(struct exofs_io_state *ios)
-{
- int i;
- int ret;
-
- ret = _prepare_for_striping(ios);
- if (unlikely(ret))
- return ret;
-
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- ret = _sbi_write_mirror(ios, i);
- if (unlikely(ret))
- return ret;
- }
-
- ret = exofs_io_execute(ios);
- return ret;
-}
-
-static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
-{
- struct osd_request *or;
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- unsigned first_dev = (unsigned)ios->obj.id;
-
- if (ios->pages && !per_dev->length)
- return 0; /* Just an empty slot */
-
- first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
- or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- return -ENOMEM;
- }
- per_dev->or = or;
-
- if (ios->pages) {
- osd_req_read(or, &ios->obj, per_dev->offset,
- per_dev->bio, per_dev->length);
- EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
- " dev=%d\n", _LLU(ios->obj.id),
- _LLU(per_dev->offset), _LLU(per_dev->length),
- first_dev);
- } else if (ios->kern_buff) {
- int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
- ios->kern_buff, ios->length);
- EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d ret=>%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(ios->length), first_dev, ret);
- if (unlikely(ret))
- return ret;
- } else {
- osd_req_get_attributes(or, &ios->obj);
- EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
- _LLU(ios->obj.id), ios->in_attr_len, first_dev);
- }
- if (ios->out_attr)
- osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
-
- if (ios->in_attr)
- osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
-
- return 0;
-}
-
-int exofs_sbi_read(struct exofs_io_state *ios)
-{
- int i;
- int ret;
-
- ret = _prepare_for_striping(ios);
- if (unlikely(ret))
- return ret;
-
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- ret = _sbi_read_mirror(ios, i);
- if (unlikely(ret))
- return ret;
- }
-
- ret = exofs_io_execute(ios);
- return ret;
-}
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
-{
- struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
- void *iter = NULL;
- int nelem;
-
- do {
- nelem = 1;
- osd_req_decode_get_attr_list(ios->per_dev[0].or,
- &cur_attr, &nelem, &iter);
- if ((cur_attr.attr_page == attr->attr_page) &&
- (cur_attr.attr_id == attr->attr_id)) {
- attr->len = cur_attr.len;
- attr->val_ptr = cur_attr.val_ptr;
- return 0;
- }
- } while (iter);
-
- return -EIO;
-}
-
-static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
- struct osd_attr *attr)
-{
- int last_comp = cur_comp + ios->layout->mirrors_p1;
-
- for (; cur_comp < last_comp; ++cur_comp) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- return -ENOMEM;
- }
- per_dev->or = or;
-
- osd_req_set_attributes(or, &ios->obj);
- osd_req_add_set_attr_list(or, attr, 1);
- }
-
- return 0;
-}
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
-{
- struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
- struct exofs_io_state *ios;
- struct exofs_trunc_attr {
- struct osd_attr attr;
- __be64 newsize;
- } *size_attrs;
- struct _striping_info si;
- int i, ret;
-
- ret = exofs_get_io_state(&sbi->layout, &ios);
- if (unlikely(ret))
- return ret;
-
- size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
- GFP_KERNEL);
- if (unlikely(!size_attrs)) {
- ret = -ENOMEM;
- goto out;
- }
-
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
-
- ios->numdevs = ios->layout->s_numdevs;
- _calc_stripe_info(ios, size, &si);
-
- for (i = 0; i < ios->layout->group_width; ++i) {
- struct exofs_trunc_attr *size_attr = &size_attrs[i];
- u64 obj_size;
-
- if (i < si.dev)
- obj_size = si.obj_offset +
- ios->layout->stripe_unit - si.unit_off;
- else if (i == si.dev)
- obj_size = si.obj_offset;
- else /* i > si.dev */
- obj_size = si.obj_offset - si.unit_off;
-
- size_attr->newsize = cpu_to_be64(obj_size);
- size_attr->attr = g_attr_logical_length;
- size_attr->attr.val_ptr = &size_attr->newsize;
-
- ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
- &size_attr->attr);
- if (unlikely(ret))
- goto out;
- }
- ret = exofs_io_execute(ios);
-
-out:
- kfree(size_attrs);
- exofs_put_io_state(ios);
- return ret;
-}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4d70db1..b54c437 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -55,12 +55,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
ino = exofs_inode_by_name(dir, dentry);
- inode = NULL;
- if (ino) {
- inode = exofs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
+ inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
return d_splice_alias(inode, dentry);
}
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e988..0000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License version 2 as published by the Free
- * Software Foundation.
- *
- */
-
-/* FIXME: Remove this file once pnfs hits mainline */
-
-#ifndef __EXOFS_PNFS_H__
-#define __EXOFS_PNFS_H__
-
-#if ! defined(__PNFS_OSD_XDR_H__)
-
-enum pnfs_iomode {
- IOMODE_READ = 1,
- IOMODE_RW = 2,
- IOMODE_ANY = 3,
-};
-
-/* Layout Structure */
-enum pnfs_osd_raid_algorithm4 {
- PNFS_OSD_RAID_0 = 1,
- PNFS_OSD_RAID_4 = 2,
- PNFS_OSD_RAID_5 = 3,
- PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
-};
-
-struct pnfs_osd_data_map {
- u32 odm_num_comps;
- u64 odm_stripe_unit;
- u32 odm_group_width;
- u32 odm_group_depth;
- u32 odm_mirror_cnt;
- u32 odm_raid_algorithm;
-};
-
-#endif /* ! defined(__PNFS_OSD_XDR_H__) */
-
-#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57bedd..7ed5000 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -35,11 +35,14 @@
#include <linux/parser.h>
#include <linux/vfs.h>
#include <linux/random.h>
+#include <linux/module.h>
#include <linux/exportfs.h>
#include <linux/slab.h>
#include "exofs.h"
+#define EXOFS_DBGMSG2(M...) do {} while (0)
+
/******************************************************************************
* MOUNT OPTIONS
*****************************************************************************/
@@ -208,10 +211,48 @@ static void destroy_inodecache(void)
}
/******************************************************************************
- * SUPERBLOCK FUNCTIONS
+ * Some osd helpers
*****************************************************************************/
-static const struct super_operations exofs_sops;
-static const struct export_operations exofs_export_ops;
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
+{
+ osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
+}
+
+static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
+ u64 offset, void *p, unsigned length)
+{
+ struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+/* struct osd_sense_info osi = {.key = 0};*/
+ int ret;
+
+ if (unlikely(!or)) {
+ EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
+ return -ENOMEM;
+ }
+ ret = osd_req_read_kern(or, obj, offset, p, length);
+ if (unlikely(ret)) {
+ EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
+ goto out;
+ }
+
+ ret = osd_finalize_request(or, 0, cred, NULL);
+ if (unlikely(ret)) {
+ EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
+ goto out;
+ }
+
+ ret = osd_execute_request(or);
+ if (unlikely(ret))
+ EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
+ /* osd_req_decode_sense(or, ret); */
+
+out:
+ osd_end_request(or);
+ EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
+ "length=0x%llx dev=%p ret=>%d\n",
+ _LLU(obj->id), _LLU(offset), _LLU(length), od, ret);
+ return ret;
+}
static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
EXOFS_APAGE_SB_DATA,
@@ -223,21 +264,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = {
[0] = g_attr_sb_stats,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
- ios->cred = sbi->s_cred;
-
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret)) {
EXOFS_ERR("Error reading super_block stats => %d\n", ret);
goto out;
@@ -264,13 +303,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
}
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
-static void stats_done(struct exofs_io_state *ios, void *p)
+static void stats_done(struct ore_io_state *ios, void *p)
{
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
/* Good thanks nothing to do anymore */
}
@@ -280,12 +319,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = {
[0] = g_attr_sb_stats,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
@@ -293,29 +332,37 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
attrs[0].val_ptr = &sbi->s_ess;
- ios->cred = sbi->s_cred;
+
ios->done = stats_done;
ios->private = sbi;
ios->out_attr = attrs;
ios->out_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_write(ios);
+ ret = ore_write(ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
- exofs_put_io_state(ios);
+ EXOFS_ERR("%s: ore_write failed.\n", __func__);
+ ore_put_io_state(ios);
}
return ret;
}
+/******************************************************************************
+ * SUPERBLOCK FUNCTIONS
+ *****************************************************************************/
+static const struct super_operations exofs_sops;
+static const struct export_operations exofs_export_ops;
+
/*
* Write the superblock to the OSD
*/
-int exofs_sync_fs(struct super_block *sb, int wait)
+static int exofs_sync_fs(struct super_block *sb, int wait)
{
struct exofs_sb_info *sbi;
struct exofs_fscb *fscb;
- struct exofs_io_state *ios;
+ struct ore_comp one_comp;
+ struct ore_components oc;
+ struct ore_io_state *ios;
int ret = -ENOMEM;
fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
@@ -331,7 +378,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
* version). Otherwise the exofs_fscb is read-only from mkfs time. All
* the writeable info is set in exofs_sbi_write_stats() above.
*/
- ret = exofs_get_io_state(&sbi->layout, &ios);
+
+ exofs_init_comps(&oc, &one_comp, sbi, EXOFS_SUPER_ID);
+
+ ret = ore_get_io_state(&sbi->layout, &oc, &ios);
if (unlikely(ret))
goto out;
@@ -345,14 +395,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
fscb->s_newfs = 0;
fscb->s_version = EXOFS_FSCB_VER;
- ios->obj.id = EXOFS_SUPER_ID;
ios->offset = 0;
ios->kern_buff = fscb;
- ios->cred = sbi->s_cred;
- ret = exofs_sbi_write(ios);
+ ret = ore_write(ios);
if (unlikely(ret))
- EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
+ EXOFS_ERR("%s: ore_write failed.\n", __func__);
else
sb->s_dirt = 0;
@@ -360,7 +408,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
unlock_super(sb);
out:
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
kfree(fscb);
return ret;
}
@@ -382,17 +430,20 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
msg, dev_path ?: "", odi->osdname, _LLU(pid));
}
-void exofs_free_sbi(struct exofs_sb_info *sbi)
+static void exofs_free_sbi(struct exofs_sb_info *sbi)
{
- while (sbi->layout.s_numdevs) {
- int i = --sbi->layout.s_numdevs;
- struct osd_dev *od = sbi->layout.s_ods[i];
+ unsigned numdevs = sbi->oc.numdevs;
+
+ while (numdevs) {
+ unsigned i = --numdevs;
+ struct osd_dev *od = ore_comp_dev(&sbi->oc, i);
if (od) {
- sbi->layout.s_ods[i] = NULL;
+ ore_comp_set_dev(&sbi->oc, i, NULL);
osduld_put_device(od);
}
}
+ kfree(sbi->oc.ods);
kfree(sbi);
}
@@ -419,8 +470,8 @@ static void exofs_put_super(struct super_block *sb)
msecs_to_jiffies(100));
}
- _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
- sbi->layout.s_pid);
+ _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
+ sbi->one_comp.obj.partition);
bdi_destroy(&sbi->bdi);
exofs_free_sbi(sbi);
@@ -430,81 +481,34 @@ static void exofs_put_super(struct super_block *sb)
static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
struct exofs_device_table *dt)
{
- u64 stripe_length;
+ int ret;
- sbi->data_map.odm_num_comps =
- le32_to_cpu(dt->dt_data_map.cb_num_comps);
- sbi->data_map.odm_stripe_unit =
+ sbi->layout.stripe_unit =
le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
- sbi->data_map.odm_group_width =
+ sbi->layout.group_width =
le32_to_cpu(dt->dt_data_map.cb_group_width);
- sbi->data_map.odm_group_depth =
+ sbi->layout.group_depth =
le32_to_cpu(dt->dt_data_map.cb_group_depth);
- sbi->data_map.odm_mirror_cnt =
- le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
- sbi->data_map.odm_raid_algorithm =
+ sbi->layout.mirrors_p1 =
+ le32_to_cpu(dt->dt_data_map.cb_mirror_cnt) + 1;
+ sbi->layout.raid_algorithm =
le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
-/* FIXME: Only raid0 for now. if not so, do not mount */
- if (sbi->data_map.odm_num_comps != numdevs) {
- EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
- sbi->data_map.odm_num_comps, numdevs);
- return -EINVAL;
- }
- if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
- EXOFS_ERR("Only RAID_0 for now\n");
- return -EINVAL;
- }
- if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
- EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
- numdevs, sbi->data_map.odm_mirror_cnt);
- return -EINVAL;
- }
-
- if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
- EXOFS_ERR("Stripe Unit(0x%llx)"
- " must be Multples of PAGE_SIZE(0x%lx)\n",
- _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
- return -EINVAL;
- }
-
- sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
- sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
-
- if (sbi->data_map.odm_group_width) {
- sbi->layout.group_width = sbi->data_map.odm_group_width;
- sbi->layout.group_depth = sbi->data_map.odm_group_depth;
- if (!sbi->layout.group_depth) {
- EXOFS_ERR("group_depth == 0 && group_width != 0\n");
- return -EINVAL;
- }
- sbi->layout.group_count = sbi->data_map.odm_num_comps /
- sbi->layout.mirrors_p1 /
- sbi->data_map.odm_group_width;
- } else {
- if (sbi->data_map.odm_group_depth) {
- printk(KERN_NOTICE "Warning: group_depth ignored "
- "group_width == 0 && group_depth == %d\n",
- sbi->data_map.odm_group_depth);
- sbi->data_map.odm_group_depth = 0;
- }
- sbi->layout.group_width = sbi->data_map.odm_num_comps /
- sbi->layout.mirrors_p1;
- sbi->layout.group_depth = -1;
- sbi->layout.group_count = 1;
- }
-
- stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
- if (stripe_length >= (1ULL << 32)) {
- EXOFS_ERR("Total Stripe length(0x%llx)"
- " >= 32bit is not supported\n", _LLU(stripe_length));
- return -EINVAL;
- }
-
- return 0;
+ ret = ore_verify_layout(numdevs, &sbi->layout);
+
+ EXOFS_DBGMSG("exofs: layout: "
+ "num_comps=%u stripe_unit=0x%x group_width=%u "
+ "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
+ numdevs,
+ sbi->layout.stripe_unit,
+ sbi->layout.group_width,
+ _LLU(sbi->layout.group_depth),
+ sbi->layout.mirrors_p1,
+ sbi->layout.raid_algorithm);
+ return ret;
}
-static unsigned __ra_pages(struct exofs_layout *layout)
+static unsigned __ra_pages(struct ore_layout *layout)
{
const unsigned _MIN_RA = 32; /* min 128K read-ahead */
unsigned ra_pages = layout->group_width * layout->stripe_unit /
@@ -547,14 +551,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
return !(odi->systemid_len || odi->osdname_len);
}
-static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
+int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
+ struct exofs_dev **peds)
+{
+ struct __alloc_ore_devs_and_exofs_devs {
+ /* Twice bigger table: See exofs_init_comps() and comment at
+ * exofs_read_lookup_dev_table()
+ */
+ struct ore_dev *oreds[numdevs * 2 - 1];
+ struct exofs_dev eds[numdevs];
+ } *aoded;
+ struct exofs_dev *eds;
+ unsigned i;
+
+ aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
+ if (unlikely(!aoded)) {
+ EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
+ numdevs);
+ return -ENOMEM;
+ }
+
+ sbi->oc.ods = aoded->oreds;
+ *peds = eds = aoded->eds;
+ for (i = 0; i < numdevs; ++i)
+ aoded->oreds[i] = &eds[i].ored;
+ return 0;
+}
+
+static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
+ struct osd_dev *fscb_od,
unsigned table_count)
{
- struct exofs_sb_info *sbi = *psbi;
- struct osd_dev *fscb_od;
- struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
- .id = EXOFS_DEVTABLE_ID};
+ struct ore_comp comp;
struct exofs_device_table *dt;
+ struct exofs_dev *eds;
unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
sizeof(*dt);
unsigned numdevs, i;
@@ -567,10 +597,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
return -ENOMEM;
}
- fscb_od = sbi->layout.s_ods[0];
- sbi->layout.s_ods[0] = NULL;
- sbi->layout.s_numdevs = 0;
- ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
+ sbi->oc.numdevs = 0;
+
+ comp.obj.partition = sbi->one_comp.obj.partition;
+ comp.obj.id = EXOFS_DEVTABLE_ID;
+ exofs_make_credential(comp.cred, &comp.obj);
+
+ ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
+ table_bytes);
if (unlikely(ret)) {
EXOFS_ERR("ERROR: reading device table\n");
goto out;
@@ -587,18 +621,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
if (unlikely(ret))
goto out;
- if (likely(numdevs > 1)) {
- unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
-
- sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
- if (unlikely(!sbi)) {
- ret = -ENOMEM;
- goto out;
- }
- memset(&sbi->layout.s_ods[1], 0,
- size - sizeof(sbi->layout.s_ods[0]));
- *psbi = sbi;
- }
+ ret = __alloc_dev_table(sbi, numdevs, &eds);
+ if (unlikely(ret))
+ goto out;
+ /* exofs round-robins the device table view according to inode
+ * number. We hold a: twice bigger table hence inodes can point
+ * to any device and have a sequential view of the table
+ * starting at this device. See exofs_init_comps()
+ */
+ memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
+ (numdevs - 1) * sizeof(sbi->oc.ods[0]));
for (i = 0; i < numdevs; i++) {
struct exofs_fscb fscb;
@@ -614,13 +646,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
i, odi.osdname);
+ /* the exofs id is currently the table index */
+ eds[i].did = i;
+
/* On all devices the device table is identical. The user can
* specify any one of the participating devices on the command
* line. We always keep them in device-table order.
*/
if (fscb_od && osduld_device_same(fscb_od, &odi)) {
- sbi->layout.s_ods[i] = fscb_od;
- ++sbi->layout.s_numdevs;
+ eds[i].ored.od = fscb_od;
+ ++sbi->oc.numdevs;
fscb_od = NULL;
continue;
}
@@ -633,13 +668,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
goto out;
}
- sbi->layout.s_ods[i] = od;
- ++sbi->layout.s_numdevs;
+ eds[i].ored.od = od;
+ ++sbi->oc.numdevs;
/* Read the fscb of the other devices to make sure the FS
* partition is there.
*/
- ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
+ ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
sizeof(fscb));
if (unlikely(ret)) {
EXOFS_ERR("ERROR: Malformed participating device "
@@ -656,13 +691,11 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
out:
kfree(dt);
- if (unlikely(!ret && fscb_od)) {
- EXOFS_ERR(
- "ERROR: Bad device-table container device not present\n");
- osduld_put_device(fscb_od);
- ret = -EINVAL;
+ if (unlikely(fscb_od && !ret)) {
+ EXOFS_ERR("ERROR: Bad device-table container device not present\n");
+ osduld_put_device(fscb_od);
+ return -EINVAL;
}
-
return ret;
}
@@ -676,7 +709,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */
- struct osd_obj_id obj;
+ struct ore_comp comp;
unsigned table_count;
int ret;
@@ -684,10 +717,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
- ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
- if (ret)
- goto free_bdi;
-
/* use mount options to fill superblock */
if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0};
@@ -695,6 +724,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
odi.osdname_len = strlen(opts->dev_name);
odi.osdname = (u8 *)opts->dev_name;
od = osduld_info_lookup(&odi);
+ kfree(opts->dev_name);
+ opts->dev_name = NULL;
} else {
od = osduld_path_lookup(opts->dev_name);
}
@@ -709,11 +740,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sbi->layout.group_width = 1;
sbi->layout.group_depth = -1;
sbi->layout.group_count = 1;
- sbi->layout.s_ods[0] = od;
- sbi->layout.s_numdevs = 1;
- sbi->layout.s_pid = opts->pid;
sbi->s_timeout = opts->timeout;
+ sbi->one_comp.obj.partition = opts->pid;
+ sbi->one_comp.obj.id = 0;
+ exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
+ sbi->oc.single_comp = EC_SINGLE_COMP;
+ sbi->oc.comps = &sbi->one_comp;
+
/* fill in some other data by hand */
memset(sb->s_id, 0, sizeof(sb->s_id));
strcpy(sb->s_id, "exofs");
@@ -724,11 +758,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_bdev = NULL;
sb->s_dev = 0;
- obj.partition = sbi->layout.s_pid;
- obj.id = EXOFS_SUPER_ID;
- exofs_make_credential(sbi->s_cred, &obj);
+ comp.obj.partition = sbi->one_comp.obj.partition;
+ comp.obj.id = EXOFS_SUPER_ID;
+ exofs_make_credential(comp.cred, &comp.obj);
- ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
+ ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret))
goto free_sbi;
@@ -757,9 +791,18 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
table_count = le64_to_cpu(fscb.s_dev_table_count);
if (table_count) {
- ret = exofs_read_lookup_dev_table(&sbi, table_count);
+ ret = exofs_read_lookup_dev_table(sbi, od, table_count);
+ if (unlikely(ret))
+ goto free_sbi;
+ } else {
+ struct exofs_dev *eds;
+
+ ret = __alloc_dev_table(sbi, 1, &eds);
if (unlikely(ret))
goto free_sbi;
+
+ ore_comp_set_dev(&sbi->oc, 0, od);
+ sbi->oc.numdevs = 1;
}
__sbi_read_stats(sbi);
@@ -793,20 +836,21 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
}
- _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
- sbi->layout.s_pid);
- if (opts->is_osdname)
- kfree(opts->dev_name);
+ ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+ if (ret) {
+ EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
+ goto free_sbi;
+ }
+
+ _exofs_print_device("Mounting", opts->dev_name,
+ ore_comp_dev(&sbi->oc, 0),
+ sbi->one_comp.obj.partition);
return 0;
free_sbi:
- bdi_destroy(&sbi->bdi);
-free_bdi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
- opts->dev_name, sbi->layout.s_pid, ret);
+ opts->dev_name, sbi->one_comp.obj.partition, ret);
exofs_free_sbi(sbi);
- if (opts->is_osdname)
- kfree(opts->dev_name);
return ret;
}
@@ -837,7 +881,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct osd_attr attrs[] = {
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -846,21 +890,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
};
uint64_t capacity = ULLONG_MAX;
uint64_t used = ULLONG_MAX;
- uint8_t cred_a[OSD_CAP_LEN];
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (ret) {
- EXOFS_DBGMSG("exofs_get_io_state failed.\n");
+ EXOFS_DBGMSG("ore_get_io_state failed.\n");
return ret;
}
- exofs_make_credential(cred_a, &ios->obj);
- ios->cred = sbi->s_cred;
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret))
goto out;
@@ -889,7 +930,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXOFS_NAME_LEN;
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
@@ -908,7 +949,7 @@ static const struct super_operations exofs_sops = {
* EXPORT OPERATIONS
*****************************************************************************/
-struct dentry *exofs_get_parent(struct dentry *child)
+static struct dentry *exofs_get_parent(struct dentry *child)
{
unsigned long ino = exofs_parent_ino(child);