aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-10-18 01:09:09 +0200
committerFrederic Weisbecker <fweisbec@gmail.com>2009-10-18 01:12:33 +0200
commit0f8f86c7bdd1c954fbe153af437a0d91a6c5721a (patch)
tree94a8d419a470a4f9852ca397bb9bbe48db92ff5c /block
parentdca2d6ac09d9ef59ff46820d4f0c94b08a671202 (diff)
parentf39cdf25bf77219676ec5360980ac40b1a7e144a (diff)
downloadkernel_samsung_smdk4412-0f8f86c7bdd1c954fbe153af437a0d91a6c5721a.zip
kernel_samsung_smdk4412-0f8f86c7bdd1c954fbe153af437a0d91a6c5721a.tar.gz
kernel_samsung_smdk4412-0f8f86c7bdd1c954fbe153af437a0d91a6c5721a.tar.bz2
Merge commit 'perf/core' into perf/hw-breakpoint
Conflicts: kernel/Makefile, kernel/trace/Makefile, kernel/trace/trace.h, samples/Makefile. Merge reason: We need to be up to date with the perf events development branch because we plan to rewrite the breakpoints API on top of perf events.
Diffstat (limited to 'block')
-rw-r--r--block/as-iosched.c10
-rw-r--r--block/blk-barrier.c45
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-settings.c34
-rw-r--r--block/blk-sysfs.c11
-rw-r--r--block/blk-tag.c2
-rw-r--r--block/bsg.c4
-rw-r--r--block/cfq-iosched.c252
-rw-r--r--block/compat_ioctl.c13
-rw-r--r--block/elevator.c4
-rw-r--r--block/genhd.c10
-rw-r--r--block/ioctl.c17
12 files changed, 258 insertions, 153 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 7a12cf6..ce8ba57 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -146,7 +146,7 @@ enum arq_state {
#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)
-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, as_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);
@@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data *ad);
static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(as_ioc_count);
if (ioc_gone) {
/*
* AS scheduler is exiting, grab exit lock and check
@@ -169,7 +169,7 @@ static void free_as_io_context(struct as_io_context *aic)
* complete ioc_gone and set it back to NULL.
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io_context(void)
ret->seek_total = 0;
ret->seek_samples = 0;
ret->seek_mean = 0;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(as_ioc_count);
}
return ret;
@@ -1507,7 +1507,7 @@ static void __exit as_exit(void)
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(as_ioc_count))
wait_for_completion(&all_gone);
synchronize_rcu();
}
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab3..8873b9b 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
if (bio->bi_private)
complete(bio->bi_private);
+ __free_page(bio_page(bio));
bio_put(bio);
}
@@ -372,30 +373,50 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
struct request_queue *q = bdev_get_queue(bdev);
int type = flags & DISCARD_FL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
+ struct bio *bio;
+ struct page *page;
int ret = 0;
if (!q)
return -ENXIO;
- if (!q->prepare_discard_fn)
+ if (!blk_queue_discard(q))
return -EOPNOTSUPP;
while (nr_sects && !ret) {
- struct bio *bio = bio_alloc(gfp_mask, 0);
- if (!bio)
- return -ENOMEM;
+ unsigned int sector_size = q->limits.logical_block_size;
+ unsigned int max_discard_sectors =
+ min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ goto out;
+ bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
if (flags & DISCARD_FL_WAIT)
bio->bi_private = &wait;
- bio->bi_sector = sector;
+ /*
+ * Add a zeroed one-sector payload as that's what
+ * our current implementations need. If we'll ever need
+ * more the interface will need revisiting.
+ */
+ page = alloc_page(gfp_mask | __GFP_ZERO);
+ if (!page)
+ goto out_free_bio;
+ if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+ goto out_free_page;
- if (nr_sects > queue_max_hw_sectors(q)) {
- bio->bi_size = queue_max_hw_sectors(q) << 9;
- nr_sects -= queue_max_hw_sectors(q);
- sector += queue_max_hw_sectors(q);
+ /*
+ * And override the bio size - the way discard works we
+ * touch many more blocks on disk than the actual payload
+ * length.
+ */
+ if (nr_sects > max_discard_sectors) {
+ bio->bi_size = max_discard_sectors << 9;
+ nr_sects -= max_discard_sectors;
+ sector += max_discard_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
@@ -414,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_put(bio);
}
return ret;
+out_free_page:
+ __free_page(page);
+out_free_bio:
+ bio_put(bio);
+out:
+ return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228..ac0fa10 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
#include "blk.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
static int __make_request(struct request_queue *q, struct bio *bio);
@@ -1029,7 +1030,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
if (now == part->stamp)
return;
- if (part->in_flight) {
+ if (part_in_flight(part)) {
__part_stat_add(cpu, part, time_in_queue,
part_in_flight(part) * (now - part->stamp));
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
@@ -1124,7 +1125,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
req->cmd_flags |= REQ_DISCARD;
if (bio_rw_flagged(bio, BIO_RW_BARRIER))
req->cmd_flags |= REQ_SOFTBARRIER;
- req->q->prepare_discard_fn(req->q, req);
} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
req->cmd_flags |= REQ_HARDBARRIER;
@@ -1437,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
}
- if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+ if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+ nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
@@ -1470,7 +1471,7 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
- !q->prepare_discard_fn) {
+ !blk_queue_discard(q)) {
err = -EOPNOTSUPP;
goto end_io;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 83413ff..66d4aa8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q: queue
- * @dfn: prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
- q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
* blk_queue_merge_bvec - set a merge_bvec function for queue
* @q: queue
* @mbfn: merge_bvec_fn
@@ -111,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_hw_segments = MAX_HW_SEGMENTS;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+ lim->max_sectors = BLK_DEF_MAX_SECTORS;
+ lim->max_hw_sectors = INT_MAX;
+ lim->max_discard_sectors = SAFE_MAX_SECTORS;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
@@ -164,6 +149,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->unplug_timer.data = (unsigned long)q;
blk_set_default_limits(&q->limits);
+ blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
/*
* If the caller didn't supply a lock, fall back to our embedded
@@ -254,6 +240,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
/**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q: the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+ unsigned int max_discard_sectors)
+{
+ q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
* blk_queue_max_phys_segments - set max phys segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b78c9c3..8a6d81a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk)
if (ret) {
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(disk_to_dev(disk));
return ret;
}
@@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk)
if (WARN_ON(!q))
return;
- if (q->request_fn) {
+ if (q->request_fn)
elv_unregister_queue(q);
- kobject_uevent(&q->kobj, KOBJ_REMOVE);
- kobject_del(&q->kobj);
- kobject_put(&disk_to_dev(disk)->kobj);
- }
+ kobject_uevent(&q->kobj, KOBJ_REMOVE);
+ kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(disk_to_dev(disk));
+ kobject_put(&disk_to_dev(disk)->kobj);
}
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 2e5cfeb..6b0f52c 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -359,7 +359,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
max_depth -= 2;
if (!max_depth)
max_depth = 1;
- if (q->in_flight[0] > max_depth)
+ if (q->in_flight[BLK_RW_ASYNC] > max_depth)
return 1;
}
diff --git a/block/bsg.c b/block/bsg.c
index 5f184bb..0676301 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -1062,7 +1062,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue);
static struct cdev bsg_cdev;
-static char *bsg_nodename(struct device *dev)
+static char *bsg_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
}
@@ -1087,7 +1087,7 @@ static int __init bsg_init(void)
ret = PTR_ERR(bsg_class);
goto destroy_kmemcache;
}
- bsg_class->nodename = bsg_nodename;
+ bsg_class->devnode = bsg_devnode;
ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
if (ret)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 0e3814b..069a610 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125;
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;
-static DEFINE_PER_CPU(unsigned long, ioc_count);
+static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock);
@@ -173,6 +173,7 @@ struct cfq_data {
unsigned int cfq_slice[2];
unsigned int cfq_slice_async_rq;
unsigned int cfq_slice_idle;
+ unsigned int cfq_latency;
struct list_head cic_list;
@@ -180,6 +181,8 @@ struct cfq_data {
* Fallback dummy cfqq for extreme OOM conditions
*/
struct cfq_queue oom_cfqq;
+
+ unsigned long last_end_sync_rq;
};
enum cfqq_state_flags {
@@ -227,7 +230,7 @@ CFQ_CFQQ_FNS(coop);
blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
static void cfq_dispatch_insert(struct request_queue *, struct request *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
+static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
struct io_context *, gfp_t);
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
struct io_context *);
@@ -238,27 +241,24 @@ static inline int rq_in_driver(struct cfq_data *cfqd)
}
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
- int is_sync)
+ bool is_sync)
{
- return cic->cfqq[!!is_sync];
+ return cic->cfqq[is_sync];
}
static inline void cic_set_cfqq(struct cfq_io_context *cic,
- struct cfq_queue *cfqq, int is_sync)
+ struct cfq_queue *cfqq, bool is_sync)
{
- cic->cfqq[!!is_sync] = cfqq;
+ cic->cfqq[is_sync] = cfqq;
}
/*
* We regard a request as SYNC, if it's either a read or has the SYNC bit
* set (in which case it could also be direct WRITE).
*/
-static inline int cfq_bio_sync(struct bio *bio)
+static inline bool cfq_bio_sync(struct bio *bio)
{
- if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
- return 1;
-
- return 0;
+ return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
}
/*
@@ -285,7 +285,7 @@ static int cfq_queue_empty(struct request_queue *q)
* if a queue is marked sync and has sync io queued. A sync queue with async
* io only, should not get full sync slice length.
*/
-static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
+static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
unsigned short prio)
{
const int base_slice = cfqd->cfq_slice[sync];
@@ -313,7 +313,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* isn't valid until the first request from the dispatch is activated
* and the slice time set.
*/
-static inline int cfq_slice_used(struct cfq_queue *cfqq)
+static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
return 0;
@@ -488,7 +488,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
* we will service the queues.
*/
static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- int add_front)
+ bool add_front)
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
@@ -504,11 +504,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
} else
rb_key += jiffies;
} else if (!add_front) {
+ /*
+ * Get our rb key offset. Subtract any residual slice
+ * value carried from last service. A negative resid
+ * count indicates slice overrun, and this should position
+ * the next service time further away in the tree.
+ */
rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
- rb_key += cfqq->slice_resid;
+ rb_key -= cfqq->slice_resid;
cfqq->slice_resid = 0;
- } else
- rb_key = 0;
+ } else {
+ rb_key = -HZ;
+ __cfqq = cfq_rb_first(&cfqd->service_tree);
+ rb_key += __cfqq ? __cfqq->rb_key : jiffies;
+ }
if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
/*
@@ -542,7 +551,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
n = &(*p)->rb_left;
else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
n = &(*p)->rb_right;
- else if (rb_key < __cfqq->rb_key)
+ else if (time_before(rb_key, __cfqq->rb_key))
n = &(*p)->rb_left;
else
n = &(*p)->rb_right;
@@ -822,8 +831,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(next->start_time, rq->start_time))
+ time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
list_move(&rq->queuelist, &next->queuelist);
+ rq_set_fifo_time(rq, rq_fifo_time(next));
+ }
cfq_remove_request(next);
}
@@ -839,7 +850,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
* Disallow merge of a sync bio into an async request.
*/
if (cfq_bio_sync(bio) && !rq_is_sync(rq))
- return 0;
+ return false;
/*
* Lookup the cfqq that this bio will be queued with. Allow
@@ -847,13 +858,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
*/
cic = cfq_cic_lookup(cfqd, current->io_context);
if (!cic)
- return 0;
+ return false;
cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
- if (cfqq == RQ_CFQQ(rq))
- return 1;
-
- return 0;
+ return cfqq == RQ_CFQQ(rq);
}
static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -881,7 +889,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
*/
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- int timed_out)
+ bool timed_out)
{
cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
@@ -909,7 +917,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
}
-static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
+static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
{
struct cfq_queue *cfqq = cfqd->active_queue;
@@ -1021,7 +1029,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
*/
static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
struct cfq_queue *cur_cfqq,
- int probe)
+ bool probe)
{
struct cfq_queue *cfqq;
@@ -1085,6 +1093,15 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
if (!cic || !atomic_read(&cic->ioc->nr_tasks))
return;
+ /*
+ * If our average think time is larger than the remaining time
+ * slice, then don't idle. This avoids overrunning the allotted
+ * time slice.
+ */
+ if (sample_valid(cic->ttime_samples) &&
+ (cfqq->slice_end - jiffies < cic->ttime_mean))
+ return;
+
cfq_mark_cfqq_wait_request(cfqq);
/*
@@ -1124,9 +1141,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
*/
static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
- struct cfq_data *cfqd = cfqq->cfqd;
- struct request *rq;
- int fifo;
+ struct request *rq = NULL;
if (cfq_cfqq_fifo_expire(cfqq))
return NULL;
@@ -1136,13 +1151,11 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
if (list_empty(&cfqq->fifo))
return NULL;
- fifo = cfq_cfqq_sync(cfqq);
rq = rq_entry_fifo(cfqq->fifo.next);
-
- if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
+ if (time_before(jiffies, rq_fifo_time(rq)))
rq = NULL;
- cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
+ cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
return rq;
}
@@ -1243,16 +1256,83 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
return dispatched;
}
+static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ unsigned int max_dispatch;
+
+ /*
+ * Drain async requests before we start sync IO
+ */
+ if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ return false;
+
+ /*
+ * If this is an async queue and we have sync IO in flight, let it wait
+ */
+ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+ return false;
+
+ max_dispatch = cfqd->cfq_quantum;
+ if (cfq_class_idle(cfqq))
+ max_dispatch = 1;
+
+ /*
+ * Does this cfqq already have too much IO in flight?
+ */
+ if (cfqq->dispatched >= max_dispatch) {
+ /*
+ * idle queue must always only have a single IO in flight
+ */
+ if (cfq_class_idle(cfqq))
+ return false;
+
+ /*
+ * We have other queues, don't allow more IO from this one
+ */
+ if (cfqd->busy_queues > 1)
+ return false;
+
+ /*
+ * Sole queue user, allow bigger slice
+ */
+ max_dispatch *= 4;
+ }
+
+ /*
+ * Async queues must wait a bit before being allowed dispatch.
+ * We also ramp up the dispatch depth gradually for async IO,
+ * based on the last sync IO we serviced
+ */
+ if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
+ unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+ unsigned int depth;
+
+ depth = last_sync / cfqd->cfq_slice[1];
+ if (!depth && !cfqq->dispatched)
+ depth = 1;
+ if (depth < max_dispatch)
+ max_dispatch = depth;
+ }
+
+ /*
+ * If we're below the current max, allow a dispatch
+ */
+ return cfqq->dispatched < max_dispatch;
+}
+
/*
* Dispatch a request from cfqq, moving them to the request queue
* dispatch list.
*/
-static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
struct request *rq;
BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
+ if (!cfq_may_dispatch(cfqd, cfqq))
+ return false;
+
/*
* follow expired path, else get first next available
*/
@@ -1271,6 +1351,8 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
atomic_long_inc(&cic->ioc->refcount);
cfqd->active_cic = cic;
}
+
+ return true;
}
/*
@@ -1281,7 +1363,6 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq;
- unsigned int max_dispatch;
if (!cfqd->busy_queues)
return 0;
@@ -1294,48 +1375,11 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
return 0;
/*
- * Drain async requests before we start sync IO
+ * Dispatch a request from this cfqq, if it is allowed
*/
- if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ if (!cfq_dispatch_request(cfqd, cfqq))
return 0;
- /*
- * If this is an async queue and we have sync IO in flight, let it wait
- */
- if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
- return 0;
-
- max_dispatch = cfqd->cfq_quantum;
- if (cfq_class_idle(cfqq))
- max_dispatch = 1;
-
- /*
- * Does this cfqq already have too much IO in flight?
- */
- if (cfqq->dispatched >= max_dispatch) {
- /*
- * idle queue must always only have a single IO in flight
- */
- if (cfq_class_idle(cfqq))
- return 0;
-
- /*
- * We have other queues, don't allow more IO from this one
- */
- if (cfqd->busy_queues > 1)
- return 0;
-
- /*
- * we are the only queue, allow up to 4 times of 'quantum'
- */
- if (cfqq->dispatched >= 4 * max_dispatch)
- return 0;
- }
-
- /*
- * Dispatch a request from this cfqq
- */
- cfq_dispatch_request(cfqd, cfqq);
cfqq->slice_dispatch++;
cfq_clear_cfqq_must_dispatch(cfqq);
@@ -1415,7 +1459,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
cic = container_of(head, struct cfq_io_context, rcu_head);
kmem_cache_free(cfq_ioc_pool, cic);
- elv_ioc_count_dec(ioc_count);
+ elv_ioc_count_dec(cfq_ioc_count);
if (ioc_gone) {
/*
@@ -1424,7 +1468,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
* complete ioc_gone and set it back to NULL
*/
spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
complete(ioc_gone);
ioc_gone = NULL;
}
@@ -1550,7 +1594,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
INIT_HLIST_NODE(&cic->cic_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- elv_ioc_count_inc(ioc_count);
+ elv_ioc_count_inc(cfq_ioc_count);
}
return cic;
@@ -1635,7 +1679,7 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
}
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- pid_t pid, int is_sync)
+ pid_t pid, bool is_sync)
{
RB_CLEAR_NODE(&cfqq->rb_node);
RB_CLEAR_NODE(&cfqq->p_node);
@@ -1655,7 +1699,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
+cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
struct io_context *ioc, gfp_t gfp_mask)
{
struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1719,7 +1763,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
}
static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
+cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
gfp_t gfp_mask)
{
const int ioprio = task_ioprio(ioc);
@@ -1951,10 +1995,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (cfqd->hw_tag && CIC_SEEKY(cic)))
+ (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
- if (cic->ttime_mean > cfqd->cfq_slice_idle)
+ unsigned int slice_idle = cfqd->cfq_slice_idle;
+ if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+ slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
+ if (cic->ttime_mean > slice_idle)
enable_idle = 0;
else
enable_idle = 1;
@@ -1973,7 +2020,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* Check if new_cfqq should preempt the currently active queue. Return 0 for
* no or if we aren't sure, a 1 will cause a preempt.
*/
-static int
+static bool
cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
struct request *rq)
{
@@ -1981,48 +2028,48 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
cfqq = cfqd->active_queue;
if (!cfqq)
- return 0;
+ return false;
if (cfq_slice_used(cfqq))
- return 1;
+ return true;
if (cfq_class_idle(new_cfqq))
- return 0;
+ return false;
if (cfq_class_idle(cfqq))
- return 1;
+ return true;
/*
* if the new request is sync, but the currently running queue is
* not, let the sync request have priority.
*/
if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
/*
* So both queues are sync. Let the new request get disk time if
* it's a metadata request and the current queue is doing regular IO.
*/
if (rq_is_meta(rq) && !cfqq->meta_pending)
- return 1;
+ return true;
/*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
- return 1;
+ return true;
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
- return 0;
+ return false;
/*
* if this request is as-good as one we would expect from the
* current cfqq, let it preempt
*/
if (cfq_rq_close(cfqd, rq))
- return 1;
+ return true;
- return 0;
+ return false;
}
/*
@@ -2107,6 +2154,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
cfq_add_rq_rb(rq);
+ rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &cfqq->fifo);
cfq_rq_enqueued(cfqd, cfqq, rq);
@@ -2157,8 +2205,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (cfq_cfqq_sync(cfqq))
cfqd->sync_flight--;
- if (sync)
+ if (sync) {
RQ_CIC(rq)->last_end_request = now;
+ cfqd->last_end_sync_rq = now;
+ }
/*
* If this is the active queue, check if it needs to be expired,
@@ -2284,7 +2334,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_context *cic;
const int rw = rq_data_dir(rq);
- const int is_sync = rq_is_sync(rq);
+ const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
unsigned long flags;
@@ -2480,8 +2530,9 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_slice[1] = cfq_slice_sync;
cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
cfqd->cfq_slice_idle = cfq_slice_idle;
+ cfqd->cfq_latency = 1;
cfqd->hw_tag = 1;
-
+ cfqd->last_end_sync_rq = jiffies;
return cfqd;
}
@@ -2549,6 +2600,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
@@ -2580,6 +2632,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
UINT_MAX, 0);
+STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
#undef STORE_FUNCTION
#define CFQ_ATTR(name) \
@@ -2595,6 +2648,7 @@ static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(slice_async),
CFQ_ATTR(slice_async_rq),
CFQ_ATTR(slice_idle),
+ CFQ_ATTR(low_latency),
__ATTR_NULL
};
@@ -2654,7 +2708,7 @@ static void __exit cfq_exit(void)
* this also protects us from entering cfq_slab_kill() with
* pending RCU callbacks
*/
- if (elv_ioc_count_read(ioc_count))
+ if (elv_ioc_count_read(cfq_ioc_count))
wait_for_completion(&all_gone);
cfq_slab_kill();
}
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34..9bd086c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val)
return put_user(val, (compat_int_t __user *)compat_ptr(arg));
}
+static int compat_put_uint(unsigned long arg, unsigned int val)
+{
+ return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
+}
+
static int compat_put_long(unsigned long arg, long val)
{
return put_user(val, (compat_long_t __user *)compat_ptr(arg));
@@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
switch (cmd) {
case HDIO_GETGEO:
return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+ case BLKPBSZGET:
+ return compat_put_uint(arg, bdev_physical_block_size(bdev));
+ case BLKIOMIN:
+ return compat_put_uint(arg, bdev_io_min(bdev));
+ case BLKIOOPT:
+ return compat_put_uint(arg, bdev_io_opt(bdev));
+ case BLKALIGNOFF:
+ return compat_put_int(arg, bdev_alignment_offset(bdev));
case BLKFLSBUF:
case BLKROSET:
case BLKDISCARD:
diff --git a/block/elevator.c b/block/elevator.c
index 1975b61..a847046 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1059,9 +1059,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
return count;
strlcpy(elevator_name, name, sizeof(elevator_name));
- strstrip(elevator_name);
-
- e = elevator_get(elevator_name);
+ e = elevator_get(strstrip(elevator_name));
if (!e) {
printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
return -EINVAL;
diff --git a/block/genhd.c b/block/genhd.c
index 5b76bf5..517e433 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -903,7 +903,7 @@ static struct attribute_group disk_attr_group = {
.attrs = disk_attrs,
};
-static struct attribute_group *disk_attr_groups[] = {
+static const struct attribute_group *disk_attr_groups[] = {
&disk_attr_group,
NULL
};
@@ -998,12 +998,12 @@ struct class block_class = {
.name = "block",
};
-static char *block_nodename(struct device *dev)
+static char *block_devnode(struct device *dev, mode_t *mode)
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->nodename)
- return disk->nodename(disk);
+ if (disk->devnode)
+ return disk->devnode(disk, mode);
return NULL;
}
@@ -1011,7 +1011,7 @@ static struct device_type disk_type = {
.name = "disk",
.groups = disk_attr_groups,
.release = disk_release,
- .nodename = block_nodename,
+ .devnode = block_devnode,
};
#ifdef CONFIG_PROC_FS
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b58..1f4d1de 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val)
return put_user(val, (int __user *)arg);
}
+static int put_uint(unsigned long arg, unsigned int val)
+{
+ return put_user(val, (unsigned int __user *)arg);
+}
+
static int put_long(unsigned long arg, long val)
{
return put_user(val, (long __user *)arg);
@@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
case BLKROGET:
return put_int(arg, bdev_read_only(bdev) != 0);
- case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+ case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
return put_int(arg, block_size(bdev));
- case BLKSSZGET: /* get block device hardware sector size */
+ case BLKSSZGET: /* get block device logical block size */
return put_int(arg, bdev_logical_block_size(bdev));
+ case BLKPBSZGET: /* get block device physical block size */
+ return put_uint(arg, bdev_physical_block_size(bdev));
+ case BLKIOMIN:
+ return put_uint(arg, bdev_io_min(bdev));
+ case BLKIOOPT:
+ return put_uint(arg, bdev_io_opt(bdev));
+ case BLKALIGNOFF:
+ return put_int(arg, bdev_alignment_offset(bdev));
case BLKSECTGET:
return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
case BLKRASET: