From 3a2e9a5a2afc1a2d2c548b8987f133235cebe933 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 23 Sep 2009 21:56:00 +0800 Subject: writeback: balance_dirty_pages() shall write more than dirtied pages Some filesystem may choose to write much more than ratelimit_pages before calling balance_dirty_pages_ratelimited_nr(). So it is safer to determine number to write based on real number of dirtied pages. Otherwise it is possible that loop { btrfs_file_write(): dirty 1024 pages balance_dirty_pages(): write up to 48 pages (= ratelimit_pages * 1.5) } in which the writeback rate cannot keep up with dirty rate, and the dirty pages go all the way beyond dirty_thresh. The increased write_chunk may make the dirtier more bumpy. So filesystems shall be take care not to dirty too much at a time (eg. > 4MB) without checking the ratelimit. Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Signed-off-by: Jens Axboe --- mm/page-writeback.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5f378dd5..cbd4cba 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -44,12 +44,15 @@ static long ratelimit_pages = 32; /* * When balance_dirty_pages decides that the caller needs to perform some * non-background writeback, this is how many pages it will attempt to write. - * It should be somewhat larger than RATELIMIT_PAGES to ensure that reasonably + * It should be somewhat larger than dirtied pages to ensure that reasonably * large amounts of I/O are submitted. */ -static inline long sync_writeback_pages(void) +static inline long sync_writeback_pages(unsigned long dirtied) { - return ratelimit_pages + ratelimit_pages / 2; + if (dirtied < ratelimit_pages) + dirtied = ratelimit_pages; + + return dirtied + dirtied / 2; } /* The following parameters are exported via /proc/sys/vm */ @@ -477,7 +480,8 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, * If we're over `background_thresh' then pdflush is woken to perform some * writeout. */ -static void balance_dirty_pages(struct address_space *mapping) +static void balance_dirty_pages(struct address_space *mapping, + unsigned long write_chunk) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; @@ -485,7 +489,6 @@ static void balance_dirty_pages(struct address_space *mapping) unsigned long dirty_thresh; unsigned long bdi_thresh; unsigned long pages_written = 0; - unsigned long write_chunk = sync_writeback_pages(); unsigned long pause = 1; struct backing_dev_info *bdi = mapping->backing_dev_info; @@ -640,9 +643,10 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, p = &__get_cpu_var(bdp_ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { + ratelimit = sync_writeback_pages(*p); *p = 0; preempt_enable(); - balance_dirty_pages(mapping); + balance_dirty_pages(mapping, ratelimit); return; } preempt_enable(); -- cgit v1.1 From d3ddec7635b6fb37cb49e3553bdeea59642be653 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 23 Sep 2009 20:33:40 +0800 Subject: writeback: stop background writeback when below background threshold Treat bdi_start_writeback(0) as a special request to do background write, and stop such work when we are below the background dirty threshold. Also simplify the (nr_pages <= 0) checks. Since we already pass in nr_pages=LONG_MAX for WB_SYNC_ALL and background writes, we don't need to worry about it being decreased to zero. Reported-by: Richard Kennedy CC: Jan Kara Acked-by: Peter Zijlstra Signed-off-by: Wu Fengguang Signed-off-by: Jens Axboe --- mm/page-writeback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cbd4cba..3c78fc3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -593,10 +593,10 @@ static void balance_dirty_pages(struct address_space *mapping, * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || - (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) - + global_page_state(NR_UNSTABLE_NFS)) + (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, nr_writeback); + bdi_start_writeback(bdi, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) -- cgit v1.1 From 5b0830cb9085f4b69f9d57d7f3aaff322ffbec26 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 23 Sep 2009 19:37:09 +0200 Subject: writeback: get rid to incorrect references to pdflush in comments Signed-off-by: Jens Axboe --- mm/page-writeback.c | 8 ++++---- mm/shmem.c | 5 +++-- mm/vmscan.c | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c78fc3..8bef063 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -58,7 +58,7 @@ static inline long sync_writeback_pages(unsigned long dirtied) /* The following parameters are exported via /proc/sys/vm */ /* - * Start background writeback (via pdflush) at this percentage + * Start background writeback (via writeback threads) at this percentage */ int dirty_background_ratio = 10; @@ -477,8 +477,8 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. - * If we're over `background_thresh' then pdflush is woken to perform some - * writeout. + * If we're over `background_thresh' then the writeback threads are woken to + * perform some writeout. */ static void balance_dirty_pages(struct address_space *mapping, unsigned long write_chunk) @@ -582,7 +582,7 @@ static void balance_dirty_pages(struct address_space *mapping, bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) - return; /* pdflush is already working this queue */ + return; /* * In laptop mode, we wait until hitting the higher threshold before diff --git a/mm/shmem.c b/mm/shmem.c index b206a7a..aa94811 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1046,8 +1046,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) * sync from ever calling shmem_writepage; but a stacking filesystem * may use the ->writepage of its underlying filesystem, in which case * tmpfs should write out to swap only in response to memory pressure, - * and not for pdflush or sync. However, in those cases, we do still - * want to check if there's a redundant swappage to be discarded. + * and not for the writeback threads or sync. However, in those cases, + * we do still want to check if there's a redundant swappage to be + * discarded. */ if (wbc->for_reclaim) swap = get_swap_page(); diff --git a/mm/vmscan.c b/mm/vmscan.c index 613e89f..359c3c5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1709,10 +1709,10 @@ static void shrink_zones(int priority, struct zonelist *zonelist, * * If the caller is !__GFP_FS then the probability of a failure is reasonably * high - the zone may be full of dirty or under-writeback pages, which this - * caller can't do much about. We kick pdflush and take explicit naps in the - * hope that some of these pages can be written. But if the allocating task - * holds filesystem locks which prevent writeout this might not work, and the - * allocation attempt will fail. + * caller can't do much about. We kick the writeback threads and take explicit + * naps in the hope that some of these pages can be written. But if the + * allocating task holds filesystem locks which prevent writeout this might not + * work, and the allocation attempt will fail. * * returns: 0, if no pages reclaimed * else, the number of pages reclaimed -- cgit v1.1