aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/memory.c89
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/nommu.c4
-rw-r--r--mm/page-writeback.c1
-rw-r--r--mm/page_alloc.c228
-rw-r--r--mm/readahead.c31
-rw-r--r--mm/slab.c131
-rw-r--r--mm/swap.c1
-rw-r--r--mm/swap_state.c1
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c6
14 files changed, 263 insertions, 245 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a4473fc..ae9ce6b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -126,9 +126,11 @@ comment "Memory hotplug is currently incompatible with Software Suspend"
# Default to 4 for wider testing, though 8 might be more appropriate.
# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
# PA-RISC's debug spinlock_t is too large for the 32-bit struct page.
+# ARM26 and SPARC32 and PPC64 may use one page for multiple page tables.
#
config SPLIT_PTLOCK_CPUS
int
default "4096" if ARM && !CPU_CACHE_VIPT
default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT
+ default "4096" if ARM26 || SPARC32 || PPC64
default "4"
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c9b4336..728e9bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -103,6 +103,9 @@ static int __init hugetlb_init(void)
unsigned long i;
struct page *page;
+ if (HPAGE_SHIFT == 0)
+ return 0;
+
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(&hugepage_freelists[i]);
@@ -234,7 +237,6 @@ unsigned long hugetlb_total_pages(void)
{
return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}
-EXPORT_SYMBOL(hugetlb_total_pages);
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
diff --git a/mm/memory.c b/mm/memory.c
index 0f60baf..2998cfc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
return 0;
}
-static void zap_pte_range(struct mmu_gather *tlb,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
struct mm_struct *mm = tlb->mm;
pte_t *pte;
@@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb,
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
do {
pte_t ptent = *pte;
- if (pte_none(ptent))
+ if (pte_none(ptent)) {
+ (*zap_work)--;
continue;
+ }
if (pte_present(ptent)) {
struct page *page = NULL;
+
+ (*zap_work) -= PAGE_SIZE;
+
if (!(vma->vm_flags & VM_RESERVED)) {
unsigned long pfn = pte_pfn(ptent);
if (unlikely(!pfn_valid(pfn)))
@@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb,
if (!pte_file(ptent))
free_swap_and_cache(pte_to_swp_entry(ptent));
pte_clear_full(mm, addr, pte, tlb->fullmm);
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
add_mm_rss(mm, file_rss, anon_rss);
pte_unmap_unlock(pte - 1, ptl);
+
+ return addr;
}
-static inline void zap_pmd_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pmd_t *pmd;
unsigned long next;
@@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb,
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
+ if (pmd_none_or_clear_bad(pmd)) {
+ (*zap_work)--;
continue;
- zap_pte_range(tlb, vma, pmd, addr, next, details);
- } while (pmd++, addr = next, addr != end);
+ }
+ next = zap_pte_range(tlb, vma, pmd, addr, next,
+ zap_work, details);
+ } while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
}
-static inline void zap_pud_range(struct mmu_gather *tlb,
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pud_t *pud;
unsigned long next;
@@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb,
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
+ if (pud_none_or_clear_bad(pud)) {
+ (*zap_work)--;
continue;
- zap_pmd_range(tlb, vma, pud, addr, next, details);
- } while (pud++, addr = next, addr != end);
+ }
+ next = zap_pmd_range(tlb, vma, pud, addr, next,
+ zap_work, details);
+ } while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
}
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pgd_t *pgd;
unsigned long next;
@@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
+ if (pgd_none_or_clear_bad(pgd)) {
+ (*zap_work)--;
continue;
- zap_pud_range(tlb, vma, pgd, addr, next, details);
- } while (pgd++, addr = next, addr != end);
+ }
+ next = zap_pud_range(tlb, vma, pgd, addr, next,
+ zap_work, details);
+ } while (pgd++, addr = next, (addr != end && *zap_work > 0));
tlb_end_vma(tlb, vma);
+
+ return addr;
}
#ifdef CONFIG_PREEMPT
@@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
{
- unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+ long zap_work = ZAP_BLOCK_SIZE;
unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0;
unsigned long start = start_addr;
@@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
*nr_accounted += (end - start) >> PAGE_SHIFT;
while (start != end) {
- unsigned long block;
-
if (!tlb_start_valid) {
tlb_start = start;
tlb_start_valid = 1;
}
- if (is_vm_hugetlb_page(vma)) {
- block = end - start;
+ if (unlikely(is_vm_hugetlb_page(vma))) {
unmap_hugepage_range(vma, start, end);
- } else {
- block = min(zap_bytes, end - start);
- unmap_page_range(*tlbp, vma, start,
- start + block, details);
+ zap_work -= (end - start) /
+ (HPAGE_SIZE / PAGE_SIZE);
+ start = end;
+ } else
+ start = unmap_page_range(*tlbp, vma,
+ start, end, &zap_work, details);
+
+ if (zap_work > 0) {
+ BUG_ON(start != end);
+ break;
}
- start += block;
- zap_bytes -= block;
- if ((long)zap_bytes > 0)
- continue;
-
tlb_finish_mmu(*tlbp, tlb_start, start);
if (need_resched() ||
@@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
tlb_start_valid = 0;
- zap_bytes = ZAP_BLOCK_SIZE;
+ zap_work = ZAP_BLOCK_SIZE;
}
}
out:
diff --git a/mm/mmap.c b/mm/mmap.c
index 320dda1..6c997b1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -155,10 +155,6 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
return -ENOMEM;
}
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(vm_committed_space);
EXPORT_SYMBOL(__vm_enough_memory);
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index d1e076a..6deb6ab 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -44,10 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int heap_stack_gap = 0;
EXPORT_SYMBOL(mem_map);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(vm_committed_space);
EXPORT_SYMBOL(__vm_enough_memory);
/* list of shareable VMAs */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0166ea15..74138c9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -750,7 +750,6 @@ int clear_page_dirty_for_io(struct page *page)
}
return TestClearPageDirty(page);
}
-EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 259a71b..104e69c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -67,7 +67,6 @@ long nr_swap_pages;
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
EXPORT_SYMBOL(totalram_pages);
-EXPORT_SYMBOL(nr_swap_pages);
/*
* Used by page_zone() to look up the address of the struct zone whose
@@ -736,9 +735,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
}
local_irq_restore(flags);
put_cpu();
- }
-
- if (page == NULL) {
+ } else {
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order);
spin_unlock_irqrestore(&zone->lock, flags);
@@ -758,20 +755,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
return page;
}
+#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
+#define ALLOC_HARDER 0x02 /* try to alloc harder */
+#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
+#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
+
/*
* Return 1 if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int can_try_harder, gfp_t gfp_high)
+ int classzone_idx, int alloc_flags)
{
/* free_pages my go negative - that's OK */
long min = mark, free_pages = z->free_pages - (1 << order) + 1;
int o;
- if (gfp_high)
+ if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
- if (can_try_harder)
+ if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -789,14 +791,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
return 1;
}
-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+/*
+ * get_page_from_freeliest goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, int alloc_flags)
{
- if (!z->reclaim_pages)
- return 0;
- if (gfp_mask & __GFP_NORECLAIM)
- return 0;
- return 1;
+ struct zone **z = zonelist->zones;
+ struct page *page = NULL;
+ int classzone_idx = zone_idx(*z);
+
+ /*
+ * Go through the zonelist once, looking for a zone with enough free.
+ * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+ */
+ do {
+ if ((alloc_flags & ALLOC_CPUSET) &&
+ !cpuset_zone_allowed(*z, gfp_mask))
+ continue;
+
+ if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+ if (!zone_watermark_ok(*z, order, (*z)->pages_low,
+ classzone_idx, alloc_flags))
+ continue;
+ }
+
+ page = buffered_rmqueue(*z, order, gfp_mask);
+ if (page) {
+ zone_statistics(zonelist, *z);
+ break;
+ }
+ } while (*(++z) != NULL);
+ return page;
}
/*
@@ -807,105 +835,75 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
const gfp_t wait = gfp_mask & __GFP_WAIT;
- struct zone **zones, *z;
+ struct zone **z;
struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
- int i;
- int classzone_idx;
int do_retry;
- int can_try_harder;
+ int alloc_flags;
int did_some_progress;
might_sleep_if(wait);
- /*
- * The caller may dip into page reserves a bit more if the caller
- * cannot run direct reclaim, or is the caller has realtime scheduling
- * policy
- */
- can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
+ z = zonelist->zones; /* the list of zones suitable for gfp_mask */
- zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
-
- if (unlikely(zones[0] == NULL)) {
+ if (unlikely(*z == NULL)) {
/* Should this ever happen?? */
return NULL;
}
+restart:
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
- classzone_idx = zone_idx(zones[0]);
+ do
+ wakeup_kswapd(*z, order);
+ while (*(++z));
-restart:
/*
- * Go through the zonelist once, looking for a zone with enough free.
- * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+ * OK, we're below the kswapd watermark and have kicked background
+ * reclaim. Now things get more complex, so set up alloc_flags according
+ * to how we want to proceed.
+ *
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- /*
- * If the zone is to attempt early page reclaim then this loop
- * will try to reclaim pages and check the watermark a second
- * time before giving up and falling back to the next zone.
- */
-zone_reclaim_retry:
- if (!zone_watermark_ok(z, order, z->pages_low,
- classzone_idx, 0, 0)) {
- if (!do_reclaim)
- continue;
- else {
- zone_reclaim(z, gfp_mask, order);
- /* Only try reclaim once */
- do_reclaim = 0;
- goto zone_reclaim_retry;
- }
- }
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
-
- for (i = 0; (z = zones[i]) != NULL; i++)
- wakeup_kswapd(z, order);
+ alloc_flags = 0;
+ if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+ alloc_flags |= ALLOC_HARDER;
+ if (gfp_mask & __GFP_HIGH)
+ alloc_flags |= ALLOC_HIGH;
+ if (wait)
+ alloc_flags |= ALLOC_CPUSET;
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
- * coming from realtime tasks to go deeper into reserves
+ * coming from realtime tasks go deeper into reserves.
*
* This is the last chance, in general, before the goto nopage.
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (wait && !cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+ if (page)
+ goto got_pg;
/* This allocation should allow future memory freeing. */
if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+nofail_alloc:
/* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+ page = get_page_from_freelist(gfp_mask, order,
+ zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
+ if (gfp_mask & __GFP_NOFAIL) {
+ blk_congestion_wait(WRITE, HZ/50);
+ goto nofail_alloc;
}
}
goto nopage;
@@ -923,7 +921,7 @@ rebalance:
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- did_some_progress = try_to_free_pages(zones, gfp_mask);
+ did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;
@@ -931,19 +929,10 @@ rebalance:
cond_resched();
if (likely(did_some_progress)) {
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask, order,
+ zonelist, alloc_flags);
+ if (page)
+ goto got_pg;
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
/*
* Go through the zonelist yet one more time, keep
@@ -951,18 +940,10 @@ rebalance:
* a parallel oom killing, we must fail if we're still
* under heavy pressure.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_high,
- classzone_idx, 0, 0))
- continue;
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
out_of_memory(gfp_mask, order);
goto restart;
@@ -995,9 +976,7 @@ nopage:
dump_stack();
show_mem();
}
- return NULL;
got_pg:
- zone_statistics(zonelist, z);
return page;
}
@@ -1334,7 +1313,7 @@ void show_free_areas(void)
} else
printk("\n");
- for_each_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct per_cpu_pageset *pageset;
pageset = zone_pcp(zone, cpu);
@@ -2426,13 +2405,18 @@ void setup_per_zone_pages_min(void)
}
for_each_zone(zone) {
+ unsigned long tmp;
spin_lock_irqsave(&zone->lru_lock, flags);
+ tmp = (pages_min * zone->present_pages) / lowmem_pages;
if (is_highmem(zone)) {
/*
- * Often, highmem doesn't need to reserve any pages.
- * But the pages_min/low/high values are also used for
- * batching up page reclaim activity so we need a
- * decent value here.
+ * __GFP_HIGH and PF_MEMALLOC allocations usually don't
+ * need highmem pages, so cap pages_min to a small
+ * value here.
+ *
+ * The (pages_high-pages_low) and (pages_low-pages_min)
+ * deltas controls asynch page reclaim, and so should
+ * not be capped for highmem.
*/
int min_pages;
@@ -2443,19 +2427,15 @@ void setup_per_zone_pages_min(void)
min_pages = 128;
zone->pages_min = min_pages;
} else {
- /* if it's a lowmem zone, reserve a number of pages
+ /*
+ * If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
- zone->pages_min = (pages_min * zone->present_pages) /
- lowmem_pages;
+ zone->pages_min = tmp;
}
- /*
- * When interpreting these watermarks, just keep in mind that:
- * zone->pages_min == (zone->pages_min * 4) / 4;
- */
- zone->pages_low = (zone->pages_min * 5) / 4;
- zone->pages_high = (zone->pages_min * 6) / 4;
+ zone->pages_low = zone->pages_min + tmp / 4;
+ zone->pages_high = zone->pages_min + tmp / 2;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
}
diff --git a/mm/readahead.c b/mm/readahead.c
index d0b5003..72e7adb 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -254,7 +254,7 @@ out:
*/
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read)
+ pgoff_t offset, unsigned long nr_to_read)
{
struct inode *inode = mapping->host;
struct page *page;
@@ -274,7 +274,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
*/
read_lock_irq(&mapping->tree_lock);
for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
- unsigned long page_offset = offset + page_idx;
+ pgoff_t page_offset = offset + page_idx;
if (page_offset > end_index)
break;
@@ -311,7 +311,7 @@ out:
* memory at once.
*/
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read)
+ pgoff_t offset, unsigned long nr_to_read)
{
int ret = 0;
@@ -368,7 +368,7 @@ static inline int check_ra_success(struct file_ra_state *ra,
* request queues.
*/
int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read)
+ pgoff_t offset, unsigned long nr_to_read)
{
if (bdi_read_congested(mapping->backing_dev_info))
return -1;
@@ -385,7 +385,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
*/
static int
blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read,
+ pgoff_t offset, unsigned long nr_to_read,
struct file_ra_state *ra, int block)
{
int actual;
@@ -430,14 +430,27 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp,
return ret;
}
-/*
- * page_cache_readahead is the main function. If performs the adaptive
+/**
+ * page_cache_readahead - generic adaptive readahead
+ * @mapping: address_space which holds the pagecache and I/O vectors
+ * @ra: file_ra_state which holds the readahead state
+ * @filp: passed on to ->readpage() and ->readpages()
+ * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units
+ * @req_size: hint: total size of the read which the caller is performing in
+ * PAGE_CACHE_SIZE units
+ *
+ * page_cache_readahead() is the main function. If performs the adaptive
* readahead window size management and submits the readahead I/O.
+ *
+ * Note that @filp is purely used for passing on to the ->readpage[s]()
+ * handler: it may refer to a different file from @mapping (so we may not use
+ * @filp->f_mapping or @filp->f_dentry->d_inode here).
+ * Also, @ra may not be equal to &@filp->f_ra.
+ *
*/
unsigned long
page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
- struct file *filp, unsigned long offset,
- unsigned long req_size)
+ struct file *filp, pgoff_t offset, unsigned long req_size)
{
unsigned long max, newsize;
int sequential;
diff --git a/mm/slab.c b/mm/slab.c
index 22bfb0b2..e5ec26e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -368,7 +368,7 @@ static inline void kmem_list3_init(struct kmem_list3 *parent)
* manages a cache.
*/
-struct kmem_cache_s {
+struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
struct array_cache *array[NR_CPUS];
unsigned int batchcount;
@@ -434,7 +434,7 @@ struct kmem_cache_s {
/* Optimization question: fewer reaps means less
* probability for unnessary cpucache drain/refill cycles.
*
- * OTHO the cpuarrays can contain lots of objects,
+ * OTOH the cpuarrays can contain lots of objects,
* which could lock up otherwise freeable slabs.
*/
#define REAPTIMEOUT_CPUC (2*HZ)
@@ -565,14 +565,29 @@ static void **dbg_userword(kmem_cache_t *cachep, void *objp)
#define BREAK_GFP_ORDER_LO 0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
-/* Macros for storing/retrieving the cachep and or slab from the
+/* Functions for storing/retrieving the cachep and or slab from the
* global 'mem_map'. These are used to find the slab an obj belongs to.
* With kfree(), these are used to find the cache which an obj belongs to.
*/
-#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x))
-#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next)
-#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x))
-#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev)
+static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+{
+ page->lru.next = (struct list_head *)cache;
+}
+
+static inline struct kmem_cache *page_get_cache(struct page *page)
+{
+ return (struct kmem_cache *)page->lru.next;
+}
+
+static inline void page_set_slab(struct page *page, struct slab *slab)
+{
+ page->lru.prev = (struct list_head *)slab;
+}
+
+static inline struct slab *page_get_slab(struct page *page)
+{
+ return (struct slab *)page->lru.prev;
+}
/* These are the default caches for kmalloc. Custom caches can have other sizes. */
struct cache_sizes malloc_sizes[] = {
@@ -1190,11 +1205,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
int i;
flags |= cachep->gfpflags;
- if (likely(nodeid == -1)) {
- page = alloc_pages(flags, cachep->gfporder);
- } else {
- page = alloc_pages_node(nodeid, flags, cachep->gfporder);
- }
+ page = alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page)
return NULL;
addr = page_address(page);
@@ -1368,7 +1379,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
- struct slab *slabp = GET_PAGE_SLAB(virt_to_page(objp));
+ struct slab *slabp = page_get_slab(virt_to_page(objp));
int objnr;
objnr = (objp-slabp->s_mem)/cachep->objsize;
@@ -1502,6 +1513,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
{
size_t left_over, slab_size, ralign;
kmem_cache_t *cachep = NULL;
+ struct list_head *p;
/*
* Sanity checks... these are all serious usage bugs.
@@ -1516,6 +1528,35 @@ kmem_cache_create (const char *name, size_t size, size_t align,
BUG();
}
+ down(&cache_chain_sem);
+
+ list_for_each(p, &cache_chain) {
+ kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
+ mm_segment_t old_fs = get_fs();
+ char tmp;
+ int res;
+
+ /*
+ * This happens when the module gets unloaded and doesn't
+ * destroy its slab cache and no-one else reuses the vmalloc
+ * area of the module. Print a warning.
+ */
+ set_fs(KERNEL_DS);
+ res = __get_user(tmp, pc->name);
+ set_fs(old_fs);
+ if (res) {
+ printk("SLAB: cache with size %d has lost its name\n",
+ pc->objsize);
+ continue;
+ }
+
+ if (!strcmp(pc->name,name)) {
+ printk("kmem_cache_create: duplicate cache %s\n", name);
+ dump_stack();
+ goto oops;
+ }
+ }
+
#if DEBUG
WARN_ON(strchr(name, ' ')); /* It confuses parsers */
if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
@@ -1592,7 +1633,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
/* Get cache's description obj. */
cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
if (!cachep)
- goto opps;
+ goto oops;
memset(cachep, 0, sizeof(kmem_cache_t));
#if DEBUG
@@ -1686,7 +1727,7 @@ next:
printk("kmem_cache_create: couldn't create cache %s.\n", name);
kmem_cache_free(&cache_cache, cachep);
cachep = NULL;
- goto opps;
+ goto oops;
}
slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
+ sizeof(struct slab), align);
@@ -1781,43 +1822,14 @@ next:
cachep->limit = BOOT_CPUCACHE_ENTRIES;
}
- /* Need the semaphore to access the chain. */
- down(&cache_chain_sem);
- {
- struct list_head *p;
- mm_segment_t old_fs;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- list_for_each(p, &cache_chain) {
- kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
- char tmp;
- /* This happens when the module gets unloaded and doesn't
- destroy its slab cache and noone else reuses the vmalloc
- area of the module. Print a warning. */
- if (__get_user(tmp,pc->name)) {
- printk("SLAB: cache with size %d has lost its name\n",
- pc->objsize);
- continue;
- }
- if (!strcmp(pc->name,name)) {
- printk("kmem_cache_create: duplicate cache %s\n",name);
- up(&cache_chain_sem);
- unlock_cpu_hotplug();
- BUG();
- }
- }
- set_fs(old_fs);
- }
-
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
- up(&cache_chain_sem);
unlock_cpu_hotplug();
-opps:
+oops:
if (!cachep && (flags & SLAB_PANIC))
panic("kmem_cache_create(): failed to create slab `%s'\n",
name);
+ up(&cache_chain_sem);
return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
@@ -2137,8 +2149,8 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
i = 1 << cachep->gfporder;
page = virt_to_page(objp);
do {
- SET_PAGE_CACHE(page, cachep);
- SET_PAGE_SLAB(page, slabp);
+ page_set_cache(page, cachep);
+ page_set_slab(page, slabp);
page++;
} while (--i);
}
@@ -2268,14 +2280,14 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
kfree_debugcheck(objp);
page = virt_to_page(objp);
- if (GET_PAGE_CACHE(page) != cachep) {
+ if (page_get_cache(page) != cachep) {
printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n",
- GET_PAGE_CACHE(page),cachep);
+ page_get_cache(page),cachep);
printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
- printk(KERN_ERR "%p is %s.\n", GET_PAGE_CACHE(page), GET_PAGE_CACHE(page)->name);
+ printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name);
WARN_ON(1);
}
- slabp = GET_PAGE_SLAB(page);
+ slabp = page_get_slab(page);
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
@@ -2627,7 +2639,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n
struct slab *slabp;
unsigned int objnr;
- slabp = GET_PAGE_SLAB(virt_to_page(objp));
+ slabp = page_get_slab(virt_to_page(objp));
l3 = cachep->nodelists[node];
list_del(&slabp->list);
objnr = (objp - slabp->s_mem) / cachep->objsize;
@@ -2743,7 +2755,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
#ifdef CONFIG_NUMA
{
struct slab *slabp;
- slabp = GET_PAGE_SLAB(virt_to_page(objp));
+ slabp = page_get_slab(virt_to_page(objp));
if (unlikely(slabp->nodeid != numa_node_id())) {
struct array_cache *alien = NULL;
int nodeid = slabp->nodeid;
@@ -2829,7 +2841,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
page = virt_to_page(ptr);
if (unlikely(!PageSlab(page)))
goto out;
- if (unlikely(GET_PAGE_CACHE(page) != cachep))
+ if (unlikely(page_get_cache(page) != cachep))
goto out;
return 1;
out:
@@ -3025,7 +3037,7 @@ void kfree(const void *objp)
return;
local_irq_save(flags);
kfree_debugcheck(objp);
- c = GET_PAGE_CACHE(virt_to_page(objp));
+ c = page_get_cache(virt_to_page(objp));
__cache_free(c, (void*)objp);
local_irq_restore(flags);
}
@@ -3262,6 +3274,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
/**
* cache_reap - Reclaim memory from caches.
+ * @unused: unused parameter
*
* Called from workqueue/eventd every few seconds.
* Purpose:
@@ -3278,7 +3291,7 @@ static void cache_reap(void *unused)
if (down_trylock(&cache_chain_sem)) {
/* Give up. Setup the next iteration. */
- schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
+ schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
return;
}
@@ -3347,7 +3360,7 @@ next:
up(&cache_chain_sem);
drain_remote_pages();
/* Setup the next iteration */
- schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
+ schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
}
#ifdef CONFIG_PROC_FS
@@ -3594,7 +3607,7 @@ unsigned int ksize(const void *objp)
if (unlikely(objp == NULL))
return 0;
- return obj_reallen(GET_PAGE_CACHE(virt_to_page(objp)));
+ return obj_reallen(page_get_cache(virt_to_page(objp)));
}
diff --git a/mm/swap.c b/mm/swap.c
index 154ae13..d09cf7f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -413,7 +413,6 @@ void vm_acct_memory(long pages)
}
preempt_enable();
}
-EXPORT_SYMBOL(vm_acct_memory);
#ifdef CONFIG_HOTPLUG_CPU
static void lru_drain_cache(unsigned int cpu)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index dfd9a46..0df9a57 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -40,7 +40,6 @@ struct address_space swapper_space = {
.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
.backing_dev_info = &swap_backing_dev_info,
};
-EXPORT_SYMBOL(swapper_space);
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8970c0b..edafeac 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -36,8 +36,6 @@ unsigned int nr_swapfiles;
long total_swap_pages;
static int swap_overflow;
-EXPORT_SYMBOL(total_swap_pages);
-
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 54a90e8..729eb3e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -457,7 +457,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
* @size: allocation size
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
- * @node node to use for allocation or -1
+ * @node: node to use for allocation or -1
*
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
@@ -507,7 +507,7 @@ EXPORT_SYMBOL(vmalloc);
* vmalloc_node - allocate memory on a specific node
*
* @size: allocation size
- * @node; numa node
+ * @node: numa node
*
* Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 135bf8c..2813054 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1074,7 +1074,7 @@ loop_again:
continue;
if (!zone_watermark_ok(zone, order,
- zone->pages_high, 0, 0, 0)) {
+ zone->pages_high, 0, 0)) {
end_zone = i;
goto scan;
}
@@ -1111,7 +1111,7 @@ scan:
if (nr_pages == 0) { /* Not software suspend */
if (!zone_watermark_ok(zone, order,
- zone->pages_high, end_zone, 0, 0))
+ zone->pages_high, end_zone, 0))
all_zones_ok = 0;
}
zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
return;
pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+ if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
return;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;