Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 1313
1 file changed, 388 insertions(+), 925 deletions(-)
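The patch below drops the cmpxchg_double based lockless paths and restores the older SLUB scheme in which each slab is serialized by a bit spinlock embedded in page->flags (see the slab_lock()/slab_trylock()/slab_unlock() hunks). As a rough userspace sketch of that locking discipline — the names here are illustrative, not the kernel API, and the kernel versions additionally disable preemption inside bit_spin_lock() — the lock/trylock/unlock trio behaves like:

	#include <stdatomic.h>
	#include <sched.h>

	/* Spin on a single bit of a flags word, as the kernel does with
	 * bit_spin_lock(PG_locked, &page->flags). */
	#define PG_LOCKED_BIT	0

	static void slab_lock_sim(atomic_ulong *flags)
	{
		/* test-and-set loop; atomic_fetch_or returns the old value */
		while (atomic_fetch_or(flags, 1UL << PG_LOCKED_BIT) &
		       (1UL << PG_LOCKED_BIT))
			sched_yield();	/* the kernel spins with cpu_relax() */
	}

	static int slab_trylock_sim(atomic_ulong *flags)
	{
		/* succeed only if the bit was clear before we set it */
		return !(atomic_fetch_or(flags, 1UL << PG_LOCKED_BIT) &
			 (1UL << PG_LOCKED_BIT));
	}

	static void slab_unlock_sim(atomic_ulong *flags)
	{
		atomic_fetch_and(flags, ~(1UL << PG_LOCKED_BIT));
	}

The trylock variant is what lets the partial-list scan skip a slab that another CPU is currently freeing into, instead of bouncing its cacheline — the rationale spelled out in the new "Lock order" comment in the first hunk.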
@@ -2,11 +2,10 @@ * SLUB: A slab allocator that limits cache line use instead of queuing * objects in per cpu and per node lists. * - * The allocator synchronizes using per slab locks or atomic operatios - * and only uses a centralized lock to manage a pool of partial slabs. + * The allocator synchronizes using per slab locks and only + * uses a centralized lock to manage a pool of partial slabs. * * (C) 2007 SGI, Christoph Lameter - * (C) 2011 Linux Foundation, Christoph Lameter */ #include <linux/mm.h> @@ -28,33 +27,20 @@ #include <linux/memory.h> #include <linux/math64.h> #include <linux/fault-inject.h> -#include <linux/stacktrace.h> #include <trace/events/kmem.h> /* * Lock order: - * 1. slub_lock (Global Semaphore) - * 2. node->list_lock - * 3. slab_lock(page) (Only on some arches and for debugging) + * 1. slab_lock(page) + * 2. slab->list_lock * - * slub_lock - * - * The role of the slub_lock is to protect the list of all the slabs - * and to synchronize major metadata changes to slab cache structures. - * - * The slab_lock is only used for debugging and on arches that do not - * have the ability to do a cmpxchg_double. It only protects the second - * double word in the page struct. Meaning - * A. page->freelist -> List of object free in a page - * B. page->counters -> Counters of objects - * C. page->frozen -> frozen state - * - * If a slab is frozen then it is exempt from list management. It is not - * on any list. The processor that froze the slab is the one who can - * perform list operations on the page. Other processors may put objects - * onto the freelist but the processor that froze the slab is the only - * one that can retrieve the objects from the page's freelist. + * The slab_lock protects operations on the object of a particular + * slab and its metadata in the page struct. If the slab lock + * has been taken then no allocations nor frees can be performed + * on the objects in the slab nor can the slab be added or removed + * from the partial or full lists since this would mean modifying + * the page_struct of the slab. * * The list_lock protects the partial and full list on each node and * the partial slab counter. If taken then no new slabs may be added or @@ -67,6 +53,20 @@ * slabs, operations can continue without any centralized lock. F.e. * allocating a long series of objects that fill up slabs does not require * the list lock. + * + * The lock order is sometimes inverted when we are trying to get a slab + * off a list. We take the list_lock and then look for a page on the list + * to use. While we do that objects in the slabs may be freed. We can + * only operate on the slab if we have also taken the slab_lock. So we use + * a slab_trylock() on the slab. If trylock was successful then no frees + * can occur anymore and we can use the slab for allocations etc. If the + * slab_trylock() does not succeed then frees are in progress in the slab and + * we must stay away from it for a while since we may cause a bouncing + * cacheline if we try to acquire the lock. So go onto the next slab. + * If all pages are busy then we may allocate a new slab instead of reusing + * a partial slab. A new slab has no one operating on it and thus there is + * no danger of cacheline contention. + * * Interrupts are disabled during allocation and deallocation in order to * make the slab allocator safe to use in the context of an irq. 
In addition * interrupts are disabled to ensure that the processor does not change @@ -131,9 +131,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s) /* Enable to test recovery from slab corruption on boot */ #undef SLUB_RESILIENCY_TEST -/* Enable to log cmpxchg failures */ -#undef SLUB_DEBUG_CMPXCHG - /* * Mininum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -169,11 +166,10 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #define OO_SHIFT 16 #define OO_MASK ((1 << OO_SHIFT) - 1) -#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ +#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ /* Internal SLUB flags */ #define __OBJECT_POISON 0x80000000UL /* Poison object */ -#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */ static int kmem_size = sizeof(struct kmem_cache); @@ -195,12 +191,8 @@ static LIST_HEAD(slab_caches); /* * Tracking user of a slab. */ -#define TRACK_ADDRS_COUNT 16 struct track { unsigned long addr; /* Called from address */ -#ifdef CONFIG_STACKTRACE - unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */ -#endif int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@ -346,99 +338,11 @@ static inline int oo_objects(struct kmem_cache_order_objects x) return x.x & OO_MASK; } -/* - * Per slab locking using the pagelock - */ -static __always_inline void slab_lock(struct page *page) -{ - bit_spin_lock(PG_locked, &page->flags); -} - -static __always_inline void slab_unlock(struct page *page) -{ - __bit_spin_unlock(PG_locked, &page->flags); -} - -/* Interrupts must be disabled (for the fallback code to work right) */ -static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, - void *freelist_old, unsigned long counters_old, - void *freelist_new, unsigned long counters_new, - const char *n) -{ - VM_BUG_ON(!irqs_disabled()); -#ifdef CONFIG_CMPXCHG_DOUBLE - if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, - freelist_old, counters_old, - freelist_new, counters_new)) - return 1; - } else -#endif - { - slab_lock(page); - if (page->freelist == freelist_old && page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; - slab_unlock(page); - return 1; - } - slab_unlock(page); - } - - cpu_relax(); - stat(s, CMPXCHG_DOUBLE_FAIL); - -#ifdef SLUB_DEBUG_CMPXCHG - printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); -#endif - - return 0; -} - -static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, - void *freelist_old, unsigned long counters_old, - void *freelist_new, unsigned long counters_new, - const char *n) -{ -#ifdef CONFIG_CMPXCHG_DOUBLE - if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, - freelist_old, counters_old, - freelist_new, counters_new)) - return 1; - } else -#endif - { - unsigned long flags; - - local_irq_save(flags); - slab_lock(page); - if (page->freelist == freelist_old && page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; - slab_unlock(page); - local_irq_restore(flags); - return 1; - } - slab_unlock(page); - local_irq_restore(flags); - } - - cpu_relax(); - stat(s, CMPXCHG_DOUBLE_FAIL); - -#ifdef SLUB_DEBUG_CMPXCHG - printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); -#endif - - return 0; -} - #ifdef CONFIG_SLUB_DEBUG /* * Determine a map of object in use on a 
page. * - * Node listlock must be held to guarantee that the page does + * Slab lock or node listlock must be held to guarantee that the page does * not vanish from under us. */ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) @@ -467,8 +371,34 @@ static int disable_higher_order_debug; */ static void print_section(char *text, u8 *addr, unsigned int length) { - print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, - length, 1); + int i, offset; + int newline = 1; + char ascii[17]; + + ascii[16] = 0; + + for (i = 0; i < length; i++) { + if (newline) { + printk(KERN_ERR "%8s 0x%p: ", text, addr + i); + newline = 0; + } + printk(KERN_CONT " %02x", addr[i]); + offset = i % 16; + ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; + if (offset == 15) { + printk(KERN_CONT " %s\n", ascii); + newline = 1; + } + } + if (!newline) { + i %= 16; + while (i < 16) { + printk(KERN_CONT " "); + ascii[i] = ' '; + i++; + } + printk(KERN_CONT " %s\n", ascii); + } } static struct track *get_track(struct kmem_cache *s, void *object, @@ -490,24 +420,6 @@ static void set_track(struct kmem_cache *s, void *object, struct track *p = get_track(s, object, alloc); if (addr) { -#ifdef CONFIG_STACKTRACE - struct stack_trace trace; - int i; - - trace.nr_entries = 0; - trace.max_entries = TRACK_ADDRS_COUNT; - trace.entries = p->addrs; - trace.skip = 3; - save_stack_trace(&trace); - - /* See rant in lockdep.c */ - if (trace.nr_entries != 0 && - trace.entries[trace.nr_entries - 1] == ULONG_MAX) - trace.nr_entries--; - - for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++) - p->addrs[i] = 0; -#endif p->addr = addr; p->cpu = smp_processor_id(); p->pid = current->pid; @@ -532,16 +444,6 @@ static void print_track(const char *s, struct track *t) printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); -#ifdef CONFIG_STACKTRACE - { - int i; - for (i = 0; i < TRACK_ADDRS_COUNT; i++) - if (t->addrs[i]) - printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); - else - break; - } -#endif } static void print_tracking(struct kmem_cache *s, void *object) @@ -599,12 +501,12 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) p, p - addr, get_freepointer(s, p)); if (p > addr + 16) - print_section("Bytes b4 ", p - 16, 16); + print_section("Bytes b4", p - 16, 16); + + print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE)); - print_section("Object ", p, min_t(unsigned long, s->objsize, - PAGE_SIZE)); if (s->flags & SLAB_RED_ZONE) - print_section("Redzone ", p + s->objsize, + print_section("Redzone", p + s->objsize, s->inuse - s->objsize); if (s->offset) @@ -617,7 +519,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) if (off != s->size) /* Beginning of the filler is the free pointer */ - print_section("Padding ", p + off, s->size - off); + print_section("Padding", p + off, s->size - off); dump_stack(); } @@ -627,6 +529,9 @@ static void object_err(struct kmem_cache *s, struct page *page, { slab_bug(s, "%s", reason); print_trailer(s, page, object); + + if(slub_debug) + panic("SLUB ERROR: object_err"); } static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) @@ -640,6 +545,9 @@ static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) 
slab_bug(s, "%s", buf); print_page_info(page); dump_stack(); + + if(slub_debug) + panic("SLUB ERROR: slab_err"); } static void init_object(struct kmem_cache *s, void *object, u8 val) @@ -655,6 +563,17 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) memset(p + s->objsize, val, s->inuse - s->objsize); } +static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) +{ + while (bytes) { + if (*start != (u8)value) + return start; + start++; + bytes--; + } + return NULL; +} + static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { @@ -669,7 +588,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *fault; u8 *end; - fault = memchr_inv(start, value, bytes); + fault = check_bytes(start, value, bytes); if (!fault) return 1; @@ -683,6 +602,10 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, print_trailer(s, page, object); restore_bytes(s, what, value, fault, end); + + if(slub_debug) + panic("SLUB ERROR: check_bytes_and_report. Can it be restored?"); + return 0; } @@ -762,14 +685,14 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!remainder) return 1; - fault = memchr_inv(end - remainder, POISON_INUSE, remainder); + fault = check_bytes(end - remainder, POISON_INUSE, remainder); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE) end--; slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1); - print_section("Padding ", end - remainder, remainder); + print_section("Padding", end - remainder, remainder); restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end); return 0; @@ -860,11 +783,10 @@ static int check_slab(struct kmem_cache *s, struct page *page) static int on_freelist(struct kmem_cache *s, struct page *page, void *search) { int nr = 0; - void *fp; + void *fp = page->freelist; void *object = NULL; unsigned long max_objects; - fp = page->freelist; while (fp && nr <= page->objects) { if (fp == search) return 1; @@ -918,7 +840,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, page->freelist); if (!alloc) - print_section("Object ", (void *)object, s->objsize); + print_section("Object", (void *)object, s->objsize); dump_stack(); } @@ -969,27 +891,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) /* * Tracking of fully allocated slabs for debugging purposes. - * - * list_lock must be held. */ -static void add_full(struct kmem_cache *s, - struct kmem_cache_node *n, struct page *page) +static void add_full(struct kmem_cache_node *n, struct page *page) { - if (!(s->flags & SLAB_STORE_USER)) - return; - + spin_lock(&n->list_lock); list_add(&page->lru, &n->full); + spin_unlock(&n->list_lock); } -/* - * list_lock must be held. 
- */ static void remove_full(struct kmem_cache *s, struct page *page) { + struct kmem_cache_node *n; + if (!(s->flags & SLAB_STORE_USER)) return; + n = get_node(s, page_to_nid(page)); + + spin_lock(&n->list_lock); list_del(&page->lru); + spin_unlock(&n->list_lock); } /* Tracking of the number of slabs for debugging purposes */ @@ -1045,6 +966,11 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa if (!check_slab(s, page)) goto bad; + if (!on_freelist(s, page, object)) { + object_err(s, page, object, "Object already allocated"); + goto bad; + } + if (!check_valid_pointer(s, page, object)) { object_err(s, page, object, "Freelist Pointer check fails"); goto bad; @@ -1077,12 +1003,6 @@ bad: static noinline int free_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { - unsigned long flags; - int rc = 0; - - local_irq_save(flags); - slab_lock(page); - if (!check_slab(s, page)) goto fail; @@ -1097,7 +1017,7 @@ static noinline int free_debug_processing(struct kmem_cache *s, } if (!check_object(s, page, object, SLUB_RED_ACTIVE)) - goto out; + return 0; if (unlikely(s != page->slab)) { if (!PageSlab(page)) { @@ -1114,19 +1034,18 @@ static noinline int free_debug_processing(struct kmem_cache *s, goto fail; } + /* Special debug activities for freeing objects */ + if (!PageSlubFrozen(page) && !page->freelist) + remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); init_object(s, object, SLUB_RED_INACTIVE); - rc = 1; -out: - slab_unlock(page); - local_irq_restore(flags); - return rc; + return 1; fail: slab_fix(s, "Object at 0x%p not freed", object); - goto out; + return 0; } static int __init setup_slub_debug(char *str) @@ -1226,9 +1145,7 @@ static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } static inline int check_object(struct kmem_cache *s, struct page *page, void *object, u8 val) { return 1; } -static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, - struct page *page) {} -static inline void remove_full(struct kmem_cache *s, struct page *page) {} +static inline void add_full(struct kmem_cache_node *n, struct page *page) {} static inline unsigned long kmem_cache_flags(unsigned long objsize, unsigned long flags, const char *name, void (*ctor)(void *)) @@ -1280,11 +1197,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) struct kmem_cache_order_objects oo = s->oo; gfp_t alloc_gfp; - flags &= gfp_allowed_mask; - - if (flags & __GFP_WAIT) - local_irq_enable(); - flags |= s->allocflags; /* @@ -1301,17 +1213,12 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) * Try a lower order alloc if possible */ page = alloc_slab_page(flags, node, oo); + if (!page) + return NULL; - if (page) - stat(s, ORDER_FALLBACK); + stat(s, ORDER_FALLBACK); } - if (flags & __GFP_WAIT) - local_irq_disable(); - - if (!page) - return NULL; - if (kmemcheck_enabled && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); @@ -1378,8 +1285,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, last, NULL); page->freelist = start; - page->inuse = page->objects; - page->frozen = 1; + page->inuse = 0; out: return page; } @@ -1457,84 +1363,79 @@ static void discard_slab(struct kmem_cache *s, struct page *page) } /* - * Management of partially allocated slabs. - * - * list_lock must be held. 
+ * Per slab locking using the pagelock */ -static inline void add_partial(struct kmem_cache_node *n, +static __always_inline void slab_lock(struct page *page) +{ + bit_spin_lock(PG_locked, &page->flags); +} + +static __always_inline void slab_unlock(struct page *page) +{ + __bit_spin_unlock(PG_locked, &page->flags); +} + +static __always_inline int slab_trylock(struct page *page) +{ + int rc = 1; + + rc = bit_spin_trylock(PG_locked, &page->flags); + return rc; +} + +/* + * Management of partially allocated slabs + */ +static void add_partial(struct kmem_cache_node *n, struct page *page, int tail) { + spin_lock(&n->list_lock); n->nr_partial++; - if (tail == DEACTIVATE_TO_TAIL) + if (tail) list_add_tail(&page->lru, &n->partial); else list_add(&page->lru, &n->partial); + spin_unlock(&n->list_lock); } -/* - * list_lock must be held. - */ -static inline void remove_partial(struct kmem_cache_node *n, +static inline void __remove_partial(struct kmem_cache_node *n, struct page *page) { list_del(&page->lru); n->nr_partial--; } +static void remove_partial(struct kmem_cache *s, struct page *page) +{ + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + + spin_lock(&n->list_lock); + __remove_partial(n, page); + spin_unlock(&n->list_lock); +} + /* - * Lock slab, remove from the partial list and put the object into the - * per cpu freelist. - * - * Returns a list of objects or NULL if it fails. + * Lock slab and remove from the partial list. * * Must hold list_lock. */ -static inline void *acquire_slab(struct kmem_cache *s, - struct kmem_cache_node *n, struct page *page, - int mode) +static inline int lock_and_freeze_slab(struct kmem_cache_node *n, + struct page *page) { - void *freelist; - unsigned long counters; - struct page new; - - /* - * Zap the freelist and set the frozen bit. - * The old freelist is the list of objects for the - * per cpu allocation list. - */ - do { - freelist = page->freelist; - counters = page->counters; - new.counters = counters; - if (mode) { - new.inuse = page->objects; - new.freelist = NULL; - } else { - new.freelist = freelist; - } - - VM_BUG_ON(new.frozen); - new.frozen = 1; - - } while (!__cmpxchg_double_slab(s, page, - freelist, counters, - new.freelist, new.counters, - "lock and freeze")); - - remove_partial(n, page); - return freelist; + if (slab_trylock(page)) { + __remove_partial(n, page); + __SetPageSlubFrozen(page); + return 1; + } + return 0; } -static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); - /* * Try to allocate a partial slab from a specific node. */ -static void *get_partial_node(struct kmem_cache *s, - struct kmem_cache_node *n, struct kmem_cache_cpu *c) +static struct page *get_partial_node(struct kmem_cache_node *n) { - struct page *page, *page2; - void *object = NULL; + struct page *page; /* * Racy check. 
If we mistakenly see no partial slabs then we @@ -1546,42 +1447,26 @@ static void *get_partial_node(struct kmem_cache *s, return NULL; spin_lock(&n->list_lock); - list_for_each_entry_safe(page, page2, &n->partial, lru) { - void *t = acquire_slab(s, n, page, object == NULL); - int available; - - if (!t) - break; - - if (!object) { - c->page = page; - c->node = page_to_nid(page); - stat(s, ALLOC_FROM_PARTIAL); - object = t; - available = page->objects - page->inuse; - } else { - available = put_cpu_partial(s, page, 0); - } - if (kmem_cache_debug(s) || available > s->cpu_partial / 2) - break; - - } + list_for_each_entry(page, &n->partial, lru) + if (lock_and_freeze_slab(n, page)) + goto out; + page = NULL; +out: spin_unlock(&n->list_lock); - return object; + return page; } /* * Get a page from somewhere. Search in increasing NUMA distances. */ -static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, - struct kmem_cache_cpu *c) +static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) { #ifdef CONFIG_NUMA struct zonelist *zonelist; struct zoneref *z; struct zone *zone; enum zone_type high_zoneidx = gfp_zone(flags); - void *object; + struct page *page; unsigned int cpuset_mems_cookie; /* @@ -1608,7 +1493,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, do { cpuset_mems_cookie = get_mems_allowed(); - zonelist = node_zonelist(slab_node(), flags); + zonelist = node_zonelist(slab_node(current->mempolicy), flags); for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; @@ -1616,8 +1501,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, if (n && cpuset_zone_allowed_hardwall(zone, flags) && n->nr_partial > s->min_partial) { - object = get_partial_node(s, n, c); - if (object) { + page = get_partial_node(n); + if (page) { /* * Return the object even if * put_mems_allowed indicated that @@ -1627,7 +1512,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, * and the cpuset update. */ put_mems_allowed(cpuset_mems_cookie); - return object; + return page; } } } @@ -1639,17 +1524,63 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags, /* * Get a partial page, lock it and return it. */ -static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, - struct kmem_cache_cpu *c) +static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) { - void *object; + struct page *page; int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; - object = get_partial_node(s, get_node(s, searchnode), c); - if (object || node != NUMA_NO_NODE) - return object; + page = get_partial_node(get_node(s, searchnode)); + if (page || node != NUMA_NO_NODE) + return page; - return get_any_partial(s, flags, c); + return get_any_partial(s, flags); +} + +/* + * Move a page back to the lists. + * + * Must be called with the slab lock held. + * + * On exit the slab lock will have been dropped. + */ +static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) + __releases(bitlock) +{ + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + + __ClearPageSlubFrozen(page); + if (page->inuse) { + + if (page->freelist) { + add_partial(n, page, tail); + stat(s, tail ? 
DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); + } else { + stat(s, DEACTIVATE_FULL); + if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER)) + add_full(n, page); + } + slab_unlock(page); + } else { + stat(s, DEACTIVATE_EMPTY); + if (n->nr_partial < s->min_partial) { + /* + * Adding an empty slab to the partial slabs in order + * to avoid page allocator overhead. This slab needs + * to come after the other slabs with objects in + * so that the others get filled first. That way the + * size of the partial list stays small. + * + * kmem_cache_shrink can reclaim any empty slabs from + * the partial list. + */ + add_partial(n, page, 1); + slab_unlock(page); + } else { + slab_unlock(page); + stat(s, FREE_SLAB); + discard_slab(s, page); + } + } } #ifdef CONFIG_PREEMPT @@ -1718,270 +1649,45 @@ void init_kmem_cache_cpus(struct kmem_cache *s) for_each_possible_cpu(cpu) per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); } - /* * Remove the cpu slab */ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) + __releases(bitlock) { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; struct page *page = c->page; - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - int lock = 0; - enum slab_modes l = M_NONE, m = M_NONE; - void *freelist; - void *nextfree; - int tail = DEACTIVATE_TO_HEAD; - struct page new; - struct page old; - - if (page->freelist) { - stat(s, DEACTIVATE_REMOTE_FREES); - tail = DEACTIVATE_TO_TAIL; - } - - c->tid = next_tid(c->tid); - c->page = NULL; - freelist = c->freelist; - c->freelist = NULL; - - /* - * Stage one: Free all available per cpu objects back - * to the page freelist while it is still frozen. Leave the - * last one. - * - * There is no need to take the list->lock because the page - * is still frozen. - */ - while (freelist && (nextfree = get_freepointer(s, freelist))) { - void *prior; - unsigned long counters; - - do { - prior = page->freelist; - counters = page->counters; - set_freepointer(s, freelist, prior); - new.counters = counters; - new.inuse--; - VM_BUG_ON(!new.frozen); - - } while (!__cmpxchg_double_slab(s, page, - prior, counters, - freelist, new.counters, - "drain percpu freelist")); - - freelist = nextfree; - } + int tail = 1; + if (page->freelist) + stat(s, DEACTIVATE_REMOTE_FREES); /* - * Stage two: Ensure that the page is unfrozen while the - * list presence reflects the actual number of objects - * during unfreeze. - * - * We setup the list membership and then perform a cmpxchg - * with the count. If there is a mismatch then the page - * is not unfrozen but the page is on the wrong list. - * - * Then we restart the process which may have to remove - * the page from the list that we just put it on again - * because the number of objects in the slab may have - * changed. + * Merge cpu freelist into slab freelist. Typically we get here + * because both freelists are empty. So this is unlikely + * to occur. 
*/ -redo: - - old.freelist = page->freelist; - old.counters = page->counters; - VM_BUG_ON(!old.frozen); - - /* Determine target state of the slab */ - new.counters = old.counters; - if (freelist) { - new.inuse--; - set_freepointer(s, freelist, old.freelist); - new.freelist = freelist; - } else - new.freelist = old.freelist; - - new.frozen = 0; + while (unlikely(c->freelist)) { + void **object; - if (!new.inuse && n->nr_partial > s->min_partial) - m = M_FREE; - else if (new.freelist) { - m = M_PARTIAL; - if (!lock) { - lock = 1; - /* - * Taking the spinlock removes the possiblity - * that acquire_slab() will see a slab page that - * is frozen - */ - spin_lock(&n->list_lock); - } - } else { - m = M_FULL; - if (kmem_cache_debug(s) && !lock) { - lock = 1; - /* - * This also ensures that the scanning of full - * slabs from diagnostic functions will not see - * any frozen slabs. - */ - spin_lock(&n->list_lock); - } - } - - if (l != m) { + tail = 0; /* Hot objects. Put the slab first */ - if (l == M_PARTIAL) + /* Retrieve object from cpu_freelist */ + object = c->freelist; + c->freelist = get_freepointer(s, c->freelist); - remove_partial(n, page); - - else if (l == M_FULL) - - remove_full(s, page); - - if (m == M_PARTIAL) { - - add_partial(n, page, tail); - stat(s, tail); - - } else if (m == M_FULL) { - - stat(s, DEACTIVATE_FULL); - add_full(s, n, page); - - } - } - - l = m; - if (!__cmpxchg_double_slab(s, page, - old.freelist, old.counters, - new.freelist, new.counters, - "unfreezing slab")) - goto redo; - - if (lock) - spin_unlock(&n->list_lock); - - if (m == M_FREE) { - stat(s, DEACTIVATE_EMPTY); - discard_slab(s, page); - stat(s, FREE_SLAB); + /* And put onto the regular freelist */ + set_freepointer(s, object, page->freelist); + page->freelist = object; + page->inuse--; } -} - -/* Unfreeze all the cpu partial slabs */ -static void unfreeze_partials(struct kmem_cache *s) -{ - struct kmem_cache_node *n = NULL, *n2 = NULL; - struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); - struct page *page, *discard_page = NULL; - - while ((page = c->partial)) { - struct page new; - struct page old; - - c->partial = page->next; - - n2 = get_node(s, page_to_nid(page)); - if (n != n2) { - if (n) - spin_unlock(&n->list_lock); - - n = n2; - spin_lock(&n->list_lock); - } - - do { - - old.freelist = page->freelist; - old.counters = page->counters; - VM_BUG_ON(!old.frozen); - - new.counters = old.counters; - new.freelist = old.freelist; - - new.frozen = 0; - - } while (!cmpxchg_double_slab(s, page, - old.freelist, old.counters, - new.freelist, new.counters, - "unfreezing slab")); - - if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { - page->next = discard_page; - discard_page = page; - } else { - add_partial(n, page, DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } - } - - if (n) - spin_unlock(&n->list_lock); - - while (discard_page) { - page = discard_page; - discard_page = discard_page->next; - - stat(s, DEACTIVATE_EMPTY); - discard_slab(s, page); - stat(s, FREE_SLAB); - } -} - -/* - * Put a page that was just frozen (in __slab_free) into a partial page - * slot if available. This is done without interrupts disabled and without - * preemption disabled. The cmpxchg is racy and may put the partial page - * onto a random cpus partial slot. - * - * If we did not find a slot then simply move all the partials to the - * per node partial list. 
- */ -int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) -{ - struct page *oldpage; - int pages; - int pobjects; - - do { - pages = 0; - pobjects = 0; - oldpage = this_cpu_read(s->cpu_slab->partial); - - if (oldpage) { - pobjects = oldpage->pobjects; - pages = oldpage->pages; - if (drain && pobjects > s->cpu_partial) { - unsigned long flags; - /* - * partial array is full. Move the existing - * set to the per node partial list. - */ - local_irq_save(flags); - unfreeze_partials(s); - local_irq_restore(flags); - pobjects = 0; - pages = 0; - } - } - - pages++; - pobjects += page->objects - page->inuse; - - page->pages = pages; - page->pobjects = pobjects; - page->next = oldpage; - - } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); - stat(s, CPU_PARTIAL_FREE); - return pobjects; + c->page = NULL; + c->tid = next_tid(c->tid); + unfreeze_slab(s, page, tail); } static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { stat(s, CPUSLAB_FLUSH); + slab_lock(c->page); deactivate_slab(s, c); } @@ -1994,12 +1700,8 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - if (likely(c)) { - if (c->page) - flush_slab(s, c); - - unfreeze_partials(s); - } + if (likely(c && c->page)) + flush_slab(s, c); } static void flush_cpu_slab(void *d) @@ -2090,39 +1792,12 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) } } -static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, - int node, struct kmem_cache_cpu **pc) -{ - void *object; - struct kmem_cache_cpu *c; - struct page *page = new_slab(s, flags, node); - - if (page) { - c = __this_cpu_ptr(s->cpu_slab); - if (c->page) - flush_slab(s, c); - - /* - * No other reference to the page yet so we can - * muck around with it freely without cmpxchg - */ - object = page->freelist; - page->freelist = NULL; - - stat(s, ALLOC_SLAB); - c->node = page_to_nid(page); - c->page = page; - *pc = c; - } else - object = NULL; - - return object; -} - /* * Slow path. The lockless freelist is empty or we need to perform * debugging duties. * + * Interrupts are disabled. + * * Processing is still very fast if new objects have been freed to the * regular freelist. In that case we simply take over the regular freelist * as the lockless freelist and zap the regular freelist. 
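The deactivate_slab() hunk above merges the remaining per-cpu freelist back into the slab's own freelist before unfreezing. A compressed userspace sketch of that handoff — the struct and helper names are simplified stand-ins for the kernel's page/freelist fields, with the next-free pointer kept at offset 0 of each free object:

	/* Each free object stores a pointer to the next free object
	 * inside itself (SLUB keeps it at s->offset; offset 0 here). */
	struct slab_sim {
		void *freelist;		/* slab ("page") freelist */
		int inuse;
	};

	static void *get_freepointer_sim(void *object)
	{
		return *(void **)object;
	}

	static void set_freepointer_sim(void *object, void *fp)
	{
		*(void **)object = fp;
	}

	/* Push every object on the per-cpu freelist back onto the slab
	 * freelist; each moved object decrements the in-use count. */
	static void merge_cpu_freelist(struct slab_sim *slab, void **cpu_freelist)
	{
		while (*cpu_freelist) {
			void *object = *cpu_freelist;

			*cpu_freelist = get_freepointer_sim(object);
			set_freepointer_sim(object, slab->freelist);
			slab->freelist = object;
			slab->inuse--;
		}
	}

Each pop from the cpu list becomes a push onto the slab list, so recently freed (cache-hot) objects end up at the head; that is why the loop in the patch clears `tail` and deactivate_slab() then queues the slab at the front of the partial list.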
@@ -2139,9 +1814,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { void **object; + struct page *page; unsigned long flags; - struct page new; - unsigned long counters; local_irq_save(flags); #ifdef CONFIG_PREEMPT @@ -2153,96 +1827,87 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c = this_cpu_ptr(s->cpu_slab); #endif - if (!c->page) - goto new_slab; -redo: - if (unlikely(!node_match(c, node))) { - stat(s, ALLOC_NODE_MISMATCH); - deactivate_slab(s, c); + /* We handle __GFP_ZERO in the caller */ + gfpflags &= ~__GFP_ZERO; + + page = c->page; + if (!page) goto new_slab; - } + + slab_lock(page); + if (unlikely(!node_match(c, node))) + goto another_slab; /* must check again c->freelist in case of cpu migration or IRQ */ object = c->freelist; if (object) - goto load_freelist; - - stat(s, ALLOC_SLOWPATH); - - do { - object = c->page->freelist; - counters = c->page->counters; - new.counters = counters; - VM_BUG_ON(!new.frozen); - - /* - * If there is no object left then we use this loop to - * deactivate the slab which is simple since no objects - * are left in the slab and therefore we do not need to - * put the page back onto the partial list. - * - * If there are objects left then we retrieve them - * and use them to refill the per cpu queue. - */ - - new.inuse = c->page->objects; - new.frozen = object != NULL; - - } while (!__cmpxchg_double_slab(s, c->page, - object, counters, - NULL, new.counters, - "__slab_alloc")); - - if (!object) { - c->page = NULL; - stat(s, DEACTIVATE_BYPASS); - goto new_slab; - } + goto update_freelist; stat(s, ALLOC_REFILL); load_freelist: + object = page->freelist; + if (unlikely(!object)) + goto another_slab; + if (kmem_cache_debug(s)) + goto debug; + +update_freelist: c->freelist = get_freepointer(s, object); + page->inuse = page->objects; + page->freelist = NULL; + + slab_unlock(page); c->tid = next_tid(c->tid); local_irq_restore(flags); + stat(s, ALLOC_SLOWPATH); return object; -new_slab: +another_slab: + deactivate_slab(s, c); - if (c->partial) { - c->page = c->partial; - c->partial = c->page->next; - c->node = page_to_nid(c->page); - stat(s, CPU_PARTIAL_ALLOC); - c->freelist = NULL; - goto redo; +new_slab: + page = get_partial(s, gfpflags, node); + if (page) { + stat(s, ALLOC_FROM_PARTIAL); + c->node = page_to_nid(page); + c->page = page; + goto load_freelist; } - /* Then do expensive stuff like retrieving pages from the partial lists */ - object = get_partial(s, gfpflags, node, c); - - if (unlikely(!object)) { + gfpflags &= gfp_allowed_mask; + if (gfpflags & __GFP_WAIT) + local_irq_enable(); - object = new_slab_objects(s, gfpflags, node, &c); + page = new_slab(s, gfpflags, node); - if (unlikely(!object)) { - if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) - slab_out_of_memory(s, gfpflags, node); + if (gfpflags & __GFP_WAIT) + local_irq_disable(); - local_irq_restore(flags); - return NULL; - } - } + if (page) { + c = __this_cpu_ptr(s->cpu_slab); + stat(s, ALLOC_SLAB); + if (c->page) + flush_slab(s, c); - if (likely(!kmem_cache_debug(s))) + slab_lock(page); + __SetPageSlubFrozen(page); + c->node = page_to_nid(page); + c->page = page; goto load_freelist; + } + if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) + slab_out_of_memory(s, gfpflags, node); + local_irq_restore(flags); + return NULL; +debug: + if (!alloc_debug_processing(s, page, object, addr)) + goto another_slab; - /* Only entered in the debug case */ - if (!alloc_debug_processing(s, 
c->page, object, addr)) - goto new_slab; /* Slab failed checks. Next slab needed */ - - c->freelist = get_freepointer(s, object); + page->inuse++; + page->freelist = get_freepointer(s, object); deactivate_slab(s, c); + c->page = NULL; c->node = NUMA_NO_NODE; local_irq_restore(flags); return object; @@ -2392,110 +2057,52 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; - int was_frozen; - int inuse; - struct page new; - unsigned long counters; - struct kmem_cache_node *n = NULL; - unsigned long uninitialized_var(flags); + unsigned long flags; + local_irq_save(flags); + slab_lock(page); stat(s, FREE_SLOWPATH); if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) - return; - - do { - prior = page->freelist; - counters = page->counters; - set_freepointer(s, object, prior); - new.counters = counters; - was_frozen = new.frozen; - new.inuse--; - if ((!new.inuse || !prior) && !was_frozen && !n) { - - if (!kmem_cache_debug(s) && !prior) - - /* - * Slab was on no list before and will be partially empty - * We can defer the list move and instead freeze it. - */ - new.frozen = 1; - - else { /* Needs to be taken off a list */ - - n = get_node(s, page_to_nid(page)); - /* - * Speculatively acquire the list_lock. - * If the cmpxchg does not succeed then we may - * drop the list_lock without any processing. - * - * Otherwise the list_lock will synchronize with - * other processors updating the list of slabs. - */ - spin_lock_irqsave(&n->list_lock, flags); - - } - } - inuse = new.inuse; - - } while (!cmpxchg_double_slab(s, page, - prior, counters, - object, new.counters, - "__slab_free")); + goto out_unlock; - if (likely(!n)) { + prior = page->freelist; + set_freepointer(s, object, prior); + page->freelist = object; + page->inuse--; - /* - * If we just froze the page then put it onto the - * per cpu partial list. - */ - if (new.frozen && !was_frozen) - put_cpu_partial(s, page, 1); + if (unlikely(PageSlubFrozen(page))) { + stat(s, FREE_FROZEN); + goto out_unlock; + } - /* - * The list lock was not taken therefore no list - * activity can be necessary. - */ - if (was_frozen) - stat(s, FREE_FROZEN); - return; - } + if (unlikely(!page->inuse)) + goto slab_empty; /* - * was_frozen may have been set after we acquired the list_lock in - * an earlier loop. So we need to check it here again. + * Objects left in the slab. If it was not on the partial list before + * then add it. */ - if (was_frozen) - stat(s, FREE_FROZEN); - else { - if (unlikely(!inuse && n->nr_partial > s->min_partial)) - goto slab_empty; - - /* - * Objects left in the slab. If it was not on the partial list before - * then add it. - */ - if (unlikely(!prior)) { - remove_full(s, page); - add_partial(n, page, DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } + if (unlikely(!prior)) { + add_partial(get_node(s, page_to_nid(page)), page, 1); + stat(s, FREE_ADD_PARTIAL); } - spin_unlock_irqrestore(&n->list_lock, flags); + +out_unlock: + slab_unlock(page); + local_irq_restore(flags); return; slab_empty: if (prior) { /* - * Slab on the partial list. + * Slab still on the partial list. 
*/ - remove_partial(n, page); + remove_partial(s, page); stat(s, FREE_REMOVE_PARTIAL); - } else - /* Slab must be on the full list */ - remove_full(s, page); - - spin_unlock_irqrestore(&n->list_lock, flags); + } + slab_unlock(page); + local_irq_restore(flags); stat(s, FREE_SLAB); discard_slab(s, page); } @@ -2521,6 +2128,7 @@ static __always_inline void slab_free(struct kmem_cache *s, slab_free_hook(s, x); redo: + /* * Determine the currently cpus per cpu slab. * The cpu may change afterward. However that does not matter since @@ -2768,6 +2376,7 @@ static void early_kmem_cache_node_alloc(int node) { struct page *page; struct kmem_cache_node *n; + unsigned long flags; BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); @@ -2784,8 +2393,7 @@ static void early_kmem_cache_node_alloc(int node) n = page->freelist; BUG_ON(!n); page->freelist = get_freepointer(kmem_cache_node, n); - page->inuse = 1; - page->frozen = 0; + page->inuse++; kmem_cache_node->node[node] = n; #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); @@ -2794,7 +2402,14 @@ static void early_kmem_cache_node_alloc(int node) init_kmem_cache_node(n, kmem_cache_node); inc_slabs_node(kmem_cache_node, node, page->objects); - add_partial(n, page, DEACTIVATE_TO_HEAD); + /* + * lockdep requires consistent irq usage for each lock + * so even though there cannot be a race this early in + * the boot sequence, we still disable irqs. + */ + local_irq_save(flags); + add_partial(n, page, 0); + local_irq_restore(flags); } static void free_kmem_cache_nodes(struct kmem_cache *s) @@ -3000,44 +2615,11 @@ static int kmem_cache_open(struct kmem_cache *s, } } -#ifdef CONFIG_CMPXCHG_DOUBLE - if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0) - /* Enable fast mode */ - s->flags |= __CMPXCHG_DOUBLE; -#endif - /* * The larger the object size is, the more pages we want on the partial * list to avoid pounding the page allocator excessively. */ - set_min_partial(s, ilog2(s->size) / 2); - - /* - * cpu_partial determined the maximum number of objects kept in the - * per cpu partial lists of a processor. - * - * Per cpu partial lists mainly contain slabs that just have one - * object freed. If they are used for allocation then they can be - * filled up again with minimal effort. The slab will never hit the - * per node partial lists and therefore no locking will be required. - * - * This setting also determines - * - * A) The number of objects from per cpu partial slabs dumped to the - * per node list when we reach the limit. - * B) The number of objects in cpu partial slabs to extract from the - * per node list when we run out of per cpu objects. We only fetch 50% - * to keep some capacity around for frees. - */ - if (s->size >= PAGE_SIZE) - s->cpu_partial = 2; - else if (s->size >= 1024) - s->cpu_partial = 6; - else if (s->size >= 256) - s->cpu_partial = 13; - else - s->cpu_partial = 30; - + set_min_partial(s, ilog2(s->size)); s->refcount = 1; #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 1000; @@ -3096,22 +2678,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, /* * Attempt to free all partial slabs on a node. - * This is called from kmem_cache_close(). We must be the last thread - * using the cache and therefore we do not need to lock anymore. 
*/ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) { + unsigned long flags; struct page *page, *h; + spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { - remove_partial(n, page); + __remove_partial(n, page); discard_slab(s, page); } else { list_slab_objects(s, page, "Objects remaining on kmem_cache_close()"); } } + spin_unlock_irqrestore(&n->list_lock, flags); } /* @@ -3145,7 +2728,6 @@ void kmem_cache_destroy(struct kmem_cache *s) s->refcount--; if (!s->refcount) { list_del(&s->list); - up_write(&slub_lock); if (kmem_cache_close(s)) { printk(KERN_ERR "SLUB %s: %s called for cache that " "still has objects.\n", s->name, __func__); @@ -3154,8 +2736,8 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->flags & SLAB_DESTROY_BY_RCU) rcu_barrier(); sysfs_slab_remove(s); - } else - up_write(&slub_lock); + } + up_write(&slub_lock); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -3372,42 +2954,6 @@ size_t ksize(const void *object) } EXPORT_SYMBOL(ksize); -#ifdef CONFIG_SLUB_DEBUG -bool verify_mem_not_deleted(const void *x) -{ - struct page *page; - void *object = (void *)x; - unsigned long flags; - bool rv; - - if (unlikely(ZERO_OR_NULL_PTR(x))) - return false; - - local_irq_save(flags); - - page = virt_to_head_page(x); - if (unlikely(!PageSlab(page))) { - /* maybe it was from stack? */ - rv = true; - goto out_unlock; - } - - slab_lock(page); - if (on_freelist(page->slab, page, object)) { - object_err(page->slab, page, object, "Object is on free-list"); - rv = false; - } else { - rv = true; - } - slab_unlock(page); - -out_unlock: - local_irq_restore(flags); - return rv; -} -EXPORT_SYMBOL(verify_mem_not_deleted); -#endif - void kfree(const void *x) { struct page *page; @@ -3473,23 +3019,29 @@ int kmem_cache_shrink(struct kmem_cache *s) * list_lock. page->inuse here is the upper limit. */ list_for_each_entry_safe(page, t, &n->partial, lru) { - list_move(&page->lru, slabs_by_inuse + page->inuse); - if (!page->inuse) - n->nr_partial--; + if (!page->inuse && slab_trylock(page)) { + /* + * Must hold slab lock here because slab_free + * may have freed the last object and be + * waiting to release the slab. + */ + __remove_partial(n, page); + slab_unlock(page); + discard_slab(s, page); + } else { + list_move(&page->lru, + slabs_by_inuse + page->inuse); + } } /* * Rebuild the partial list with the slabs filled up most * first and the least used slabs at the end. 
*/ - for (i = objects - 1; i > 0; i--) + for (i = objects - 1; i >= 0; i--) list_splice(slabs_by_inuse + i, n->partial.prev); spin_unlock_irqrestore(&n->list_lock, flags); - - /* Release empty slabs */ - list_for_each_entry_safe(page, t, slabs_by_inuse, lru) - discard_slab(s, page); } kfree(slabs_by_inuse); @@ -4063,9 +3615,12 @@ static int validate_slab(struct kmem_cache *s, struct page *page, static void validate_slab_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { - slab_lock(page); - validate_slab(s, page, map); - slab_unlock(page); + if (slab_trylock(page)) { + validate_slab(s, page, map); + slab_unlock(page); + } else + printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", + s->name, page); } static int validate_slab_node(struct kmem_cache *s, @@ -4446,37 +4001,22 @@ static ssize_t show_slab_objects(struct kmem_cache *s, for_each_possible_cpu(cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - int node = ACCESS_ONCE(c->node); - struct page *page; - if (node < 0) + if (!c || c->node < 0) continue; - page = ACCESS_ONCE(c->page); - if (page) { - if (flags & SO_TOTAL) - x = page->objects; + + if (c->page) { + if (flags & SO_TOTAL) + x = c->page->objects; else if (flags & SO_OBJECTS) - x = page->inuse; + x = c->page->inuse; else x = 1; total += x; - nodes[node] += x; + nodes[c->node] += x; } - page = c->partial; - - if (page) { - node = page_to_nid(page); - if (flags & SO_TOTAL) - WARN_ON_ONCE(1); - else if (flags & SO_OBJECTS) - WARN_ON_ONCE(1); - else - x = page->pages; - total += x; - nodes[node] += x; - } - per_cpu[node]++; + per_cpu[c->node]++; } } @@ -4545,7 +4085,7 @@ static int any_slab_objects(struct kmem_cache *s) #endif #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) -#define to_slab(n) container_of(n, struct kmem_cache, kobj) +#define to_slab(n) container_of(n, struct kmem_cache, kobj); struct slab_attribute { struct attribute attr; @@ -4554,12 +4094,11 @@ struct slab_attribute { }; #define SLAB_ATTR_RO(_name) \ - static struct slab_attribute _name##_attr = \ - __ATTR(_name, 0400, _name##_show, NULL) + static struct slab_attribute _name##_attr = __ATTR_RO(_name) #define SLAB_ATTR(_name) \ static struct slab_attribute _name##_attr = \ - __ATTR(_name, 0600, _name##_show, _name##_store) + __ATTR(_name, 0644, _name##_show, _name##_store) static ssize_t slab_size_show(struct kmem_cache *s, char *buf) { @@ -4628,27 +4167,6 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(min_partial); -static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) -{ - return sprintf(buf, "%u\n", s->cpu_partial); -} - -static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, - size_t length) -{ - unsigned long objects; - int err; - - err = strict_strtoul(buf, 10, &objects); - if (err) - return err; - - s->cpu_partial = objects; - flush_all(s); - return length; -} -SLAB_ATTR(cpu_partial); - static ssize_t ctor_show(struct kmem_cache *s, char *buf) { if (!s->ctor) @@ -4687,37 +4205,6 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(objects_partial); -static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) -{ - int objects = 0; - int pages = 0; - int cpu; - int len; - - for_each_online_cpu(cpu) { - struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial; - - if (page) { - pages += page->pages; - objects += page->pobjects; - } - } - - len = sprintf(buf, "%d(%d)", objects, pages); - -#ifdef CONFIG_SMP - for_each_online_cpu(cpu) { - struct 
page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial; - - if (page && len < PAGE_SIZE - 20) - len += sprintf(buf + len, " C%d=%d(%d)", cpu, - page->pobjects, page->pages); - } -#endif - return len + sprintf(buf + len, "\n"); -} -SLAB_ATTR_RO(slabs_cpu_partial); - static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4781,10 +4268,8 @@ static ssize_t sanity_checks_store(struct kmem_cache *s, const char *buf, size_t length) { s->flags &= ~SLAB_DEBUG_FREE; - if (buf[0] == '1') { - s->flags &= ~__CMPXCHG_DOUBLE; + if (buf[0] == '1') s->flags |= SLAB_DEBUG_FREE; - } return length; } SLAB_ATTR(sanity_checks); @@ -4798,10 +4283,8 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, size_t length) { s->flags &= ~SLAB_TRACE; - if (buf[0] == '1') { - s->flags &= ~__CMPXCHG_DOUBLE; + if (buf[0] == '1') s->flags |= SLAB_TRACE; - } return length; } SLAB_ATTR(trace); @@ -4818,10 +4301,8 @@ static ssize_t red_zone_store(struct kmem_cache *s, return -EBUSY; s->flags &= ~SLAB_RED_ZONE; - if (buf[0] == '1') { - s->flags &= ~__CMPXCHG_DOUBLE; + if (buf[0] == '1') s->flags |= SLAB_RED_ZONE; - } calculate_sizes(s, -1); return length; } @@ -4839,10 +4320,8 @@ static ssize_t poison_store(struct kmem_cache *s, return -EBUSY; s->flags &= ~SLAB_POISON; - if (buf[0] == '1') { - s->flags &= ~__CMPXCHG_DOUBLE; + if (buf[0] == '1') s->flags |= SLAB_POISON; - } calculate_sizes(s, -1); return length; } @@ -4860,10 +4339,8 @@ static ssize_t store_user_store(struct kmem_cache *s, return -EBUSY; s->flags &= ~SLAB_STORE_USER; - if (buf[0] == '1') { - s->flags &= ~__CMPXCHG_DOUBLE; + if (buf[0] == '1') s->flags |= SLAB_STORE_USER; - } calculate_sizes(s, -1); return length; } @@ -5028,7 +4505,6 @@ STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); STAT_ATTR(ALLOC_SLAB, alloc_slab); STAT_ATTR(ALLOC_REFILL, alloc_refill); -STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch); STAT_ATTR(FREE_SLAB, free_slab); STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); STAT_ATTR(DEACTIVATE_FULL, deactivate_full); @@ -5036,12 +4512,7 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); -STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); STAT_ATTR(ORDER_FALLBACK, order_fallback); -STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); -STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); -STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc); -STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free); #endif static struct attribute *slab_attrs[] = { @@ -5050,7 +4521,6 @@ static struct attribute *slab_attrs[] = { &objs_per_slab_attr.attr, &order_attr.attr, &min_partial_attr.attr, - &cpu_partial_attr.attr, &objects_attr.attr, &objects_partial_attr.attr, &partial_attr.attr, @@ -5063,7 +4533,6 @@ static struct attribute *slab_attrs[] = { &destroy_by_rcu_attr.attr, &shrink_attr.attr, &reserved_attr.attr, - &slabs_cpu_partial_attr.attr, #ifdef CONFIG_SLUB_DEBUG &total_objects_attr.attr, &slabs_attr.attr, @@ -5093,7 +4562,6 @@ static struct attribute *slab_attrs[] = { &alloc_from_partial_attr.attr, &alloc_slab_attr.attr, &alloc_refill_attr.attr, - &alloc_node_mismatch_attr.attr, &free_slab_attr.attr, &cpuslab_flush_attr.attr, &deactivate_full_attr.attr, @@ -5101,12 +4569,7 @@ static struct attribute *slab_attrs[] = { &deactivate_to_head_attr.attr, 
&deactivate_to_tail_attr.attr, &deactivate_remote_frees_attr.attr, - &deactivate_bypass_attr.attr, &order_fallback_attr.attr, - &cmpxchg_double_fail_attr.attr, - &cmpxchg_double_cpu_fail_attr.attr, - &cpu_partial_alloc_attr.attr, - &cpu_partial_free_attr.attr, #endif #ifdef CONFIG_FAILSLAB &failslab_attr.attr, @@ -5458,7 +4921,7 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations); + proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations); return 0; } module_init(slab_proc_init); |
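A final note on the MAX_OBJS_PER_PAGE hunk near the top: it returns to 65535 because page.objects is a 16-bit field, which is also the width assumed by the OO_SHIFT/OO_MASK packing that oo_order() and oo_objects() decode. A standalone sketch of that encoding — oo_make() is simplified here; the kernel helper of the same name derives the object count from the order and object size rather than taking it directly:

	#include <stdio.h>

	/* Page order in the high bits, object count in the low 16 bits. */
	#define OO_SHIFT 16
	#define OO_MASK  ((1UL << OO_SHIFT) - 1)

	struct kmem_cache_order_objects { unsigned long x; };

	static struct kmem_cache_order_objects oo_make(int order,
						       unsigned long objects)
	{
		struct kmem_cache_order_objects x = {
			.x = ((unsigned long)order << OO_SHIFT) + objects
		};
		return x;
	}

	static int oo_order(struct kmem_cache_order_objects x)
	{
		return x.x >> OO_SHIFT;
	}

	static int oo_objects(struct kmem_cache_order_objects x)
	{
		return x.x & OO_MASK;
	}

	int main(void)
	{
		/* e.g. an order-1 slab (two pages) holding 42 objects */
		struct kmem_cache_order_objects oo = oo_make(1, 42);

		printf("order=%d objects=%d\n", oo_order(oo), oo_objects(oo));
		return 0;
	}

Packing order and object count into one word keeps struct kmem_cache_order_objects cheap to copy, and the 16-bit count field is what bounds MAX_OBJS_PER_PAGE.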