diff options
Diffstat (limited to 'drivers/staging')
-rw-r--r-- | drivers/staging/Kconfig | 2 | ||||
-rw-r--r-- | drivers/staging/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/android/Kconfig | 17 | ||||
-rw-r--r-- | drivers/staging/android/binder.c | 2 | ||||
-rw-r--r-- | drivers/staging/android/lowmemorykiller.c | 616 | ||||
-rw-r--r-- | drivers/staging/tidspbridge/rmgr/drv_interface.c | 32 | ||||
-rw-r--r-- | drivers/staging/zcache/Kconfig | 10 | ||||
-rw-r--r-- | drivers/staging/zcache/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/zcache/tmem.c | 102 | ||||
-rw-r--r-- | drivers/staging/zcache/tmem.h | 25 | ||||
-rw-r--r-- | drivers/staging/zcache/zcache-main.c (renamed from drivers/staging/zcache/zcache.c) | 802 | ||||
-rw-r--r-- | drivers/staging/zram/Kconfig | 37 | ||||
-rw-r--r-- | drivers/staging/zram/Makefile | 4 | ||||
-rw-r--r-- | drivers/staging/zram/xvmalloc.c | 510 | ||||
-rw-r--r-- | drivers/staging/zram/xvmalloc.h | 30 | ||||
-rw-r--r-- | drivers/staging/zram/xvmalloc_int.h | 95 | ||||
-rw-r--r-- | drivers/staging/zram/zram.txt | 76 | ||||
-rw-r--r-- | drivers/staging/zram/zram_drv.c | 769 | ||||
-rw-r--r-- | drivers/staging/zram/zram_drv.h | 132 | ||||
-rw-r--r-- | drivers/staging/zram/zram_sysfs.c | 257 |
20 files changed, 984 insertions, 2538 deletions
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 11a4b5b..d0f6718 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -104,8 +104,6 @@ source "drivers/staging/iio/Kconfig" source "drivers/staging/cs5535_gpio/Kconfig" -source "drivers/staging/zram/Kconfig" - source "drivers/staging/zcache/Kconfig" source "drivers/staging/wlags49_h2/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index ae62e92..9400eb7 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -43,8 +43,6 @@ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_CS5535_GPIO) += cs5535_gpio/ -obj-$(CONFIG_ZRAM) += zram/ -obj-$(CONFIG_XVMALLOC) += zram/ obj-$(CONFIG_ZCACHE) += zcache/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 2471949..30ad4d2 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -90,6 +90,23 @@ config ANDROID_LOW_MEMORY_KILLER ---help--- Register processes to be killed when memory is low +config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES + bool "Android Low Memory Killer: detect oom_adj values" + depends on ANDROID_LOW_MEMORY_KILLER + default y + ---help--- + Detect oom_adj values written to + /sys/module/lowmemorykiller/parameters/adj and convert them + to oom_score_adj values. + +config ANDROID_LMK_ADJ_RBTREE + bool "Use RBTREE for Android Low Memory Killer" + depends on ANDROID_LOW_MEMORY_KILLER + default y + ---help--- + Use oom_score_adj rbtree to select the best process to kill + when system in low memory status. 
+ endif # if ANDROID endmenu diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index e2b69f2..72db295 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -3346,7 +3346,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) { - seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ", ref->node->debug_id, ref->strong, ref->weak, ref->death); } diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 15bbcd3..5a9835b 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -1,16 +1,17 @@ /* drivers/misc/lowmemorykiller.c * * The lowmemorykiller driver lets user-space specify a set of memory thresholds - * where processes with a range of oom_adj values will get killed. Specify the - * minimum oom_adj values in /sys/module/lowmemorykiller/parameters/adj and the - * number of free pages in /sys/module/lowmemorykiller/parameters/minfree. Both - * files take a comma separated list of numbers in ascending order. + * where processes with a range of oom_score_adj values will get killed. Specify + * the minimum oom_score_adj values in + * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in + * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma + * separated list of numbers in ascending order. * * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and - * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill processes - * with a oom_adj value of 8 or higher when the free memory drops below 4096 pages - * and kill processes with a oom_adj value of 0 or higher when the free memory - * drops below 1024 pages. 
+ * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill + * processes with a oom_score_adj value of 8 or higher when the free memory + * drops below 4096 pages and kill processes with a oom_score_adj value of 0 or + * higher when the free memory drops below 1024 pages. * * The driver considers memory used for caches to be free, but if a large * percentage of the cached memory is locked this can be very inaccurate @@ -29,25 +30,18 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/oom.h> #include <linux/sched.h> -#include <linux/notifier.h> -#ifdef CONFIG_ZRAM_FOR_ANDROID #include <linux/swap.h> -#include <linux/device.h> -#include <linux/err.h> -#include <linux/mm_inline.h> -#endif /* CONFIG_ZRAM_FOR_ANDROID */ -#define ENHANCED_LMK_ROUTINE - -#ifdef ENHANCED_LMK_ROUTINE -#define LOWMEM_DEATHPENDING_DEPTH 3 -#endif +#include <linux/rcupdate.h> +#include <linux/notifier.h> -static uint32_t lowmem_debug_level = 2; +static uint32_t lowmem_debug_level = 1; static int lowmem_adj[6] = { 0, 1, @@ -55,54 +49,20 @@ static int lowmem_adj[6] = { 12, }; static int lowmem_adj_size = 4; -static size_t lowmem_minfree[6] = { +static int lowmem_minfree[6] = { 3 * 512, /* 6MB */ 2 * 1024, /* 8MB */ 4 * 1024, /* 16MB */ 16 * 1024, /* 64MB */ }; static int lowmem_minfree_size = 4; -#ifdef CONFIG_ZRAM_FOR_ANDROID -static struct class *lmk_class; -static struct device *lmk_dev; -static int lmk_kill_pid = 0; -static int lmk_kill_ok = 0; - -extern atomic_t optimize_comp_on; - -extern int isolate_lru_page_compcache(struct page *page); -extern void putback_lru_page(struct page *page); -extern unsigned int zone_id_shrink_pagelist(struct zone *zone_id,struct list_head *page_list); - -#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) - -#define SWAP_PROCESS_DEBUG_LOG 0 -/* free RAM 8M(2048 pages) */ -#define CHECK_FREE_MEMORY 2048 -/* free swap (10240 pages) */ 
-#define CHECK_FREE_SWAPSPACE 10240 - -static unsigned int check_free_memory = 0; - -enum pageout_io { - PAGEOUT_IO_ASYNC, - PAGEOUT_IO_SYNC, -}; - - -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - -#ifdef ENHANCED_LMK_ROUTINE -static struct task_struct *lowmem_deathpending[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; -#else static struct task_struct *lowmem_deathpending; -#endif static unsigned long lowmem_deathpending_timeout; #define lowmem_print(level, x...) \ do { \ if (lowmem_debug_level >= (level)) \ - printk(x); \ + pr_info(x); \ } while (0) static int @@ -117,44 +77,31 @@ task_notify_func(struct notifier_block *self, unsigned long val, void *data) { struct task_struct *task = data; -#ifdef ENHANCED_LMK_ROUTINE - int i = 0; - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) - if (task == lowmem_deathpending[i]) { - lowmem_deathpending[i] = NULL; - break; - } -#else if (task == lowmem_deathpending) lowmem_deathpending = NULL; -#endif return NOTIFY_OK; } +#ifdef CONFIG_ANDROID_LMK_ADJ_RBTREE +static struct task_struct *pick_next_from_adj_tree(struct task_struct *task); +static struct task_struct *pick_first_task(void); +static struct task_struct *pick_last_task(void); +#endif + static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { - struct task_struct *p; -#ifdef ENHANCED_LMK_ROUTINE - struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; -#else + struct task_struct *tsk; struct task_struct *selected = NULL; -#endif int rem = 0; int tasksize; int i; - int min_adj = OOM_ADJUST_MAX + 1; -#ifdef ENHANCED_LMK_ROUTINE - int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; - int selected_oom_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; - int all_selected_oom = 0; - int max_selected_oom_idx = 0; -#else + int min_score_adj = OOM_SCORE_ADJ_MAX + 1; + int minfree = 0; int selected_tasksize = 0; - int selected_oom_adj; -#endif + int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); #ifndef CONFIG_DMA_CMA - int other_free = 
global_page_state(NR_FREE_PAGES); + int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; #else int other_free = global_page_state(NR_FREE_PAGES) - global_page_state(NR_FREE_CMA_PAGES); @@ -169,152 +116,111 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) * this pass. * */ -#ifdef ENHANCED_LMK_ROUTINE - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { - if (lowmem_deathpending[i] && - time_before_eq(jiffies, lowmem_deathpending_timeout)) - return 0; - } -#else if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; -#endif if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { - if (other_free < lowmem_minfree[i] && - other_file < lowmem_minfree[i]) { - min_adj = lowmem_adj[i]; + minfree = lowmem_minfree[i]; + if (other_free < minfree && other_file < minfree) { + min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", - sc->nr_to_scan, sc->gfp_mask, other_free, other_file, - min_adj); + sc->nr_to_scan, sc->gfp_mask, other_free, + other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); - if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { + if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } -#ifdef ENHANCED_LMK_ROUTINE - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) - selected_oom_adj[i] = min_adj; + selected_oom_score_adj = min_score_adj; + rcu_read_lock(); + +#ifdef CONFIG_ANDROID_LMK_ADJ_RBTREE + for (tsk = pick_first_task(); + tsk != pick_last_task() && tsk != NULL; + tsk = pick_next_from_adj_tree(tsk)) { #else - selected_oom_adj = min_adj; + 
for_each_process(tsk) { #endif + struct task_struct *p; + int oom_score_adj; + if (tsk->flags & PF_KTHREAD) + continue; - read_lock(&tasklist_lock); - for_each_process(p) { - struct mm_struct *mm; - struct signal_struct *sig; - int oom_adj; -#ifdef ENHANCED_LMK_ROUTINE - int is_exist_oom_task = 0; -#endif - task_lock(p); - mm = p->mm; - sig = p->signal; - if (!mm || !sig) { + p = find_lock_task_mm(tsk); + if (!p) + continue; + + oom_score_adj = p->signal->oom_score_adj; + if (oom_score_adj < min_score_adj) { task_unlock(p); +#ifdef CONFIG_ANDROID_LMK_ADJ_RBTREE + break; +#else continue; +#endif } - oom_adj = sig->oom_adj; - if (oom_adj < min_adj) { + if (fatal_signal_pending(p)) { + lowmem_print(2, "skip slow dying process %d\n", p->pid); task_unlock(p); continue; } - tasksize = get_mm_rss(mm); + tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; -#ifdef ENHANCED_LMK_ROUTINE - if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) { - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { - if (!selected[i]) { - is_exist_oom_task = 1; - max_selected_oom_idx = i; - break; - } - } - } else if (selected_oom_adj[max_selected_oom_idx] < oom_adj || - (selected_oom_adj[max_selected_oom_idx] == oom_adj && - selected_tasksize[max_selected_oom_idx] < tasksize)) { - is_exist_oom_task = 1; - } - - if (is_exist_oom_task) { - selected[max_selected_oom_idx] = p; - selected_tasksize[max_selected_oom_idx] = tasksize; - selected_oom_adj[max_selected_oom_idx] = oom_adj; - - if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) - all_selected_oom++; - - if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) { - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { - if (selected_oom_adj[i] < selected_oom_adj[max_selected_oom_idx]) - max_selected_oom_idx = i; - else if (selected_oom_adj[i] == selected_oom_adj[max_selected_oom_idx] && - selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) - max_selected_oom_idx = i; - } - } - - lowmem_print(2, "select %d (%s), adj %d, size 
%d, to kill\n", - p->pid, p->comm, oom_adj, tasksize); - } -#else if (selected) { - if (oom_adj < selected_oom_adj) + if (oom_score_adj < selected_oom_score_adj) +#ifdef CONFIG_ANDROID_LMK_ADJ_RBTREE + break; +#else continue; - if (oom_adj == selected_oom_adj && +#endif + if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; - selected_oom_adj = oom_adj; - lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", - p->pid, p->comm, oom_adj, tasksize); -#endif + selected_oom_score_adj = oom_score_adj; + lowmem_print(2, "select '%s' (%d), adj %d, size %d, to kill\n", + p->comm, p->pid, oom_score_adj, tasksize); } -#ifdef ENHANCED_LMK_ROUTINE - for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { - if (selected[i]) { - lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", - selected[i]->pid, selected[i]->comm, - selected_oom_adj[i], selected_tasksize[i]); - lowmem_deathpending[i] = selected[i]; - lowmem_deathpending_timeout = jiffies + HZ; - force_sig(SIGKILL, selected[i]); - rem -= selected_tasksize[i]; - } - } -#else if (selected) { - lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", - selected->pid, selected->comm, - selected_oom_adj, selected_tasksize); + lowmem_print(1, "Killing '%s' (%d), adj %d,\n" \ + " to free %ldkB on behalf of '%s' (%d) because\n" \ + " cache %ldkB is below limit %ldkB for oom_score_adj %d\n" \ + " Free memory is %ldkB above reserved\n", + selected->comm, selected->pid, + selected_oom_score_adj, + selected_tasksize * (long)(PAGE_SIZE / 1024), + current->comm, current->pid, + other_file * (long)(PAGE_SIZE / 1024), + minfree * (long)(PAGE_SIZE / 1024), + min_score_adj, + other_free * (long)(PAGE_SIZE / 1024)); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; - force_sig(SIGKILL, selected); + send_sig(SIGKILL, selected, 0); rem -= selected_tasksize; } -#endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", 
sc->nr_to_scan, sc->gfp_mask, rem); - read_unlock(&tasklist_lock); + rcu_read_unlock(); return rem; } @@ -323,241 +229,203 @@ static struct shrinker lowmem_shrinker = { .seeks = DEFAULT_SEEKS * 16 }; -#ifdef CONFIG_ZRAM_FOR_ANDROID -/* - * zone_id_shrink_pagelist() clear page flags, - * update the memory zone status, and swap pagelist - */ - -static unsigned int shrink_pages(struct mm_struct *mm, - struct list_head *zone0_page_list, - struct list_head *zone1_page_list, - unsigned int num_to_scan) +static int __init lowmem_init(void) { - unsigned long addr; - unsigned int isolate_pages_countter = 0; - - struct vm_area_struct *vma = mm->mmap; - while (vma != NULL) { - - for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { - struct page *page; - /*get the page address from virtual memory address */ - page = follow_page(vma, addr, FOLL_GET); - - if (page && !IS_ERR(page)) { - - put_page(page); - /* only moveable, anonymous and not dirty pages can be swapped */ - if ((!PageUnevictable(page)) - && (!PageDirty(page)) && ((PageAnon(page))) - && (0 == page_is_file_cache(page))) { - switch (page_zone_id(page)) { - case 0: - if (!isolate_lru_page_compcache(page)) { - /* isolate page from LRU and add to temp list */ - /*create new page list, it will be used in shrink_page_list */ - list_add_tail(&page->lru, zone0_page_list); - isolate_pages_countter++; - } - break; - case 1: - if (!isolate_lru_page_compcache(page)) { - /* isolate page from LRU and add to temp list */ - /*create new page list, it will be used in shrink_page_list */ - list_add_tail(&page->lru, zone1_page_list); - isolate_pages_countter++; - } - break; - default: - break; - } - } - } - - if (isolate_pages_countter >= num_to_scan) { - return isolate_pages_countter; - } - } + task_free_register(&task_nb); + register_shrinker(&lowmem_shrinker); + return 0; +} - vma = vma->vm_next; - } +static void __exit lowmem_exit(void) +{ + unregister_shrinker(&lowmem_shrinker); + task_free_unregister(&task_nb); 
+} - return isolate_pages_countter; +#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES +static int lowmem_oom_adj_to_oom_score_adj(int oom_adj) +{ + if (oom_adj == OOM_ADJUST_MAX) + return OOM_SCORE_ADJ_MAX; + else + return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; } -/* - * swap_application_pages() will search the - * pages which can be swapped, then call - * zone_id_shrink_pagelist to update zone - * status - */ -static unsigned int swap_pages(struct list_head *zone0_page_list, - struct list_head *zone1_page_list) +static void lowmem_autodetect_oom_adj_values(void) { - struct zone *zone_id_0 = &NODE_DATA(0)->node_zones[0]; - struct zone *zone_id_1 = &NODE_DATA(0)->node_zones[1]; - unsigned int pages_counter = 0; - - /*if the page list is not empty, call zone_id_shrink_pagelist to update zone status */ - if ((zone_id_0) && (!list_empty(zone0_page_list))) { - pages_counter += - zone_id_shrink_pagelist(zone_id_0, zone0_page_list); - } - if ((zone_id_1) && (!list_empty(zone1_page_list))) { - pages_counter += - zone_id_shrink_pagelist(zone_id_1, zone1_page_list); + int i; + int oom_adj; + int oom_score_adj; + int array_size = ARRAY_SIZE(lowmem_adj); + + if (lowmem_adj_size < array_size) + array_size = lowmem_adj_size; + + if (array_size <= 0) + return; + + oom_adj = lowmem_adj[array_size - 1]; + if (oom_adj > OOM_ADJUST_MAX) + return; + + oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); + if (oom_score_adj <= OOM_ADJUST_MAX) + return; + + lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n"); + for (i = 0; i < array_size; i++) { + oom_adj = lowmem_adj[i]; + oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); + lowmem_adj[i] = oom_score_adj; + lowmem_print(1, "oom_adj %d => oom_score_adj %d\n", + oom_adj, oom_score_adj); } - return pages_counter; } -static ssize_t lmk_state_show(struct device *dev, - struct device_attribute *attr, char *buf) +static int lowmem_adj_array_set(const char *val, const struct kernel_param 
*kp) { - return sprintf(buf, "%d,%d\n", lmk_kill_pid, lmk_kill_ok); + int ret; + + ret = param_array_ops.set(val, kp); + + /* HACK: Autodetect oom_adj values in lowmem_adj array */ + lowmem_autodetect_oom_adj_values(); + + return ret; } -/* - * lmk_state_store() will called by framework, - * the framework will send the pid of process that need to be swapped - */ -static ssize_t lmk_state_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) +static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp) { - sscanf(buf, "%d,%d", &lmk_kill_pid, &lmk_kill_ok); + return param_array_ops.get(buffer, kp); +} - /* if the screen on, the optimized compcache will stop */ - if (atomic_read(&optimize_comp_on) != 1) - return size; +static void lowmem_adj_array_free(void *arg) +{ + param_array_ops.free(arg); +} - if (lmk_kill_ok == 1) { - struct task_struct *p; - struct task_struct *selected = NULL; - struct sysinfo ramzswap_info = { 0 }; - struct mm_struct *mm_scan = NULL; - - /* - * check the free RAM and swap area, - * stop the optimized compcache in cpu idle case; - * leave some swap area for using in low memory case - */ - si_swapinfo(&ramzswap_info); - si_meminfo(&ramzswap_info); - - if ((ramzswap_info.freeswap < CHECK_FREE_SWAPSPACE) || - (ramzswap_info.freeram < check_free_memory)) { -#if SWAP_PROCESS_DEBUG_LOG > 0 - printk(KERN_INFO "idletime compcache is ignored : free RAM %lu, free swap %lu\n", - ramzswap_info.freeram, ramzswap_info.freeswap); -#endif - lmk_kill_ok = 0; - return size; - } +static struct kernel_param_ops lowmem_adj_array_ops = { + .set = lowmem_adj_array_set, + .get = lowmem_adj_array_get, + .free = lowmem_adj_array_free, +}; - read_lock(&tasklist_lock); - for_each_process(p) { - if ((p->pid == lmk_kill_pid) && - (__task_cred(p)->uid > 10000)) { - task_lock(p); - selected = p; - if (!selected->mm || !selected->signal) { - task_unlock(p); - selected = NULL; - break; - } - mm_scan = selected->mm; - if 
(mm_scan) { - if (selected->flags & PF_KTHREAD) - mm_scan = NULL; - else - atomic_inc(&mm_scan->mm_users); - } - task_unlock(selected); - -#if SWAP_PROCESS_DEBUG_LOG > 0 - printk(KERN_INFO "idle time compcache: swap process pid %d, name %s, oom %d, task size %ld\n", - p->pid, p->comm, - p->signal->oom_adj, - get_mm_rss(p->mm)); +static const struct kparam_array __param_arr_adj = { + .max = ARRAY_SIZE(lowmem_adj), + .num = &lowmem_adj_size, + .ops = &param_ops_int, + .elemsize = sizeof(lowmem_adj[0]), + .elem = lowmem_adj, +}; #endif - break; - } - } - read_unlock(&tasklist_lock); - - if (mm_scan) { - LIST_HEAD(zone0_page_list); - LIST_HEAD(zone1_page_list); - int pages_tofree = 0, pages_freed = 0; - - down_read(&mm_scan->mmap_sem); - pages_tofree = - shrink_pages(mm_scan, &zone0_page_list, - &zone1_page_list, 0x7FFFFFFF); - up_read(&mm_scan->mmap_sem); - mmput(mm_scan); - pages_freed = - swap_pages(&zone0_page_list, - &zone1_page_list); - lmk_kill_ok = 0; - } +#ifdef CONFIG_ANDROID_LMK_ADJ_RBTREE +DEFINE_SPINLOCK(lmk_lock); +struct rb_root tasks_scoreadj = RB_ROOT; +/* + * Make sure to invoke the function with holding sighand->siglock + */ +void add_2_adj_tree(struct task_struct *task) +{ + struct rb_node **link; + struct rb_node *parent = NULL; + struct signal_struct *sig_entry; + s64 key = task->signal->oom_score_adj; + + /* + * Find the right place in the rbtree: + */ + spin_lock(&lmk_lock); + link = &tasks_scoreadj.rb_node; + while (*link) { + parent = *link; + sig_entry = rb_entry(parent, struct signal_struct, adj_node); + + if (key < sig_entry->oom_score_adj) + link = &parent->rb_right; + else + link = &parent->rb_left; } - return size; + rb_link_node(&task->signal->adj_node, parent, link); + rb_insert_color(&task->signal->adj_node, &tasks_scoreadj); + spin_unlock(&lmk_lock); } -static DEVICE_ATTR(lmk_state, 0664, lmk_state_show, lmk_state_store); +/* + * Make sure to invoke the function with holding sighand->siglock + */ +void delete_from_adj_tree(struct
task_struct *task) +{ + spin_lock(&lmk_lock); + if (!RB_EMPTY_NODE(&task->signal->adj_node)) { + rb_erase(&task->signal->adj_node, &tasks_scoreadj); + RB_CLEAR_NODE(&task->signal->adj_node); + } + spin_unlock(&lmk_lock); +} -#endif /* CONFIG_ZRAM_FOR_ANDROID */ -static int __init lowmem_init(void) +static struct task_struct *pick_next_from_adj_tree(struct task_struct *task) { -#ifdef CONFIG_ZRAM_FOR_ANDROID - struct zone *zone; - unsigned int high_wmark = 0; -#endif - task_free_register(&task_nb); - register_shrinker(&lowmem_shrinker); + struct rb_node *next; + struct signal_struct *next_tsk_sig; -#ifdef CONFIG_ZRAM_FOR_ANDROID - for_each_zone(zone) { - if (high_wmark < zone->watermark[WMARK_HIGH]) - high_wmark = zone->watermark[WMARK_HIGH]; - } - check_free_memory = (high_wmark != 0) ? high_wmark : CHECK_FREE_MEMORY; + spin_lock(&lmk_lock); + next = rb_next(&task->signal->adj_node); + spin_unlock(&lmk_lock); - lmk_class = class_create(THIS_MODULE, "lmk"); - if (IS_ERR(lmk_class)) { - printk(KERN_ERR "Failed to create class(lmk)\n"); - return 0; - } - lmk_dev = device_create(lmk_class, NULL, 0, NULL, "lowmemorykiller"); - if (IS_ERR(lmk_dev)) { - printk(KERN_ERR - "Failed to create device(lowmemorykiller)!= %ld\n", - IS_ERR(lmk_dev)); - return 0; - } - if (device_create_file(lmk_dev, &dev_attr_lmk_state) < 0) - printk(KERN_ERR "Failed to create device file(%s)!\n", - dev_attr_lmk_state.attr.name); -#endif /* CONFIG_ZRAM_FOR_ANDROID */ + if (!next) + return NULL; - return 0; + next_tsk_sig = rb_entry(next, struct signal_struct, adj_node); + return next_tsk_sig->curr_target->group_leader; } -static void __exit lowmem_exit(void) +static struct task_struct *pick_first_task(void) { - unregister_shrinker(&lowmem_shrinker); - task_free_unregister(&task_nb); + struct rb_node *left; + struct signal_struct *first_tsk_sig; + + spin_lock(&lmk_lock); + left = rb_first(&tasks_scoreadj); + spin_unlock(&lmk_lock); + + if (!left) + return NULL; + + first_tsk_sig = rb_entry(left, 
struct signal_struct, adj_node); + return first_tsk_sig->curr_target->group_leader; +} + +static struct task_struct *pick_last_task(void) +{ + struct rb_node *right; + struct signal_struct *last_tsk_sig; + + spin_lock(&lmk_lock); + right = rb_last(&tasks_scoreadj); + spin_unlock(&lmk_lock); + + if (!right) + return NULL; + + last_tsk_sig = rb_entry(right, struct signal_struct, adj_node); + return last_tsk_sig->curr_target->group_leader; } +#endif module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); +#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES +__module_param_call(MODULE_PARAM_PREFIX, adj, + &lowmem_adj_array_ops, + .arr = &__param_arr_adj, + S_IRUGO | S_IWUSR, 0664); +__MODULE_PARM_TYPE(adj, "array of int"); +#else module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size, S_IRUGO | S_IWUSR); +#endif module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, S_IRUGO | S_IWUSR); module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR); diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index 80c2ee5..c43c7e3 100644 --- a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -617,11 +617,7 @@ err: static int bridge_mmap(struct file *filp, struct vm_area_struct *vma) { u32 offset = vma->vm_pgoff << PAGE_SHIFT; - unsigned long base_pgoff; - int status; - - struct omap_dsp_platform_data *pdata = - omap_dspbridge_dev->dev.platform_data; + u32 status; DBC_ASSERT(vma->vm_start < vma->vm_end); @@ -632,27 +628,11 @@ static int bridge_mmap(struct file *filp, struct vm_area_struct *vma) "%lx flags %lx\n", __func__, filp, offset, vma->vm_start, vma->vm_end, vma->vm_page_prot, vma->vm_flags); - /* - * vm_iomap_memory() expects vma->vm_pgoff to be expressed as an offset - * from the start of the physical memory pool, but we're called with - * a pfn (physical page number) stored 
there instead. - * - * To avoid duplicating lots of tricky overflow checking logic, - * temporarily convert vma->vm_pgoff to the offset vm_iomap_memory() - * expects, but restore the original value once the mapping has been - * created. - */ - base_pgoff = pdata->phys_mempool_base >> PAGE_SHIFT; - if (vma->vm_pgoff < base_pgoff) - return -EINVAL; - vma->vm_pgoff -= base_pgoff; - - status = vm_iomap_memory(vma, - pdata->phys_mempool_base, - pdata->phys_mempool_size); - - /* Restore the original value of vma->vm_pgoff */ - vma->vm_pgoff += base_pgoff; + status = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + if (status != 0) + status = -EAGAIN; return status; } diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig index 7fabcb2..4881839 100644 --- a/drivers/staging/zcache/Kconfig +++ b/drivers/staging/zcache/Kconfig @@ -1,13 +1,11 @@ config ZCACHE - tristate "Dynamic compression of swap pages and clean pagecache pages" - depends on CLEANCACHE || FRONTSWAP - select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS + bool "Dynamic compression of swap pages and clean pagecache pages" + depends on (CLEANCACHE || FRONTSWAP) && CRYPTO=y && ZSMALLOC=y + select CRYPTO_LZO default n help Zcache doubles RAM efficiency while providing a significant - performance boosts on many workloads. Zcache uses lzo1x + performance boosts on many workloads. Zcache uses compression and an in-kernel implementation of transcendent memory to store clean page cache pages and swap in RAM, providing a noticeable reduction in disk I/O. 
diff --git a/drivers/staging/zcache/Makefile b/drivers/staging/zcache/Makefile index f5ec64f..60daa27 100644 --- a/drivers/staging/zcache/Makefile +++ b/drivers/staging/zcache/Makefile @@ -1,3 +1,3 @@ -zcache-y := tmem.o +zcache-y := zcache-main.o tmem.o obj-$(CONFIG_ZCACHE) += zcache.o diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c index e954d40..1ca66ea 100644 --- a/drivers/staging/zcache/tmem.c +++ b/drivers/staging/zcache/tmem.c @@ -142,6 +142,7 @@ static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb, obj->oid = *oidp; obj->objnode_count = 0; obj->pampd_count = 0; + (*tmem_pamops.new_obj)(obj); SET_SENTINEL(obj, OBJ); while (*new) { BUG_ON(RB_EMPTY_NODE(*new)); @@ -274,7 +275,7 @@ static void tmem_objnode_free(struct tmem_objnode *objnode) /* * lookup index in object and return associated pampd (or NULL if not found) */ -static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) +static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) { unsigned int height, shift; struct tmem_objnode **slot = NULL; @@ -303,9 +304,33 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) height--; } out: + return slot != NULL ? (void **)slot : NULL; +} + +static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index) +{ + struct tmem_objnode **slot; + + slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); return slot != NULL ? 
*slot : NULL; } +static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index, + void *new_pampd) +{ + struct tmem_objnode **slot; + void *ret = NULL; + + slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index); + if ((slot != NULL) && (*slot != NULL)) { + void *old_pampd = *(void **)slot; + *(void **)slot = new_pampd; + (*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0); + ret = new_pampd; + } + return ret; +} + static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index, void *pampd) { @@ -456,7 +481,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj, if (ht == 1) { obj->pampd_count--; (*tmem_pamops.free)(objnode->slots[i], - obj->pool); + obj->pool, NULL, 0); objnode->slots[i] = NULL; continue; } @@ -473,7 +498,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) return; if (obj->objnode_tree_height == 0) { obj->pampd_count--; - (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool); + (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0); } else { tmem_objnode_node_destroy(obj, obj->objnode_tree_root, obj->objnode_tree_height); @@ -481,6 +506,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) obj->objnode_tree_height = 0; } obj->objnode_tree_root = NULL; + (*tmem_pamops.free_obj)(obj->pool, obj); } /* @@ -503,15 +529,13 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj) * always flushes for simplicity. 
*/ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, - struct page *page) + char *data, size_t size, bool raw, bool ephemeral) { struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL; void *pampd = NULL, *pampd_del = NULL; int ret = -ENOMEM; - bool ephemeral; struct tmem_hashbucket *hb; - ephemeral = is_ephemeral(pool); hb = &pool->hashbucket[tmem_oid_hash(oidp)]; spin_lock(&hb->lock); obj = objfound = tmem_obj_find(hb, oidp); @@ -521,7 +545,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, /* if found, is a dup put, flush the old one */ pampd_del = tmem_pampd_delete_from_obj(obj, index); BUG_ON(pampd_del != pampd); - (*tmem_pamops.free)(pampd, pool); + (*tmem_pamops.free)(pampd, pool, oidp, index); if (obj->pampd_count == 0) { objnew = obj; objfound = NULL; @@ -538,7 +562,8 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, } BUG_ON(obj == NULL); BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound)); - pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page); + pampd = (*tmem_pamops.create)(data, size, raw, ephemeral, + obj->pool, &obj->oid, index); if (unlikely(pampd == NULL)) goto free; ret = tmem_pampd_add_to_obj(obj, index, pampd); @@ -551,7 +576,7 @@ delete_and_free: (void)tmem_pampd_delete_from_obj(obj, index); free: if (pampd) - (*tmem_pamops.free)(pampd, pool); + (*tmem_pamops.free)(pampd, pool, NULL, 0); if (objnew) { tmem_obj_free(objnew, hb); (*tmem_hostops.obj_free)(objnew, pool); @@ -573,41 +598,52 @@ out: * "put" done with the same handle). 
*/ -int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, - uint32_t index, struct page *page) +int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, + char *data, size_t *size, bool raw, int get_and_free) { struct tmem_obj *obj; void *pampd; bool ephemeral = is_ephemeral(pool); - uint32_t ret = -1; + int ret = -1; struct tmem_hashbucket *hb; + bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral); + bool lock_held = false; hb = &pool->hashbucket[tmem_oid_hash(oidp)]; spin_lock(&hb->lock); + lock_held = true; obj = tmem_obj_find(hb, oidp); if (obj == NULL) goto out; - ephemeral = is_ephemeral(pool); - if (ephemeral) + if (free) pampd = tmem_pampd_delete_from_obj(obj, index); else pampd = tmem_pampd_lookup_in_obj(obj, index); if (pampd == NULL) goto out; - ret = (*tmem_pamops.get_data)(page, pampd, pool); - if (ret < 0) - goto out; - if (ephemeral) { - (*tmem_pamops.free)(pampd, pool); + if (free) { if (obj->pampd_count == 0) { tmem_obj_free(obj, hb); (*tmem_hostops.obj_free)(obj, pool); obj = NULL; } } + if (tmem_pamops.is_remote(pampd)) { + lock_held = false; + spin_unlock(&hb->lock); + } + if (free) + ret = (*tmem_pamops.get_data_and_free)( + data, size, raw, pampd, pool, oidp, index); + else + ret = (*tmem_pamops.get_data)( + data, size, raw, pampd, pool, oidp, index); + if (ret < 0) + goto out; ret = 0; out: - spin_unlock(&hb->lock); + if (lock_held) + spin_unlock(&hb->lock); return ret; } @@ -632,7 +668,7 @@ int tmem_flush_page(struct tmem_pool *pool, pampd = tmem_pampd_delete_from_obj(obj, index); if (pampd == NULL) goto out; - (*tmem_pamops.free)(pampd, pool); + (*tmem_pamops.free)(pampd, pool, oidp, index); if (obj->pampd_count == 0) { tmem_obj_free(obj, hb); (*tmem_hostops.obj_free)(obj, pool); @@ -645,6 +681,30 @@ out: } /* + * If a page in tmem matches the handle, replace the page so that any + * subsequent "get" gets the new page. Returns 0 if + * there was a page to replace, else returns -1. 
+ */ +int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp, + uint32_t index, void *new_pampd) +{ + struct tmem_obj *obj; + int ret = -1; + struct tmem_hashbucket *hb; + + hb = &pool->hashbucket[tmem_oid_hash(oidp)]; + spin_lock(&hb->lock); + obj = tmem_obj_find(hb, oidp); + if (obj == NULL) + goto out; + new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd); + ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj); +out: + spin_unlock(&hb->lock); + return ret; +} + +/* * "Flush" all pages in tmem matching this oid. */ int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp) diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h index 2e07e21..0d4aa82 100644 --- a/drivers/staging/zcache/tmem.h +++ b/drivers/staging/zcache/tmem.h @@ -47,7 +47,7 @@ #define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0) #endif -#define ASSERT_SPINLOCK(_l) WARN_ON(!spin_is_locked(_l)) +#define ASSERT_SPINLOCK(_l) lockdep_assert_held(_l) /* * A pool is the highest-level data structure managed by tmem and @@ -147,6 +147,7 @@ struct tmem_obj { unsigned int objnode_tree_height; unsigned long objnode_count; long pampd_count; + void *extra; /* for private use by pampd implementation */ DECL_SENTINEL }; @@ -166,10 +167,18 @@ struct tmem_objnode { /* pampd abstract datatype methods provided by the PAM implementation */ struct tmem_pamops { - void *(*create)(struct tmem_pool *, struct tmem_oid *, uint32_t, - struct page *); - int (*get_data)(struct page *, void *, struct tmem_pool *); - void (*free)(void *, struct tmem_pool *); + void *(*create)(char *, size_t, bool, int, + struct tmem_pool *, struct tmem_oid *, uint32_t); + int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *, + struct tmem_oid *, uint32_t); + int (*get_data_and_free)(char *, size_t *, bool, void *, + struct tmem_pool *, struct tmem_oid *, + uint32_t); + void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t); + void (*free_obj)(struct tmem_pool *, 
struct tmem_obj *); + bool (*is_remote)(void *); + void (*new_obj)(struct tmem_obj *); + int (*replace_in_obj)(void *, struct tmem_obj *); }; extern void tmem_register_pamops(struct tmem_pamops *m); @@ -184,9 +193,11 @@ extern void tmem_register_hostops(struct tmem_hostops *m); /* core tmem accessor functions */ extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index, - struct page *page); + char *, size_t, bool, bool); extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index, - struct page *page); + char *, size_t *, bool, int); +extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index, + void *); extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *, uint32_t index); extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *); diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache-main.c index 77ac2d4..eb65043 100644 --- a/drivers/staging/zcache/zcache.c +++ b/drivers/staging/zcache/zcache-main.c @@ -6,9 +6,10 @@ * * Zcache provides an in-kernel "host implementation" for transcendent memory * and, thus indirectly, for cleancache and frontswap. Zcache includes two - * page-accessible memory [1] interfaces, both utilizing lzo1x compression: + * page-accessible memory [1] interfaces, both utilizing the crypto compression + * API: * 1) "compression buddies" ("zbud") is used for ephemeral pages - * 2) xvmalloc is used for persistent pages. + * 2) zsmalloc is used for persistent pages. 
* Xvmalloc (based on the TLSF allocator) has very low fragmentation * so maximizes space efficiency, while zbud allows pairs (and potentially, * in the future, more than a pair of) compressed pages to be closely linked @@ -19,17 +20,20 @@ * http://marc.info/?l=linux-mm&m=127811271605009 */ +#include <linux/module.h> #include <linux/cpu.h> #include <linux/highmem.h> #include <linux/list.h> -#include <linux/lzo.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> +#include <linux/math64.h> +#include <linux/crypto.h> +#include <linux/string.h> #include "tmem.h" -#include "../zram/xvmalloc.h" /* if built in drivers/staging */ +#include <linux/zsmalloc.h> #if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP)) #error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP" @@ -49,6 +53,68 @@ (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC) #endif +#define MAX_POOLS_PER_CLIENT 16 + +#define MAX_CLIENTS 16 +#define LOCAL_CLIENT ((uint16_t)-1) + +MODULE_LICENSE("GPL"); + +struct zcache_client { + struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; + struct zs_pool *zspool; + bool allocated; + atomic_t refcount; +}; + +static struct zcache_client zcache_host; +static struct zcache_client zcache_clients[MAX_CLIENTS]; + +static inline uint16_t get_client_id_from_client(struct zcache_client *cli) +{ + BUG_ON(cli == NULL); + if (cli == &zcache_host) + return LOCAL_CLIENT; + return cli - &zcache_clients[0]; +} + +static inline bool is_local_client(struct zcache_client *cli) +{ + return cli == &zcache_host; +} + +/* crypto API for zcache */ +#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME +static char zcache_comp_name[ZCACHE_COMP_NAME_SZ]; +static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms; + +enum comp_op { + ZCACHE_COMPOP_COMPRESS, + ZCACHE_COMPOP_DECOMPRESS +}; + +static inline int zcache_comp_op(enum comp_op op, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + 
struct crypto_comp *tfm; + int ret; + + BUG_ON(!zcache_comp_pcpu_tfms); + tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu()); + BUG_ON(!tfm); + switch (op) { + case ZCACHE_COMPOP_COMPRESS: + ret = crypto_comp_compress(tfm, src, slen, dst, dlen); + break; + case ZCACHE_COMPOP_DECOMPRESS: + ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); + break; + } + put_cpu(); + return ret; +} + /********** * Compression buddies ("zbud") provides for packing two (or, possibly * in the future, more) compressed ephemeral pages into a single "raw" @@ -72,7 +138,8 @@ #define ZBUD_MAX_BUDS 2 struct zbud_hdr { - uint32_t pool_id; + uint16_t client_id; + uint16_t pool_id; struct tmem_oid oid; uint32_t index; uint16_t size; /* compressed size in bytes, zero means unused */ @@ -120,6 +187,7 @@ static unsigned long zcache_zbud_curr_zbytes; static unsigned long zcache_zbud_cumul_zpages; static unsigned long zcache_zbud_cumul_zbytes; static unsigned long zcache_compress_poor; +static unsigned long zcache_mean_compress_poor; /* forward references */ static void *zcache_get_free_page(void); @@ -265,10 +333,12 @@ static void zbud_free_and_delist(struct zbud_hdr *zh) struct zbud_page *zbpg = container_of(zh, struct zbud_page, buddy[budnum]); + spin_lock(&zbud_budlists_spinlock); spin_lock(&zbpg->lock); if (list_empty(&zbpg->bud_list)) { /* ignore zombie page... see zbud_evict_pages() */ spin_unlock(&zbpg->lock); + spin_unlock(&zbud_budlists_spinlock); return; } size = zbud_free(zh); @@ -276,7 +346,6 @@ static void zbud_free_and_delist(struct zbud_hdr *zh) zh_other = &zbpg->buddy[(budnum == 0) ? 
1 : 0]; if (zh_other->size == 0) { /* was unbuddied: unlist and free */ chunks = zbud_size_to_chunks(size) ; - spin_lock(&zbud_budlists_spinlock); BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); list_del_init(&zbpg->bud_list); zbud_unbuddied[chunks].count--; @@ -284,7 +353,6 @@ static void zbud_free_and_delist(struct zbud_hdr *zh) zbud_free_raw_page(zbpg); } else { /* was buddied: move remaining buddy to unbuddied list */ chunks = zbud_size_to_chunks(zh_other->size) ; - spin_lock(&zbud_budlists_spinlock); list_del_init(&zbpg->bud_list); zcache_zbud_buddied_count--; list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); @@ -294,7 +362,8 @@ static void zbud_free_and_delist(struct zbud_hdr *zh) } } -static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid, +static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id, + struct tmem_oid *oid, uint32_t index, struct page *page, void *cdata, unsigned size) { @@ -323,8 +392,8 @@ static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid, if (unlikely(zbpg == NULL)) goto out; /* ok, have a page, now compress the data before taking locks */ - spin_lock(&zbpg->lock); spin_lock(&zbud_budlists_spinlock); + spin_lock(&zbpg->lock); list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list); zbud_unbuddied[nchunks].count++; zh = &zbpg->buddy[0]; @@ -353,12 +422,12 @@ init_zh: zh->index = index; zh->oid = *oid; zh->pool_id = pool_id; - /* can wait to copy the data until the list locks are dropped */ - spin_unlock(&zbud_budlists_spinlock); - + zh->client_id = client_id; to = zbud_data(zh, size); memcpy(to, cdata, size); spin_unlock(&zbpg->lock); + spin_unlock(&zbud_budlists_spinlock); + zbud_cumul_chunk_counts[nchunks]++; atomic_inc(&zcache_zbud_curr_zpages); zcache_zbud_cumul_zpages++; @@ -372,7 +441,7 @@ static int zbud_decompress(struct page *page, struct zbud_hdr *zh) { struct zbud_page *zbpg; unsigned budnum = zbud_budnum(zh); - size_t out_len = PAGE_SIZE; + unsigned 
int out_len = PAGE_SIZE; char *to_va, *from_va; unsigned size; int ret = 0; @@ -389,8 +458,9 @@ static int zbud_decompress(struct page *page, struct zbud_hdr *zh) to_va = kmap_atomic(page, KM_USER0); size = zh->size; from_va = zbud_data(zh, size); - ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len); - BUG_ON(ret != LZO_E_OK); + ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size, + to_va, &out_len); + BUG_ON(ret); BUG_ON(out_len != PAGE_SIZE); kunmap_atomic(to_va, KM_USER0); out: @@ -407,7 +477,8 @@ static unsigned long zcache_evicted_raw_pages; static unsigned long zcache_evicted_buddied_pages; static unsigned long zcache_evicted_unbuddied_pages; -static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid); +static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, + uint16_t poolid); static void zcache_put_pool(struct tmem_pool *pool); /* @@ -417,7 +488,8 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg) { struct zbud_hdr *zh; int i, j; - uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS]; + uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS]; + uint32_t index[ZBUD_MAX_BUDS]; struct tmem_oid oid[ZBUD_MAX_BUDS]; struct tmem_pool *pool; @@ -426,6 +498,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg) for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) { zh = &zbpg->buddy[i]; if (zh->size) { + client_id[j] = zh->client_id; pool_id[j] = zh->pool_id; oid[j] = zh->oid; index[j] = zh->index; @@ -435,7 +508,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg) } spin_unlock(&zbpg->lock); for (i = 0; i < j; i++) { - pool = zcache_get_pool_by_id(pool_id[i]); + pool = zcache_get_pool_by_id(client_id[i], pool_id[i]); if (pool != NULL) { tmem_flush_page(pool, &oid[i], index[i]); zcache_put_pool(pool); @@ -552,9 +625,8 @@ static int zbud_show_unbuddied_list_counts(char *buf) int i; char *p = buf; - for (i = 0; i < NCHUNKS - 1; i++) + for (i = 0; i < NCHUNKS; i++) p += sprintf(p, "%u ", zbud_unbuddied[i].count); - p += sprintf(p, 
"%d\n", zbud_unbuddied[i].count); return p - buf; } @@ -585,8 +657,8 @@ static int zbud_show_cumul_chunk_counts(char *buf) #endif /********** - * This "zv" PAM implementation combines the TLSF-based xvMalloc - * with lzo1x compression to maximize the amount of data that can + * This "zv" PAM implementation combines the slab-based zsmalloc + * with the crypto compression API to maximize the amount of data that can * be packed into a physical page. * * Zv represents a PAM page with the index and object (plus a "size" value @@ -599,72 +671,250 @@ struct zv_hdr { uint32_t pool_id; struct tmem_oid oid; uint32_t index; + size_t size; DECL_SENTINEL }; -static const int zv_max_page_size = (PAGE_SIZE / 8) * 7; +/* rudimentary policy limits */ +/* total number of persistent pages may not exceed this percentage */ +static unsigned int zv_page_count_policy_percent = 75; +/* + * byte count defining poor compression; pages with greater zsize will be + * rejected + */ +static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7; +/* + * byte count defining poor *mean* compression; pages with greater zsize + * will be rejected until sufficient better-compressed pages are accepted + * driving the mean below this threshold + */ +static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5; + +static atomic_t zv_curr_dist_counts[NCHUNKS]; +static atomic_t zv_cumul_dist_counts[NCHUNKS]; -static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id, +static unsigned long zv_create(struct zs_pool *pool, uint32_t pool_id, struct tmem_oid *oid, uint32_t index, void *cdata, unsigned clen) { - struct page *page; - struct zv_hdr *zv = NULL; - uint32_t offset; - int ret; + struct zv_hdr *zv; + u32 size = clen + sizeof(struct zv_hdr); + int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT; + unsigned long handle = 0; BUG_ON(!irqs_disabled()); - ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr), - &page, &offset, ZCACHE_GFP_MASK); - if (unlikely(ret)) + BUG_ON(chunks >= NCHUNKS); + 
handle = zs_malloc(pool, size); + if (!handle) goto out; - zv = kmap_atomic(page, KM_USER0) + offset; + atomic_inc(&zv_curr_dist_counts[chunks]); + atomic_inc(&zv_cumul_dist_counts[chunks]); + zv = zs_map_object(pool, handle, ZS_MM_WO); zv->index = index; zv->oid = *oid; zv->pool_id = pool_id; + zv->size = clen; SET_SENTINEL(zv, ZVH); memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); - kunmap_atomic(zv, KM_USER0); + zs_unmap_object(pool, handle); out: - return zv; + return handle; } -static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv) +static void zv_free(struct zs_pool *pool, unsigned long handle) { unsigned long flags; - struct page *page; - uint32_t offset; + struct zv_hdr *zv; uint16_t size; + int chunks; + zv = zs_map_object(pool, handle, ZS_MM_RW); ASSERT_SENTINEL(zv, ZVH); - size = xv_get_object_size(zv) - sizeof(*zv); - BUG_ON(size == 0 || size > zv_max_page_size); + size = zv->size + sizeof(struct zv_hdr); INVERT_SENTINEL(zv, ZVH); - page = virt_to_page(zv); - offset = (unsigned long)zv & ~PAGE_MASK; + zs_unmap_object(pool, handle); + + chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT; + BUG_ON(chunks >= NCHUNKS); + atomic_dec(&zv_curr_dist_counts[chunks]); + local_irq_save(flags); - xv_free(xvpool, page, offset); + zs_free(pool, handle); local_irq_restore(flags); } -static void zv_decompress(struct page *page, struct zv_hdr *zv) +static void zv_decompress(struct page *page, unsigned long handle) { - size_t clen = PAGE_SIZE; + unsigned int clen = PAGE_SIZE; char *to_va; - unsigned size; int ret; + struct zv_hdr *zv; + zv = zs_map_object(zcache_host.zspool, handle, ZS_MM_RO); + BUG_ON(zv->size == 0); ASSERT_SENTINEL(zv, ZVH); - size = xv_get_object_size(zv) - sizeof(*zv); - BUG_ON(size == 0 || size > zv_max_page_size); to_va = kmap_atomic(page, KM_USER0); - ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv), - size, to_va, &clen); + ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, (char *)zv + sizeof(*zv), + zv->size, to_va, &clen); 
kunmap_atomic(to_va, KM_USER0); - BUG_ON(ret != LZO_E_OK); + zs_unmap_object(zcache_host.zspool, handle); + BUG_ON(ret); BUG_ON(clen != PAGE_SIZE); } +#ifdef CONFIG_SYSFS +/* + * show a distribution of compression stats for zv pages. + */ + +static int zv_curr_dist_counts_show(char *buf) +{ + unsigned long i, n, chunks = 0, sum_total_chunks = 0; + char *p = buf; + + for (i = 0; i < NCHUNKS; i++) { + n = atomic_read(&zv_curr_dist_counts[i]); + p += sprintf(p, "%lu ", n); + chunks += n; + sum_total_chunks += i * n; + } + p += sprintf(p, "mean:%lu\n", + chunks == 0 ? 0 : sum_total_chunks / chunks); + return p - buf; +} + +static int zv_cumul_dist_counts_show(char *buf) +{ + unsigned long i, n, chunks = 0, sum_total_chunks = 0; + char *p = buf; + + for (i = 0; i < NCHUNKS; i++) { + n = atomic_read(&zv_cumul_dist_counts[i]); + p += sprintf(p, "%lu ", n); + chunks += n; + sum_total_chunks += i * n; + } + p += sprintf(p, "mean:%lu\n", + chunks == 0 ? 0 : sum_total_chunks / chunks); + return p - buf; +} + +/* + * setting zv_max_zsize via sysfs causes all persistent (e.g. swap) + * pages that don't compress to less than this value (including metadata + * overhead) to be rejected. We don't allow the value to get too close + * to PAGE_SIZE. + */ +static ssize_t zv_max_zsize_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", zv_max_zsize); +} + +static ssize_t zv_max_zsize_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = kstrtoul(buf, 10, &val); + if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7)) + return -EINVAL; + zv_max_zsize = val; + return count; +} + +/* + * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. 
swap) + * pages that don't compress to less than this value (including metadata + * overhead) to be rejected UNLESS the mean compression is also smaller + * than this value. In other words, we are load-balancing-by-zsize the + * accepted pages. Again, we don't allow the value to get too close + * to PAGE_SIZE. + */ +static ssize_t zv_max_mean_zsize_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", zv_max_mean_zsize); +} + +static ssize_t zv_max_mean_zsize_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = kstrtoul(buf, 10, &val); + if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7)) + return -EINVAL; + zv_max_mean_zsize = val; + return count; +} + +/* + * setting zv_page_count_policy_percent via sysfs sets an upper bound of + * persistent (e.g. swap) pages that will be retained according to: + * (zv_page_count_policy_percent * totalram_pages) / 100) + * when that limit is reached, further puts will be rejected (until + * some pages have been flushed). Note that, due to compression, + * this number may exceed 100; it defaults to 75 and we set an + * arbitary limit of 150. A poor choice will almost certainly result + * in OOM's, so this value should only be changed prudently. 
+ */ +static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", zv_page_count_policy_percent); +} + +static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = kstrtoul(buf, 10, &val); + if (err || (val == 0) || (val > 150)) + return -EINVAL; + zv_page_count_policy_percent = val; + return count; +} + +static struct kobj_attribute zcache_zv_max_zsize_attr = { + .attr = { .name = "zv_max_zsize", .mode = 0644 }, + .show = zv_max_zsize_show, + .store = zv_max_zsize_store, +}; + +static struct kobj_attribute zcache_zv_max_mean_zsize_attr = { + .attr = { .name = "zv_max_mean_zsize", .mode = 0644 }, + .show = zv_max_mean_zsize_show, + .store = zv_max_mean_zsize_store, +}; + +static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = { + .attr = { .name = "zv_page_count_policy_percent", + .mode = 0644 }, + .show = zv_page_count_policy_percent_show, + .store = zv_page_count_policy_percent_store, +}; +#endif + /* * zcache core code starts here */ @@ -677,51 +927,76 @@ static unsigned long zcache_flobj_found; static unsigned long zcache_failed_eph_puts; static unsigned long zcache_failed_pers_puts; -#define MAX_POOLS_PER_CLIENT 16 - -static struct { - struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; - struct xv_pool *xvpool; -} zcache_client; - /* * Tmem operations assume the poolid implies the invoking client. - * Zcache only has one client (the kernel itself), so translate - * the poolid into the tmem_pool allocated for it. A KVM version + * Zcache only has one client (the kernel itself): LOCAL_CLIENT. + * RAMster has each client numbered by cluster node, and a KVM version * of zcache would have one client per guest and each client might * have a poolid==N. 
*/ -static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid) +static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid) { struct tmem_pool *pool = NULL; + struct zcache_client *cli = NULL; - if (poolid >= 0) { - pool = zcache_client.tmem_pools[poolid]; + if (cli_id == LOCAL_CLIENT) + cli = &zcache_host; + else { + if (cli_id >= MAX_CLIENTS) + goto out; + cli = &zcache_clients[cli_id]; + if (cli == NULL) + goto out; + atomic_inc(&cli->refcount); + } + if (poolid < MAX_POOLS_PER_CLIENT) { + pool = cli->tmem_pools[poolid]; if (pool != NULL) atomic_inc(&pool->refcount); } +out: return pool; } static void zcache_put_pool(struct tmem_pool *pool) { - if (pool != NULL) - atomic_dec(&pool->refcount); + struct zcache_client *cli = NULL; + + if (pool == NULL) + BUG(); + cli = pool->client; + atomic_dec(&pool->refcount); + atomic_dec(&cli->refcount); +} + +int zcache_new_client(uint16_t cli_id) +{ + struct zcache_client *cli = NULL; + int ret = -1; + + if (cli_id == LOCAL_CLIENT) + cli = &zcache_host; + else if ((unsigned int)cli_id < MAX_CLIENTS) + cli = &zcache_clients[cli_id]; + if (cli == NULL) + goto out; + if (cli->allocated) + goto out; + cli->allocated = 1; +#ifdef CONFIG_FRONTSWAP + cli->zspool = zs_create_pool("zcache", ZCACHE_GFP_MASK); + if (cli->zspool == NULL) + goto out; +#endif + ret = 0; +out: + return ret; } /* counters for debugging */ static unsigned long zcache_failed_get_free_pages; static unsigned long zcache_failed_alloc; static unsigned long zcache_put_to_flush; -static unsigned long zcache_aborted_preload; -static unsigned long zcache_aborted_shrink; - -/* - * Ensure that memory allocation requests in zcache don't result - * in direct reclaim requests via the shrinker, which would cause - * an infinite loop. Maybe a GFP flag would be better? 
- */ -static DEFINE_SPINLOCK(zcache_direct_reclaim_lock); /* * for now, used named slabs so can easily track usage; later can @@ -760,10 +1035,6 @@ static int zcache_do_preload(struct tmem_pool *pool) goto out; if (unlikely(zcache_obj_cache == NULL)) goto out; - if (!spin_trylock(&zcache_direct_reclaim_lock)) { - zcache_aborted_preload++; - goto out; - } preempt_disable(); kp = &__get_cpu_var(zcache_preloads); while (kp->nr < ARRAY_SIZE(kp->objnodes)) { @@ -772,7 +1043,7 @@ static int zcache_do_preload(struct tmem_pool *pool) ZCACHE_GFP_MASK); if (unlikely(objnode == NULL)) { zcache_failed_alloc++; - goto unlock_out; + goto out; } preempt_disable(); kp = &__get_cpu_var(zcache_preloads); @@ -785,13 +1056,13 @@ static int zcache_do_preload(struct tmem_pool *pool) obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK); if (unlikely(obj == NULL)) { zcache_failed_alloc++; - goto unlock_out; + goto out; } page = (void *)__get_free_page(ZCACHE_GFP_MASK); if (unlikely(page == NULL)) { zcache_failed_get_free_pages++; kmem_cache_free(zcache_obj_cache, obj); - goto unlock_out; + goto out; } preempt_disable(); kp = &__get_cpu_var(zcache_preloads); @@ -804,8 +1075,6 @@ static int zcache_do_preload(struct tmem_pool *pool) else free_page((unsigned long)page); ret = 0; -unlock_out: - spin_unlock(&zcache_direct_reclaim_lock); out: return ret; } @@ -899,50 +1168,63 @@ static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0); static unsigned long zcache_curr_pers_pampd_count_max; /* forward reference */ -static int zcache_compress(struct page *from, void **out_va, size_t *out_len); +static int zcache_compress(struct page *from, void **out_va, unsigned *out_len); -static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid, - uint32_t index, struct page *page) +static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph, + struct tmem_pool *pool, struct tmem_oid *oid, + uint32_t index) { void *pampd = NULL, *cdata; - size_t clen; + unsigned 
clen; int ret; - bool ephemeral = is_ephemeral(pool); unsigned long count; - - if (ephemeral) { + struct page *page = (struct page *)(data); + struct zcache_client *cli = pool->client; + uint16_t client_id = get_client_id_from_client(cli); + unsigned long zv_mean_zsize; + unsigned long curr_pers_pampd_count; + u64 total_zsize; + + if (eph) { ret = zcache_compress(page, &cdata, &clen); if (ret == 0) - goto out; if (clen == 0 || clen > zbud_max_buddy_size()) { zcache_compress_poor++; goto out; } - pampd = (void *)zbud_create(pool->pool_id, oid, index, - page, cdata, clen); + pampd = (void *)zbud_create(client_id, pool->pool_id, oid, + index, page, cdata, clen); if (pampd != NULL) { count = atomic_inc_return(&zcache_curr_eph_pampd_count); if (count > zcache_curr_eph_pampd_count_max) zcache_curr_eph_pampd_count_max = count; } } else { - /* - * FIXME: This is all the "policy" there is for now. - * 3/4 totpages should allow ~37% of RAM to be filled with - * compressed frontswap pages - */ - if (atomic_read(&zcache_curr_pers_pampd_count) > - 3 * totalram_pages / 4) + curr_pers_pampd_count = + atomic_read(&zcache_curr_pers_pampd_count); + if (curr_pers_pampd_count > + (zv_page_count_policy_percent * totalram_pages) / 100) goto out; ret = zcache_compress(page, &cdata, &clen); if (ret == 0) goto out; - if (clen > zv_max_page_size) { + /* reject if compression is too poor */ + if (clen > zv_max_zsize) { zcache_compress_poor++; goto out; } - pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id, + /* reject if mean compression is too poor */ + if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) { + total_zsize = zs_get_total_pages(cli->zspool) << PAGE_SHIFT; + zv_mean_zsize = div_u64(total_zsize, + curr_pers_pampd_count); + if (zv_mean_zsize > zv_max_mean_zsize) { + zcache_mean_compress_poor++; + goto out; + } + } + pampd = (void *)zv_create(cli->zspool, pool->pool_id, oid, index, cdata, clen); if (pampd == NULL) goto out; @@ -958,64 +1240,104 @@ out: * fill 
the pageframe corresponding to the struct page with the data * from the passed pampd */ -static int zcache_pampd_get_data(struct page *page, void *pampd, - struct tmem_pool *pool) +static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw, + void *pampd, struct tmem_pool *pool, + struct tmem_oid *oid, uint32_t index) { int ret = 0; - if (is_ephemeral(pool)) - ret = zbud_decompress(page, pampd); - else - zv_decompress(page, pampd); + BUG_ON(is_ephemeral(pool)); + zv_decompress((struct page *)(data), (unsigned long)pampd); return ret; } /* + * fill the pageframe corresponding to the struct page with the data + * from the passed pampd + */ +static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw, + void *pampd, struct tmem_pool *pool, + struct tmem_oid *oid, uint32_t index) +{ + BUG_ON(!is_ephemeral(pool)); + if (zbud_decompress((struct page *)(data), pampd) < 0) + return -EINVAL; + zbud_free_and_delist((struct zbud_hdr *)pampd); + atomic_dec(&zcache_curr_eph_pampd_count); + return 0; +} + +/* * free the pampd and remove it from any zcache lists * pampd must no longer be pointed to from any tmem data structures! 
*/ -static void zcache_pampd_free(void *pampd, struct tmem_pool *pool) +static void zcache_pampd_free(void *pampd, struct tmem_pool *pool, + struct tmem_oid *oid, uint32_t index) { + struct zcache_client *cli = pool->client; + if (is_ephemeral(pool)) { zbud_free_and_delist((struct zbud_hdr *)pampd); atomic_dec(&zcache_curr_eph_pampd_count); BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0); } else { - zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd); + zv_free(cli->zspool, (unsigned long)pampd); atomic_dec(&zcache_curr_pers_pampd_count); BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0); } } +static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj) +{ +} + +static void zcache_pampd_new_obj(struct tmem_obj *obj) +{ +} + +static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj) +{ + return -1; +} + +static bool zcache_pampd_is_remote(void *pampd) +{ + return 0; +} + static struct tmem_pamops zcache_pamops = { .create = zcache_pampd_create, .get_data = zcache_pampd_get_data, + .get_data_and_free = zcache_pampd_get_data_and_free, .free = zcache_pampd_free, + .free_obj = zcache_pampd_free_obj, + .new_obj = zcache_pampd_new_obj, + .replace_in_obj = zcache_pampd_replace_in_obj, + .is_remote = zcache_pampd_is_remote, }; /* * zcache compression/decompression and related per-cpu stuff */ -#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS -#define LZO_DSTMEM_PAGE_ORDER 1 -static DEFINE_PER_CPU(unsigned char *, zcache_workmem); static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); +#define ZCACHE_DSTMEM_ORDER 1 -static int zcache_compress(struct page *from, void **out_va, size_t *out_len) +static int zcache_compress(struct page *from, void **out_va, unsigned *out_len) { int ret = 0; unsigned char *dmem = __get_cpu_var(zcache_dstmem); - unsigned char *wmem = __get_cpu_var(zcache_workmem); char *from_va; BUG_ON(!irqs_disabled()); - if (unlikely(dmem == NULL || wmem == NULL)) - goto out; /* no buffer, so can't compress */ + if 
(unlikely(dmem == NULL)) + goto out; /* no buffer or no compressor so can't compress */ + *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER; from_va = kmap_atomic(from, KM_USER0); mb(); - ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem); - BUG_ON(ret != LZO_E_OK); + ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem, + out_len); + BUG_ON(ret); *out_va = dmem; kunmap_atomic(from_va, KM_USER0); ret = 1; @@ -1023,29 +1345,48 @@ out: return ret; } +static int zcache_comp_cpu_up(int cpu) +{ + struct crypto_comp *tfm; + + tfm = crypto_alloc_comp(zcache_comp_name, 0, 0); + if (IS_ERR(tfm)) + return NOTIFY_BAD; + *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm; + return NOTIFY_OK; +} + +static void zcache_comp_cpu_down(int cpu) +{ + struct crypto_comp *tfm; + + tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu); + crypto_free_comp(tfm); + *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL; +} static int zcache_cpu_notifier(struct notifier_block *nb, unsigned long action, void *pcpu) { - int cpu = (long)pcpu; + int ret, cpu = (long)pcpu; struct zcache_preload *kp; switch (action) { case CPU_UP_PREPARE: + ret = zcache_comp_cpu_up(cpu); + if (ret != NOTIFY_OK) { + pr_err("zcache: can't allocate compressor transform\n"); + return ret; + } per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( - GFP_KERNEL | __GFP_REPEAT, - LZO_DSTMEM_PAGE_ORDER), - per_cpu(zcache_workmem, cpu) = - kzalloc(LZO1X_MEM_COMPRESS, - GFP_KERNEL | __GFP_REPEAT); + GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER); break; case CPU_DEAD: case CPU_UP_CANCELED: + zcache_comp_cpu_down(cpu); free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), - LZO_DSTMEM_PAGE_ORDER); + ZCACHE_DSTMEM_ORDER); per_cpu(zcache_dstmem, cpu) = NULL; - kfree(per_cpu(zcache_workmem, cpu)); - per_cpu(zcache_workmem, cpu) = NULL; kp = &per_cpu(zcache_preloads, cpu); while (kp->nr) { kmem_cache_free(zcache_objnode_cache, @@ -1053,8 +1394,14 @@ static int zcache_cpu_notifier(struct notifier_block *nb, 
kp->objnodes[kp->nr - 1] = NULL; kp->nr--; } - kmem_cache_free(zcache_obj_cache, kp->obj); - free_page((unsigned long)kp->page); + if (kp->obj) { + kmem_cache_free(zcache_obj_cache, kp->obj); + kp->obj = NULL; + } + if (kp->page) { + free_page((unsigned long)kp->page); + kp->page = NULL; + } break; default: break; @@ -1119,9 +1466,8 @@ ZCACHE_SYSFS_RO(evicted_buddied_pages); ZCACHE_SYSFS_RO(failed_get_free_pages); ZCACHE_SYSFS_RO(failed_alloc); ZCACHE_SYSFS_RO(put_to_flush); -ZCACHE_SYSFS_RO(aborted_preload); -ZCACHE_SYSFS_RO(aborted_shrink); ZCACHE_SYSFS_RO(compress_poor); +ZCACHE_SYSFS_RO(mean_compress_poor); ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages); ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages); ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count); @@ -1130,6 +1476,10 @@ ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts, zbud_show_unbuddied_list_counts); ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts, zbud_show_cumul_chunk_counts); +ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts, + zv_curr_dist_counts_show); +ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts, + zv_cumul_dist_counts_show); static struct attribute *zcache_attrs[] = { &zcache_curr_obj_count_attr.attr, @@ -1143,6 +1493,7 @@ static struct attribute *zcache_attrs[] = { &zcache_failed_eph_puts_attr.attr, &zcache_failed_pers_puts_attr.attr, &zcache_compress_poor_attr.attr, + &zcache_mean_compress_poor_attr.attr, &zcache_zbud_curr_raw_pages_attr.attr, &zcache_zbud_curr_zpages_attr.attr, &zcache_zbud_curr_zbytes_attr.attr, @@ -1156,10 +1507,13 @@ static struct attribute *zcache_attrs[] = { &zcache_failed_get_free_pages_attr.attr, &zcache_failed_alloc_attr.attr, &zcache_put_to_flush_attr.attr, - &zcache_aborted_preload_attr.attr, - &zcache_aborted_shrink_attr.attr, &zcache_zbud_unbuddied_list_counts_attr.attr, &zcache_zbud_cumul_chunk_counts_attr.attr, + &zcache_zv_curr_dist_counts_attr.attr, + &zcache_zv_cumul_dist_counts_attr.attr, + &zcache_zv_max_zsize_attr.attr, + &zcache_zv_max_mean_zsize_attr.attr, + 
&zcache_zv_page_count_policy_percent_attr.attr, NULL, }; @@ -1192,11 +1546,7 @@ static int shrink_zcache_memory(struct shrinker *shrink, if (!(gfp_mask & __GFP_FS)) /* does this case really need to be skipped? */ goto out; - if (spin_trylock(&zcache_direct_reclaim_lock)) { - zbud_evict_pages(nr); - spin_unlock(&zcache_direct_reclaim_lock); - } else - zcache_aborted_shrink++; + zbud_evict_pages(nr); } ret = (int)atomic_read(&zcache_zbud_curr_raw_pages); out: @@ -1212,19 +1562,20 @@ static struct shrinker zcache_shrinker = { * zcache shims between cleancache/frontswap ops and tmem */ -static int zcache_put_page(int pool_id, struct tmem_oid *oidp, +static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp, uint32_t index, struct page *page) { struct tmem_pool *pool; int ret = -1; BUG_ON(!irqs_disabled()); - pool = zcache_get_pool_by_id(pool_id); + pool = zcache_get_pool_by_id(cli_id, pool_id); if (unlikely(pool == NULL)) goto out; if (!zcache_freeze && zcache_do_preload(pool) == 0) { /* preload does preempt_disable on success */ - ret = tmem_put(pool, oidp, index, page); + ret = tmem_put(pool, oidp, index, (char *)(page), + PAGE_SIZE, 0, is_ephemeral(pool)); if (ret < 0) { if (is_ephemeral(pool)) zcache_failed_eph_puts++; @@ -1244,25 +1595,28 @@ out: return ret; } -static int zcache_get_page(int pool_id, struct tmem_oid *oidp, +static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, uint32_t index, struct page *page) { struct tmem_pool *pool; int ret = -1; unsigned long flags; + size_t size = PAGE_SIZE; local_irq_save(flags); - pool = zcache_get_pool_by_id(pool_id); + pool = zcache_get_pool_by_id(cli_id, pool_id); if (likely(pool != NULL)) { if (atomic_read(&pool->obj_count) > 0) - ret = tmem_get(pool, oidp, index, page); + ret = tmem_get(pool, oidp, index, (char *)(page), + &size, 0, is_ephemeral(pool)); zcache_put_pool(pool); } local_irq_restore(flags); return ret; } -static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, 
uint32_t index) +static int zcache_flush_page(int cli_id, int pool_id, + struct tmem_oid *oidp, uint32_t index) { struct tmem_pool *pool; int ret = -1; @@ -1270,7 +1624,7 @@ static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index) local_irq_save(flags); zcache_flush_total++; - pool = zcache_get_pool_by_id(pool_id); + pool = zcache_get_pool_by_id(cli_id, pool_id); if (likely(pool != NULL)) { if (atomic_read(&pool->obj_count) > 0) ret = tmem_flush_page(pool, oidp, index); @@ -1282,7 +1636,8 @@ static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index) return ret; } -static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) +static int zcache_flush_object(int cli_id, int pool_id, + struct tmem_oid *oidp) { struct tmem_pool *pool; int ret = -1; @@ -1290,7 +1645,7 @@ static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) local_irq_save(flags); zcache_flobj_total++; - pool = zcache_get_pool_by_id(pool_id); + pool = zcache_get_pool_by_id(cli_id, pool_id); if (likely(pool != NULL)) { if (atomic_read(&pool->obj_count) > 0) ret = tmem_flush_object(pool, oidp); @@ -1302,42 +1657,60 @@ static int zcache_flush_object(int pool_id, struct tmem_oid *oidp) return ret; } -static int zcache_destroy_pool(int pool_id) +static int zcache_destroy_pool(int cli_id, int pool_id) { struct tmem_pool *pool = NULL; + struct zcache_client *cli = NULL; int ret = -1; if (pool_id < 0) goto out; - pool = zcache_client.tmem_pools[pool_id]; + if (cli_id == LOCAL_CLIENT) + cli = &zcache_host; + else if ((unsigned int)cli_id < MAX_CLIENTS) + cli = &zcache_clients[cli_id]; + if (cli == NULL) + goto out; + atomic_inc(&cli->refcount); + pool = cli->tmem_pools[pool_id]; if (pool == NULL) goto out; - zcache_client.tmem_pools[pool_id] = NULL; + cli->tmem_pools[pool_id] = NULL; /* wait for pool activity on other cpus to quiesce */ while (atomic_read(&pool->refcount) != 0) ; + atomic_dec(&cli->refcount); local_bh_disable(); ret = 
tmem_destroy_pool(pool); local_bh_enable(); kfree(pool); - pr_info("zcache: destroyed pool id=%d\n", pool_id); + pr_info("zcache: destroyed pool id=%d, cli_id=%d\n", + pool_id, cli_id); out: return ret; } -static int zcache_new_pool(uint32_t flags) +static int zcache_new_pool(uint16_t cli_id, uint32_t flags) { int poolid = -1; struct tmem_pool *pool; + struct zcache_client *cli = NULL; - pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL); + if (cli_id == LOCAL_CLIENT) + cli = &zcache_host; + else if ((unsigned int)cli_id < MAX_CLIENTS) + cli = &zcache_clients[cli_id]; + if (cli == NULL) + goto out; + atomic_inc(&cli->refcount); + pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC); if (pool == NULL) { pr_info("zcache: pool creation failed: out of memory\n"); goto out; } for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++) - if (zcache_client.tmem_pools[poolid] == NULL) + if (cli->tmem_pools[poolid] == NULL) break; if (poolid >= MAX_POOLS_PER_CLIENT) { pr_info("zcache: pool creation failed: max exceeded\n"); @@ -1346,14 +1719,16 @@ static int zcache_new_pool(uint32_t flags) goto out; } atomic_set(&pool->refcount, 0); - pool->client = &zcache_client; + pool->client = cli; pool->pool_id = poolid; tmem_new_pool(pool, flags); - zcache_client.tmem_pools[poolid] = pool; - pr_info("zcache: created %s tmem pool, id=%d\n", + cli->tmem_pools[poolid] = pool; + pr_info("zcache: created %s tmem pool, id=%d, client=%d\n", flags & TMEM_POOL_PERSIST ? 
"persistent" : "ephemeral", - poolid); + poolid, cli_id); out: + if (cli != NULL) + atomic_dec(&cli->refcount); return poolid; } @@ -1373,8 +1748,10 @@ static void zcache_cleancache_put_page(int pool_id, u32 ind = (u32) index; struct tmem_oid oid = *(struct tmem_oid *)&key; + if (!PageWasActive(page)) + return; if (likely(ind == index)) - (void)zcache_put_page(pool_id, &oid, index, page); + (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page); } static int zcache_cleancache_get_page(int pool_id, @@ -1386,7 +1763,9 @@ static int zcache_cleancache_get_page(int pool_id, int ret = -1; if (likely(ind == index)) - ret = zcache_get_page(pool_id, &oid, index, page); + ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page); + if (ret == 0) + SetPageWasActive(page); return ret; } @@ -1398,7 +1777,7 @@ static void zcache_cleancache_flush_page(int pool_id, struct tmem_oid oid = *(struct tmem_oid *)&key; if (likely(ind == index)) - (void)zcache_flush_page(pool_id, &oid, ind); + (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind); } static void zcache_cleancache_flush_inode(int pool_id, @@ -1406,13 +1785,13 @@ static void zcache_cleancache_flush_inode(int pool_id, { struct tmem_oid oid = *(struct tmem_oid *)&key; - (void)zcache_flush_object(pool_id, &oid); + (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid); } static void zcache_cleancache_flush_fs(int pool_id) { if (pool_id >= 0) - (void)zcache_destroy_pool(pool_id); + (void)zcache_destroy_pool(LOCAL_CLIENT, pool_id); } static int zcache_cleancache_init_fs(size_t pagesize) @@ -1420,7 +1799,7 @@ static int zcache_cleancache_init_fs(size_t pagesize) BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); BUG_ON(pagesize != PAGE_SIZE); - return zcache_new_pool(0); + return zcache_new_pool(LOCAL_CLIENT, 0); } static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) @@ -1429,15 +1808,15 @@ static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize) 
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); BUG_ON(pagesize != PAGE_SIZE); - return zcache_new_pool(0); + return zcache_new_pool(LOCAL_CLIENT, 0); } static struct cleancache_ops zcache_cleancache_ops = { .put_page = zcache_cleancache_put_page, .get_page = zcache_cleancache_get_page, - .flush_page = zcache_cleancache_flush_page, - .flush_inode = zcache_cleancache_flush_inode, - .flush_fs = zcache_cleancache_flush_fs, + .invalidate_page = zcache_cleancache_flush_page, + .invalidate_inode = zcache_cleancache_flush_inode, + .invalidate_fs = zcache_cleancache_flush_fs, .init_shared_fs = zcache_cleancache_init_shared_fs, .init_fs = zcache_cleancache_init_fs }; @@ -1458,8 +1837,10 @@ static int zcache_frontswap_poolid = -1; /* * Swizzling increases objects per swaptype, increasing tmem concurrency * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS + * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from + * frontswap_get_page(), but has side-effects. Hence using 8. 
*/ -#define SWIZ_BITS 4 +#define SWIZ_BITS 8 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) #define iswiz(_ind) (_ind >> SWIZ_BITS) @@ -1483,8 +1864,8 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, BUG_ON(!PageLocked(page)); if (likely(ind64 == ind)) { local_irq_save(flags); - ret = zcache_put_page(zcache_frontswap_poolid, &oid, - iswiz(ind), page); + ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid, + &oid, iswiz(ind), page); local_irq_restore(flags); } return ret; @@ -1502,8 +1883,8 @@ static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, BUG_ON(!PageLocked(page)); if (likely(ind64 == ind)) - ret = zcache_get_page(zcache_frontswap_poolid, &oid, - iswiz(ind), page); + ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid, + &oid, iswiz(ind), page); return ret; } @@ -1515,8 +1896,8 @@ static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset) struct tmem_oid oid = oswiz(type, ind); if (likely(ind64 == ind)) - (void)zcache_flush_page(zcache_frontswap_poolid, &oid, - iswiz(ind)); + (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid, + &oid, iswiz(ind)); } /* flush all pages from the passed swaptype */ @@ -1527,7 +1908,8 @@ static void zcache_frontswap_flush_area(unsigned type) for (ind = SWIZ_MASK; ind >= 0; ind--) { oid = oswiz(type, ind); - (void)zcache_flush_object(zcache_frontswap_poolid, &oid); + (void)zcache_flush_object(LOCAL_CLIENT, + zcache_frontswap_poolid, &oid); } } @@ -1535,14 +1917,15 @@ static void zcache_frontswap_init(unsigned ignored) { /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ if (zcache_frontswap_poolid < 0) - zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST); + zcache_frontswap_poolid = + zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST); } static struct frontswap_ops zcache_frontswap_ops = { .put_page = zcache_frontswap_put_page, .get_page = 
zcache_frontswap_get_page, - .flush_page = zcache_frontswap_flush_page, - .flush_area = zcache_frontswap_flush_area, + .invalidate_page = zcache_frontswap_flush_page, + .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init }; @@ -1592,11 +1975,49 @@ static int __init no_frontswap(char *s) __setup("nofrontswap", no_frontswap); +static int __init enable_zcache_compressor(char *s) +{ + strncpy(zcache_comp_name, s, ZCACHE_COMP_NAME_SZ); + zcache_enabled = 1; + return 1; +} +__setup("zcache=", enable_zcache_compressor); + + +static int zcache_comp_init(void) +{ + int ret = 0; + + /* check crypto algorithm */ + if (*zcache_comp_name != '\0') { + ret = crypto_has_comp(zcache_comp_name, 0, 0); + if (!ret) + pr_info("zcache: %s not supported\n", + zcache_comp_name); + } + if (!ret) + strcpy(zcache_comp_name, "lzo"); + ret = crypto_has_comp(zcache_comp_name, 0, 0); + if (!ret) { + ret = 1; + goto out; + } + pr_info("zcache: using %s compressor\n", zcache_comp_name); + + /* alloc percpu transforms */ + ret = 0; + zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); + if (!zcache_comp_pcpu_tfms) + ret = 1; +out: + return ret; +} + static int __init zcache_init(void) { -#ifdef CONFIG_SYSFS int ret = 0; +#ifdef CONFIG_SYSFS ret = sysfs_create_group(mm_kobj, &zcache_attr_group); if (ret) { pr_err("zcache: can't create sysfs\n"); @@ -1614,6 +2035,11 @@ static int __init zcache_init(void) pr_err("zcache: can't register cpu notifier\n"); goto out; } + ret = zcache_comp_init(); + if (ret) { + pr_err("zcache: compressor initialization failed\n"); + goto out; + } for_each_online_cpu(cpu) { void *pcpu = (void *)(long)cpu; zcache_cpu_notifier(&zcache_cpu_notifier_block, @@ -1624,6 +2050,11 @@ static int __init zcache_init(void) sizeof(struct tmem_objnode), 0, 0, NULL); zcache_obj_cache = kmem_cache_create("zcache_obj", sizeof(struct tmem_obj), 0, 0, NULL); + ret = zcache_new_client(LOCAL_CLIENT); + if (ret) { + pr_err("zcache: can't create client\n"); + 
goto out; + } #endif #ifdef CONFIG_CLEANCACHE if (zcache_enabled && use_cleancache) { @@ -1642,16 +2073,11 @@ static int __init zcache_init(void) if (zcache_enabled && use_frontswap) { struct frontswap_ops old_ops; - zcache_client.xvpool = xv_create_pool(); - if (zcache_client.xvpool == NULL) { - pr_err("zcache: can't create xvpool\n"); - goto out; - } old_ops = zcache_frontswap_register_ops(); pr_info("zcache: frontswap enabled using kernel " - "transcendent memory and xvmalloc\n"); + "transcendent memory and zsmalloc\n"); if (old_ops.init != NULL) - pr_warning("ktmem: frontswap_ops overridden"); + pr_warning("zcache: frontswap_ops overridden"); } #endif out: diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig deleted file mode 100644 index 06f741a..0000000 --- a/drivers/staging/zram/Kconfig +++ /dev/null @@ -1,37 +0,0 @@ -config XVMALLOC - bool - default n - -config ZRAM - tristate "Compressed RAM block device support" - depends on BLOCK && SYSFS - select XVMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS - default n - help - Creates virtual block devices called /dev/zramX (X = 0, 1, ...). - Pages written to these disks are compressed and stored in memory - itself. These disks allow very fast I/O and compression provides - good amounts of memory savings. - - It has several use cases, for example: /tmp storage, use as swap - disks and maybe many more. - - See zram.txt for more information. - Project home: http://compcache.googlecode.com/ - -config ZRAM_DEBUG - bool "Compressed RAM block device debug support" - depends on ZRAM - default n - help - This option adds additional debugging code to the compressed - RAM block device driver. 
- -config ZRAM_FOR_ANDROID - bool "Optimize zram behavior for android" - depends on ZRAM && ANDROID - default n - help - This option enables modified zram behavior optimized for android diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile deleted file mode 100644 index 2a6d321..0000000 --- a/drivers/staging/zram/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -zram-y := zram_drv.o zram_sysfs.o - -obj-$(CONFIG_ZRAM) += zram.o -obj-$(CONFIG_XVMALLOC) += xvmalloc.o
\ No newline at end of file diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c deleted file mode 100644 index 1f9c508..0000000 --- a/drivers/staging/zram/xvmalloc.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * xvmalloc memory allocator - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -#ifdef CONFIG_ZRAM_DEBUG -#define DEBUG -#endif - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/errno.h> -#include <linux/highmem.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/slab.h> - -#include "xvmalloc.h" -#include "xvmalloc_int.h" - -static void stat_inc(u64 *value) -{ - *value = *value + 1; -} - -static void stat_dec(u64 *value) -{ - *value = *value - 1; -} - -static int test_flag(struct block_header *block, enum blockflags flag) -{ - return block->prev & BIT(flag); -} - -static void set_flag(struct block_header *block, enum blockflags flag) -{ - block->prev |= BIT(flag); -} - -static void clear_flag(struct block_header *block, enum blockflags flag) -{ - block->prev &= ~BIT(flag); -} - -/* - * Given <page, offset> pair, provide a dereferencable pointer. - * This is called from xv_malloc/xv_free path, so it - * needs to be fast. 
- */ -static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type) -{ - unsigned char *base; - - base = kmap_atomic(page, type); - return base + offset; -} - -static void put_ptr_atomic(void *ptr, enum km_type type) -{ - kunmap_atomic(ptr, type); -} - -static u32 get_blockprev(struct block_header *block) -{ - return block->prev & PREV_MASK; -} - -static void set_blockprev(struct block_header *block, u16 new_offset) -{ - block->prev = new_offset | (block->prev & FLAGS_MASK); -} - -static struct block_header *BLOCK_NEXT(struct block_header *block) -{ - return (struct block_header *) - ((char *)block + block->size + XV_ALIGN); -} - -/* - * Get index of free list containing blocks of maximum size - * which is less than or equal to given size. - */ -static u32 get_index_for_insert(u32 size) -{ - if (unlikely(size > XV_MAX_ALLOC_SIZE)) - size = XV_MAX_ALLOC_SIZE; - size &= ~FL_DELTA_MASK; - return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; -} - -/* - * Get index of free list having blocks of size greater than - * or equal to requested size. - */ -static u32 get_index(u32 size) -{ - if (unlikely(size < XV_MIN_ALLOC_SIZE)) - size = XV_MIN_ALLOC_SIZE; - size = ALIGN(size, FL_DELTA); - return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT; -} - -/** - * find_block - find block of at least given size - * @pool: memory pool to search from - * @size: size of block required - * @page: page containing required block - * @offset: offset within the page where block is located. - * - * Searches two level bitmap to locate block of at least - * the given size. If such a block is found, it provides - * <page, offset> to identify this block and returns index - * in freelist where we found this block. - * Otherwise, returns 0 and <page, offset> params are not touched. 
- */ -static u32 find_block(struct xv_pool *pool, u32 size, - struct page **page, u32 *offset) -{ - ulong flbitmap, slbitmap; - u32 flindex, slindex, slbitstart; - - /* There are no free blocks in this pool */ - if (!pool->flbitmap) - return 0; - - /* Get freelist index correspoding to this size */ - slindex = get_index(size); - slbitmap = pool->slbitmap[slindex / BITS_PER_LONG]; - slbitstart = slindex % BITS_PER_LONG; - - /* - * If freelist is not empty at this index, we found the - * block - head of this list. This is approximate best-fit match. - */ - if (test_bit(slbitstart, &slbitmap)) { - *page = pool->freelist[slindex].page; - *offset = pool->freelist[slindex].offset; - return slindex; - } - - /* - * No best-fit found. Search a bit further in bitmap for a free block. - * Second level bitmap consists of series of 32-bit chunks. Search - * further in the chunk where we expected a best-fit, starting from - * index location found above. - */ - slbitstart++; - slbitmap >>= slbitstart; - - /* Skip this search if we were already at end of this bitmap chunk */ - if ((slbitstart != BITS_PER_LONG) && slbitmap) { - slindex += __ffs(slbitmap) + 1; - *page = pool->freelist[slindex].page; - *offset = pool->freelist[slindex].offset; - return slindex; - } - - /* Now do a full two-level bitmap search to find next nearest fit */ - flindex = slindex / BITS_PER_LONG; - - flbitmap = (pool->flbitmap) >> (flindex + 1); - if (!flbitmap) - return 0; - - flindex += __ffs(flbitmap) + 1; - slbitmap = pool->slbitmap[flindex]; - slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap); - *page = pool->freelist[slindex].page; - *offset = pool->freelist[slindex].offset; - - return slindex; -} - -/* - * Insert block at <page, offset> in freelist of given pool. - * freelist used depends on block size. 
- */ -static void insert_block(struct xv_pool *pool, struct page *page, u32 offset, - struct block_header *block) -{ - u32 flindex, slindex; - struct block_header *nextblock; - - slindex = get_index_for_insert(block->size); - flindex = slindex / BITS_PER_LONG; - - block->link.prev_page = NULL; - block->link.prev_offset = 0; - block->link.next_page = pool->freelist[slindex].page; - block->link.next_offset = pool->freelist[slindex].offset; - pool->freelist[slindex].page = page; - pool->freelist[slindex].offset = offset; - - if (block->link.next_page) { - nextblock = get_ptr_atomic(block->link.next_page, - block->link.next_offset, KM_USER1); - nextblock->link.prev_page = page; - nextblock->link.prev_offset = offset; - put_ptr_atomic(nextblock, KM_USER1); - /* If there was a next page then the free bits are set. */ - return; - } - - __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]); - __set_bit(flindex, &pool->flbitmap); -} - -/* - * Remove block from freelist. Index 'slindex' identifies the freelist. - */ -static void remove_block(struct xv_pool *pool, struct page *page, u32 offset, - struct block_header *block, u32 slindex) -{ - u32 flindex = slindex / BITS_PER_LONG; - struct block_header *tmpblock; - - if (block->link.prev_page) { - tmpblock = get_ptr_atomic(block->link.prev_page, - block->link.prev_offset, KM_USER1); - tmpblock->link.next_page = block->link.next_page; - tmpblock->link.next_offset = block->link.next_offset; - put_ptr_atomic(tmpblock, KM_USER1); - } - - if (block->link.next_page) { - tmpblock = get_ptr_atomic(block->link.next_page, - block->link.next_offset, KM_USER1); - tmpblock->link.prev_page = block->link.prev_page; - tmpblock->link.prev_offset = block->link.prev_offset; - put_ptr_atomic(tmpblock, KM_USER1); - } - - /* Is this block is at the head of the freelist? 
*/ - if (pool->freelist[slindex].page == page - && pool->freelist[slindex].offset == offset) { - - pool->freelist[slindex].page = block->link.next_page; - pool->freelist[slindex].offset = block->link.next_offset; - - if (pool->freelist[slindex].page) { - struct block_header *tmpblock; - tmpblock = get_ptr_atomic(pool->freelist[slindex].page, - pool->freelist[slindex].offset, - KM_USER1); - tmpblock->link.prev_page = NULL; - tmpblock->link.prev_offset = 0; - put_ptr_atomic(tmpblock, KM_USER1); - } else { - /* This freelist bucket is empty */ - __clear_bit(slindex % BITS_PER_LONG, - &pool->slbitmap[flindex]); - if (!pool->slbitmap[flindex]) - __clear_bit(flindex, &pool->flbitmap); - } - } - - block->link.prev_page = NULL; - block->link.prev_offset = 0; - block->link.next_page = NULL; - block->link.next_offset = 0; -} - -/* - * Allocate a page and add it to freelist of given pool. - */ -static int grow_pool(struct xv_pool *pool, gfp_t flags) -{ - struct page *page; - struct block_header *block; - - page = alloc_page(flags); - if (unlikely(!page)) - return -ENOMEM; - - stat_inc(&pool->total_pages); - - spin_lock(&pool->lock); - block = get_ptr_atomic(page, 0, KM_USER0); - - block->size = PAGE_SIZE - XV_ALIGN; - set_flag(block, BLOCK_FREE); - clear_flag(block, PREV_FREE); - set_blockprev(block, 0); - - insert_block(pool, page, 0, block); - - put_ptr_atomic(block, KM_USER0); - spin_unlock(&pool->lock); - - return 0; -} - -/* - * Create a memory pool. Allocates freelist, bitmaps and other - * per-pool metadata. - */ -struct xv_pool *xv_create_pool(void) -{ - u32 ovhd_size; - struct xv_pool *pool; - - ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); - pool = kzalloc(ovhd_size, GFP_KERNEL); - if (!pool) - return NULL; - - spin_lock_init(&pool->lock); - - return pool; -} -EXPORT_SYMBOL_GPL(xv_create_pool); - -void xv_destroy_pool(struct xv_pool *pool) -{ - kfree(pool); -} -EXPORT_SYMBOL_GPL(xv_destroy_pool); - -/** - * xv_malloc - Allocate block of given size from pool. 
- * @pool: pool to allocate from - * @size: size of block to allocate - * @page: page no. that holds the object - * @offset: location of object within page - * - * On success, <page, offset> identifies block allocated - * and 0 is returned. On failure, <page, offset> is set to - * 0 and -ENOMEM is returned. - * - * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail. - */ -int xv_malloc(struct xv_pool *pool, u32 size, struct page **page, - u32 *offset, gfp_t flags) -{ - int error; - u32 index, tmpsize, origsize, tmpoffset; - struct block_header *block, *tmpblock; - - *page = NULL; - *offset = 0; - origsize = size; - - if (unlikely(!size || size > XV_MAX_ALLOC_SIZE)) - return -ENOMEM; - - size = ALIGN(size, XV_ALIGN); - - spin_lock(&pool->lock); - - index = find_block(pool, size, page, offset); - - if (!*page) { - spin_unlock(&pool->lock); - if (flags & GFP_NOWAIT) - return -ENOMEM; - error = grow_pool(pool, flags); - if (unlikely(error)) - return error; - - spin_lock(&pool->lock); - index = find_block(pool, size, page, offset); - } - - if (!*page) { - spin_unlock(&pool->lock); - return -ENOMEM; - } - - block = get_ptr_atomic(*page, *offset, KM_USER0); - - remove_block(pool, *page, *offset, block, index); - - /* Split the block if required */ - tmpoffset = *offset + size + XV_ALIGN; - tmpsize = block->size - size; - tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN); - if (tmpsize) { - tmpblock->size = tmpsize - XV_ALIGN; - set_flag(tmpblock, BLOCK_FREE); - clear_flag(tmpblock, PREV_FREE); - - set_blockprev(tmpblock, *offset); - if (tmpblock->size >= XV_MIN_ALLOC_SIZE) - insert_block(pool, *page, tmpoffset, tmpblock); - - if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) { - tmpblock = BLOCK_NEXT(tmpblock); - set_blockprev(tmpblock, tmpoffset); - } - } else { - /* This block is exact fit */ - if (tmpoffset != PAGE_SIZE) - clear_flag(tmpblock, PREV_FREE); - } - - block->size = origsize; - clear_flag(block, BLOCK_FREE); - - 
put_ptr_atomic(block, KM_USER0); - spin_unlock(&pool->lock); - - *offset += XV_ALIGN; - - return 0; -} -EXPORT_SYMBOL_GPL(xv_malloc); - -/* - * Free block identified with <page, offset> - */ -void xv_free(struct xv_pool *pool, struct page *page, u32 offset) -{ - void *page_start; - struct block_header *block, *tmpblock; - - offset -= XV_ALIGN; - - spin_lock(&pool->lock); - - page_start = get_ptr_atomic(page, 0, KM_USER0); - block = (struct block_header *)((char *)page_start + offset); - - /* Catch double free bugs */ - BUG_ON(test_flag(block, BLOCK_FREE)); - - block->size = ALIGN(block->size, XV_ALIGN); - - tmpblock = BLOCK_NEXT(block); - if (offset + block->size + XV_ALIGN == PAGE_SIZE) - tmpblock = NULL; - - /* Merge next block if its free */ - if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) { - /* - * Blocks smaller than XV_MIN_ALLOC_SIZE - * are not inserted in any free list. - */ - if (tmpblock->size >= XV_MIN_ALLOC_SIZE) { - remove_block(pool, page, - offset + block->size + XV_ALIGN, tmpblock, - get_index_for_insert(tmpblock->size)); - } - block->size += tmpblock->size + XV_ALIGN; - } - - /* Merge previous block if its free */ - if (test_flag(block, PREV_FREE)) { - tmpblock = (struct block_header *)((char *)(page_start) + - get_blockprev(block)); - offset = offset - tmpblock->size - XV_ALIGN; - - if (tmpblock->size >= XV_MIN_ALLOC_SIZE) - remove_block(pool, page, offset, tmpblock, - get_index_for_insert(tmpblock->size)); - - tmpblock->size += block->size + XV_ALIGN; - block = tmpblock; - } - - /* No used objects in this page. Free it. 
*/ - if (block->size == PAGE_SIZE - XV_ALIGN) { - put_ptr_atomic(page_start, KM_USER0); - spin_unlock(&pool->lock); - - __free_page(page); - stat_dec(&pool->total_pages); - return; - } - - set_flag(block, BLOCK_FREE); - if (block->size >= XV_MIN_ALLOC_SIZE) - insert_block(pool, page, offset, block); - - if (offset + block->size + XV_ALIGN != PAGE_SIZE) { - tmpblock = BLOCK_NEXT(block); - set_flag(tmpblock, PREV_FREE); - set_blockprev(tmpblock, offset); - } - - put_ptr_atomic(page_start, KM_USER0); - spin_unlock(&pool->lock); -} -EXPORT_SYMBOL_GPL(xv_free); - -u32 xv_get_object_size(void *obj) -{ - struct block_header *blk; - - blk = (struct block_header *)((char *)(obj) - XV_ALIGN); - return blk->size; -} -EXPORT_SYMBOL_GPL(xv_get_object_size); - -/* - * Returns total memory used by allocator (userdata + metadata) - */ -u64 xv_get_total_size_bytes(struct xv_pool *pool) -{ - return pool->total_pages << PAGE_SHIFT; -} -EXPORT_SYMBOL_GPL(xv_get_total_size_bytes); diff --git a/drivers/staging/zram/xvmalloc.h b/drivers/staging/zram/xvmalloc.h deleted file mode 100644 index 5b1a81a..0000000 --- a/drivers/staging/zram/xvmalloc.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * xvmalloc memory allocator - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. 
- * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -#ifndef _XV_MALLOC_H_ -#define _XV_MALLOC_H_ - -#include <linux/types.h> - -struct xv_pool; - -struct xv_pool *xv_create_pool(void); -void xv_destroy_pool(struct xv_pool *pool); - -int xv_malloc(struct xv_pool *pool, u32 size, struct page **page, - u32 *offset, gfp_t flags); -void xv_free(struct xv_pool *pool, struct page *page, u32 offset); - -u32 xv_get_object_size(void *obj); -u64 xv_get_total_size_bytes(struct xv_pool *pool); - -#endif diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h deleted file mode 100644 index b5f1f7f..0000000 --- a/drivers/staging/zram/xvmalloc_int.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * xvmalloc memory allocator - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -#ifndef _XV_MALLOC_INT_H_ -#define _XV_MALLOC_INT_H_ - -#include <linux/kernel.h> -#include <linux/types.h> - -/* User configurable params */ - -/* Must be power of two */ -#ifdef CONFIG_64BIT -#define XV_ALIGN_SHIFT 3 -#else -#define XV_ALIGN_SHIFT 2 -#endif -#define XV_ALIGN (1 << XV_ALIGN_SHIFT) -#define XV_ALIGN_MASK (XV_ALIGN - 1) - -/* This must be greater than sizeof(link_free) */ -#define XV_MIN_ALLOC_SIZE 32 -#define XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN) - -/* - * Free lists are separated by FL_DELTA bytes - * This value is 3 for 4k pages and 4 for 64k pages, for any - * other page size, a conservative (PAGE_SHIFT - 9) is used. 
- */ -#if PAGE_SHIFT == 16 -#define FL_DELTA_SHIFT 4 -#else -#define FL_DELTA_SHIFT (PAGE_SHIFT - 9) -#endif -#define FL_DELTA (1 << FL_DELTA_SHIFT) -#define FL_DELTA_MASK (FL_DELTA - 1) -#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \ - / FL_DELTA + 1) - -#define MAX_FLI DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG) - -/* End of user params */ - -enum blockflags { - BLOCK_FREE, - PREV_FREE, - __NR_BLOCKFLAGS, -}; - -#define FLAGS_MASK XV_ALIGN_MASK -#define PREV_MASK (~FLAGS_MASK) - -struct freelist_entry { - struct page *page; - u16 offset; - u16 pad; -}; - -struct link_free { - struct page *prev_page; - struct page *next_page; - u16 prev_offset; - u16 next_offset; -}; - -struct block_header { - union { - /* This common header must be XV_ALIGN bytes */ - u8 common[XV_ALIGN]; - struct { - u16 size; - u16 prev; - }; - }; - struct link_free link; -}; - -struct xv_pool { - ulong flbitmap; - ulong slbitmap[MAX_FLI]; - u64 total_pages; /* stats */ - struct freelist_entry freelist[NUM_FREE_LISTS]; - spinlock_t lock; -}; - -#endif diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt deleted file mode 100644 index 5f75d29..0000000 --- a/drivers/staging/zram/zram.txt +++ /dev/null @@ -1,76 +0,0 @@ -zram: Compressed RAM based block devices ----------------------------------------- - -Project home: http://compcache.googlecode.com/ - -* Introduction - -The zram module creates RAM based block devices named /dev/zram<id> -(<id> = 0, 1, ...). Pages written to these disks are compressed and stored -in memory itself. These disks allow very fast I/O and compression provides -good amounts of memory savings. Some of the usecases include /tmp storage, -use as swap disks, various caches under /var and maybe many more :) - -Statistics for individual zram devices are exported through sysfs nodes at -/sys/block/zram<id>/ - -* Usage - -Following shows a typical sequence of steps for using zram. 
- -1) Load Module: - modprobe zram num_devices=4 - This creates 4 devices: /dev/zram{0,1,2,3} - (num_devices parameter is optional. Default: 1) - -2) Set Disksize (Optional): - Set disk size by writing the value to sysfs node 'disksize' - (in bytes). If disksize is not given, default value of 25% - of RAM is used. - - # Initialize /dev/zram0 with 50MB disksize - echo $((50*1024*1024)) > /sys/block/zram0/disksize - - NOTE: disksize cannot be changed if the disk contains any - data. So, for such a disk, you need to issue 'reset' (see below) - before you can change its disksize. - -3) Activate: - mkswap /dev/zram0 - swapon /dev/zram0 - - mkfs.ext4 /dev/zram1 - mount /dev/zram1 /tmp - -4) Stats: - Per-device statistics are exported as various nodes under - /sys/block/zram<id>/ - disksize - num_reads - num_writes - invalid_io - notify_free - discard - zero_pages - orig_data_size - compr_data_size - mem_used_total - -5) Deactivate: - swapoff /dev/zram0 - umount /dev/zram1 - -6) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset - - (This frees all the memory allocated for the given device). - - -Please report any problems at: - - Mailing list: linux-mm-cc at laptop dot org - - Issue tracker: http://code.google.com/p/compcache/issues/list - -Nitin Gupta -ngupta@vflare.org diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c deleted file mode 100644 index 5258c78..0000000 --- a/drivers/staging/zram/zram_drv.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. 
- * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com - */ - -#define KMSG_COMPONENT "zram" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#ifdef CONFIG_ZRAM_DEBUG -#define DEBUG -#endif - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/bio.h> -#include <linux/bitops.h> -#include <linux/blkdev.h> -#include <linux/buffer_head.h> -#include <linux/device.h> -#include <linux/genhd.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/lzo.h> -#include <linux/string.h> -#include <linux/vmalloc.h> -#ifdef CONFIG_ZRAM_FOR_ANDROID -#include <linux/swap.h> -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - - -#include "zram_drv.h" - -/* Globals */ -static int zram_major; -struct zram *zram_devices; - -/* Module params (documentation at end) */ -unsigned int num_devices; - -static void zram_stat_inc(u32 *v) -{ - *v = *v + 1; -} - -static void zram_stat_dec(u32 *v) -{ - *v = *v - 1; -} - -static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc) -{ - spin_lock(&zram->stat64_lock); - *v = *v + inc; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec) -{ - spin_lock(&zram->stat64_lock); - *v = *v - dec; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_inc(struct zram *zram, u64 *v) -{ - zram_stat64_add(zram, v, 1); -} - -static int zram_test_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - return zram->table[index].flags & BIT(flag); -} - -static void zram_set_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags |= BIT(flag); -} - -static void zram_clear_flag(struct zram *zram, u32 index, - enum zram_pageflags flag) -{ - zram->table[index].flags &= ~BIT(flag); -} - -static int page_zero_filled(void *ptr) -{ - unsigned int pos; - unsigned long *page; - - page = (unsigned long *)ptr; - - for 
(pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { - if (page[pos]) - return 0; - } - - return 1; -} - -static void zram_set_disksize(struct zram *zram, size_t totalram_bytes) -{ - if (!zram->disksize) { - pr_info( - "disk size not provided. You can use disksize_kb module " - "param to specify size.\nUsing default: (%u%% of RAM).\n", - default_disksize_perc_ram - ); - zram->disksize = default_disksize_perc_ram * - (totalram_bytes / 100); - } - - if (zram->disksize > 2 * (totalram_bytes)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %zu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - totalram_bytes >> 10, zram->disksize - ); - } - - zram->disksize &= PAGE_MASK; -} - -#ifdef CONFIG_ZRAM_FOR_ANDROID -/* - * Swap header (1st page of swap device) contains information - * about a swap file/partition. Prepare such a header for the - * given ramzswap device so that swapon can identify it as a - * swap partition. - */ -static void setup_swap_header(struct zram *zram, union swap_header *s) -{ - s->info.version = 1; - s->info.last_page = (zram->disksize >> PAGE_SHIFT) - 1; - s->info.nr_badpages = 0; - memcpy(s->magic.magic, "SWAPSPACE2", 10); -} -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - -static void zram_free_page(struct zram *zram, size_t index) -{ - u32 clen; - void *obj; - - struct page *page = zram->table[index].page; - u32 offset = zram->table[index].offset; - - if (unlikely(!page)) { - /* - * No memory is allocated for zero filled pages. - * Simply clear zero page flag. 
- */ - if (zram_test_flag(zram, index, ZRAM_ZERO)) { - zram_clear_flag(zram, index, ZRAM_ZERO); - zram_stat_dec(&zram->stats.pages_zero); - } - return; - } - - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - clen = PAGE_SIZE; - __free_page(page); - zram_clear_flag(zram, index, ZRAM_UNCOMPRESSED); - zram_stat_dec(&zram->stats.pages_expand); - goto out; - } - - obj = kmap_atomic(page, KM_USER0) + offset; - clen = xv_get_object_size(obj) - sizeof(struct zobj_header); - kunmap_atomic(obj, KM_USER0); - - xv_free(zram->mem_pool, page, offset); - if (clen <= PAGE_SIZE / 2) - zram_stat_dec(&zram->stats.good_compress); - -out: - zram_stat64_sub(zram, &zram->stats.compr_size, clen); - zram_stat_dec(&zram->stats.pages_stored); - - zram->table[index].page = NULL; - zram->table[index].offset = 0; -} - -static void handle_zero_page(struct page *page) -{ - void *user_mem; - - user_mem = kmap_atomic(page, KM_USER0); - memset(user_mem, 0, PAGE_SIZE); - kunmap_atomic(user_mem, KM_USER0); - - flush_dcache_page(page); -} - -static void handle_uncompressed_page(struct zram *zram, - struct page *page, u32 index) -{ - unsigned char *user_mem, *cmem; - - user_mem = kmap_atomic(page, KM_USER0); - cmem = kmap_atomic(zram->table[index].page, KM_USER1) + - zram->table[index].offset; - - memcpy(user_mem, cmem, PAGE_SIZE); - kunmap_atomic(user_mem, KM_USER0); - kunmap_atomic(cmem, KM_USER1); - - flush_dcache_page(page); -} - -static void zram_read(struct zram *zram, struct bio *bio) -{ - - int i; - u32 index; - struct bio_vec *bvec; - - zram_stat64_inc(zram, &zram->stats.num_reads); - index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; - - bio_for_each_segment(bvec, bio, i) { - int ret; - size_t clen; - struct page *page; - struct zobj_header *zheader; - unsigned char *user_mem, *cmem; - - page = bvec->bv_page; - - if (zram_test_flag(zram, index, ZRAM_ZERO)) { - handle_zero_page(page); - index++; - continue; - } - - /* Requested page is not present in compressed area */ - if 
(unlikely(!zram->table[index].page)) { - pr_debug("Read before write: sector=%lu, size=%u", - (ulong)(bio->bi_sector), bio->bi_size); - handle_zero_page(page); - index++; - continue; - } - - /* Page is stored uncompressed since it's incompressible */ - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) { - handle_uncompressed_page(zram, page, index); - index++; - continue; - } - - user_mem = kmap_atomic(page, KM_USER0); - clen = PAGE_SIZE; - - cmem = kmap_atomic(zram->table[index].page, KM_USER1) + - zram->table[index].offset; - - ret = lzo1x_decompress_safe( - cmem + sizeof(*zheader), - xv_get_object_size(cmem) - sizeof(*zheader), - user_mem, &clen); - - kunmap_atomic(user_mem, KM_USER0); - kunmap_atomic(cmem, KM_USER1); - - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { - pr_err("Decompression failed! err=%d, page=%u\n", - ret, index); - zram_stat64_inc(zram, &zram->stats.failed_reads); - goto out; - } - - flush_dcache_page(page); - index++; - } - - set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_endio(bio, 0); - return; - -out: - bio_io_error(bio); -} - -static void zram_write(struct zram *zram, struct bio *bio) -{ - int i; - u32 index; - struct bio_vec *bvec; - - zram_stat64_inc(zram, &zram->stats.num_writes); - index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; - - bio_for_each_segment(bvec, bio, i) { - int ret; - u32 offset; - size_t clen; - struct zobj_header *zheader; - struct page *page, *page_store; - unsigned char *user_mem, *cmem, *src; - - page = bvec->bv_page; - src = zram->compress_buffer; - - /* - * System overwrites unused sectors. Free memory associated - * with this sector now. 
- */ - if (zram->table[index].page || - zram_test_flag(zram, index, ZRAM_ZERO)) - zram_free_page(zram, index); - - mutex_lock(&zram->lock); - - user_mem = kmap_atomic(page, KM_USER0); - if (page_zero_filled(user_mem)) { - kunmap_atomic(user_mem, KM_USER0); - mutex_unlock(&zram->lock); - zram_stat_inc(&zram->stats.pages_zero); - zram_set_flag(zram, index, ZRAM_ZERO); - index++; - continue; - } - - ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen, - zram->compress_workmem); - - kunmap_atomic(user_mem, KM_USER0); - - if (unlikely(ret != LZO_E_OK)) { - mutex_unlock(&zram->lock); - pr_err("Compression failed! err=%d\n", ret); - zram_stat64_inc(zram, &zram->stats.failed_writes); - goto out; - } - - /* - * Page is incompressible. Store it as-is (uncompressed) - * since we do not want to return too many disk write - * errors which has side effect of hanging the system. - */ - if (unlikely(clen > max_zpage_size)) { - clen = PAGE_SIZE; - page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page_store)) { - mutex_unlock(&zram->lock); - pr_info("Error allocating memory for " - "incompressible page: %u\n", index); - zram_stat64_inc(zram, - &zram->stats.failed_writes); - goto out; - } - - offset = 0; - zram_set_flag(zram, index, ZRAM_UNCOMPRESSED); - zram_stat_inc(&zram->stats.pages_expand); - zram->table[index].page = page_store; - src = kmap_atomic(page, KM_USER0); - goto memstore; - } - - if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader), - &zram->table[index].page, &offset, - GFP_NOIO | __GFP_HIGHMEM)) { - mutex_unlock(&zram->lock); - pr_info("Error allocating memory for compressed " - "page: %u, size=%zu\n", index, clen); - zram_stat64_inc(zram, &zram->stats.failed_writes); - goto out; - } - -memstore: - zram->table[index].offset = offset; - - cmem = kmap_atomic(zram->table[index].page, KM_USER1) + - zram->table[index].offset; - -#if 0 - /* Back-reference needed for memory defragmentation */ - if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) { - 
zheader = (struct zobj_header *)cmem; - zheader->table_idx = index; - cmem += sizeof(*zheader); - } -#endif - - memcpy(cmem, src, clen); - - kunmap_atomic(cmem, KM_USER1); - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) - kunmap_atomic(src, KM_USER0); - - /* Update stats */ - zram_stat64_add(zram, &zram->stats.compr_size, clen); - zram_stat_inc(&zram->stats.pages_stored); - if (clen <= PAGE_SIZE / 2) - zram_stat_inc(&zram->stats.good_compress); - - mutex_unlock(&zram->lock); - index++; - } - - set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_endio(bio, 0); - return; - -out: - bio_io_error(bio); -} - -/* - * Check if request is within bounds and page aligned. - */ -static inline int valid_io_request(struct zram *zram, struct bio *bio) -{ - if (unlikely( - (bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) || - (bio->bi_sector & (SECTORS_PER_PAGE - 1)) || - (bio->bi_size & (PAGE_SIZE - 1)))) { - - return 0; - } - - /* I/O request is valid */ - return 1; -} - -/* - * Handler function for all zram I/O requests. 
- */ -static int zram_make_request(struct request_queue *queue, struct bio *bio) -{ - struct zram *zram = queue->queuedata; - - if (!valid_io_request(zram, bio)) { - zram_stat64_inc(zram, &zram->stats.invalid_io); - bio_io_error(bio); - return 0; - } - - if (unlikely(!zram->init_done) && zram_init_device(zram)) { - bio_io_error(bio); - return 0; - } - - switch (bio_data_dir(bio)) { - case READ: - zram_read(zram, bio); - break; - - case WRITE: - zram_write(zram, bio); - break; - } - - return 0; -} - -void zram_reset_device(struct zram *zram) -{ - size_t index; - - mutex_lock(&zram->init_lock); - zram->init_done = 0; - - /* Free various per-device buffers */ - kfree(zram->compress_workmem); - free_pages((unsigned long)zram->compress_buffer, 1); - - zram->compress_workmem = NULL; - zram->compress_buffer = NULL; - - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - struct page *page; - u16 offset; - - page = zram->table[index].page; - offset = zram->table[index].offset; - - if (!page) - continue; - - if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) - __free_page(page); - else - xv_free(zram->mem_pool, page, offset); - } - - vfree(zram->table); - zram->table = NULL; - - xv_destroy_pool(zram->mem_pool); - zram->mem_pool = NULL; - - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - - zram->disksize = 0; - mutex_unlock(&zram->init_lock); -} - -int zram_init_device(struct zram *zram) -{ - int ret; - size_t num_pages; -#ifdef CONFIG_ZRAM_FOR_ANDROID - struct page *page; - union swap_header *swap_header; -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - - mutex_lock(&zram->init_lock); - - if (zram->init_done) { - mutex_unlock(&zram->init_lock); - return 0; - } - - zram_set_disksize(zram, totalram_pages << PAGE_SHIFT); - - zram->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!zram->compress_workmem) { - pr_err("Error allocating compressor working memory!\n"); - 
ret = -ENOMEM; - goto fail; - } - - zram->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1); - if (!zram->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - ret = -ENOMEM; - goto fail; - } - - num_pages = zram->disksize >> PAGE_SHIFT; - zram->table = vzalloc(num_pages * sizeof(*zram->table)); - if (!zram->table) { - pr_err("Error allocating zram address table\n"); - /* To prevent accessing table entries during cleanup */ - zram->disksize = 0; - ret = -ENOMEM; - goto fail; - } - -#ifdef CONFIG_ZRAM_FOR_ANDROID - page = alloc_page(__GFP_ZERO); - if (!page) { - pr_err("Error allocating swap header page\n"); - ret = -ENOMEM; - goto fail; - } - zram->table[0].page = page; - zram_set_flag(zram, 0, ZRAM_UNCOMPRESSED); - swap_header = kmap(page); - setup_swap_header(zram, swap_header); - kunmap(page); -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->mem_pool = xv_create_pool(); - if (!zram->mem_pool) { - pr_err("Error creating memory pool\n"); - ret = -ENOMEM; - goto fail; - } - - zram->init_done = 1; - mutex_unlock(&zram->init_lock); - - pr_debug("Initialization done!\n"); - return 0; - -fail: - mutex_unlock(&zram->init_lock); - zram_reset_device(zram); - - pr_err("Initialization failed: err=%d\n", ret); - return ret; -} - -void zram_slot_free_notify(struct block_device *bdev, unsigned long index) -{ - struct zram *zram; - - zram = bdev->bd_disk->private_data; - zram_free_page(zram, index); - zram_stat64_inc(zram, &zram->stats.notify_free); -} - -static const struct block_device_operations zram_devops = { - .swap_slot_free_notify = zram_slot_free_notify, - .owner = THIS_MODULE -}; - -static int create_device(struct zram *zram, int device_id) -{ - int ret = 0; - - mutex_init(&zram->lock); - mutex_init(&zram->init_lock); - spin_lock_init(&zram->stat64_lock); 
- - zram->queue = blk_alloc_queue(GFP_KERNEL); - if (!zram->queue) { - pr_err("Error allocating disk queue for device %d\n", - device_id); - ret = -ENOMEM; - goto out; - } - - blk_queue_make_request(zram->queue, zram_make_request); - zram->queue->queuedata = zram; - - /* gendisk structure */ - zram->disk = alloc_disk(1); - if (!zram->disk) { - blk_cleanup_queue(zram->queue); - pr_warning("Error allocating disk structure for device %d\n", - device_id); - ret = -ENOMEM; - goto out; - } - - zram->disk->major = zram_major; - zram->disk->first_minor = device_id; - zram->disk->fops = &zram_devops; - zram->disk->queue = zram->queue; - zram->disk->private_data = zram; - snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - - /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ - set_capacity(zram->disk, 0); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - - add_disk(zram->disk); - - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_warning("Error creating sysfs group"); - goto out; - } - - zram->init_done = 0; - -out: - return ret; -} - -static void destroy_device(struct zram *zram) -{ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - - if (zram->disk) { - del_gendisk(zram->disk); - put_disk(zram->disk); - } - - if (zram->queue) - blk_cleanup_queue(zram->queue); -} - -static int __init zram_init(void) -{ - int ret, dev_id; - - if (num_devices > max_num_devices) { - pr_warning("Invalid value for num_devices: %u\n", - num_devices); - ret = -EINVAL; - goto out; - } - - zram_major = register_blkdev(0, "zram"); - if (zram_major <= 0) { - pr_warning("Unable to get major number\n"); 
- ret = -EBUSY; - goto out; - } - - if (!num_devices) { - pr_info("num_devices not specified. Using default: 1\n"); - num_devices = 1; - } - - /* Allocate the device array and initialize each one */ - pr_info("Creating %u devices ...\n", num_devices); - zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL); - if (!zram_devices) { - ret = -ENOMEM; - goto unregister; - } - - for (dev_id = 0; dev_id < num_devices; dev_id++) { - ret = create_device(&zram_devices[dev_id], dev_id); - if (ret) - goto free_devices; - } - - return 0; - -free_devices: - while (dev_id) - destroy_device(&zram_devices[--dev_id]); - kfree(zram_devices); -unregister: - unregister_blkdev(zram_major, "zram"); -out: - return ret; -} - -static void __exit zram_exit(void) -{ - int i; - struct zram *zram; - - for (i = 0; i < num_devices; i++) { - zram = &zram_devices[i]; - - destroy_device(zram); - if (zram->init_done) - zram_reset_device(zram); - } - - unregister_blkdev(zram_major, "zram"); - - kfree(zram_devices); - pr_debug("Cleanup done!\n"); -} - -module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); - -module_init(zram_init); -module_exit(zram_exit); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); -MODULE_DESCRIPTION("Compressed RAM Block Device"); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h deleted file mode 100644 index 3ad9486..0000000 --- a/drivers/staging/zram/zram_drv.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. 
- * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com - */ - -#ifndef _ZRAM_DRV_H_ -#define _ZRAM_DRV_H_ - -#include <linux/spinlock.h> -#include <linux/mutex.h> - -#include "xvmalloc.h" - -/* - * Some arbitrary value. This is just to catch - * invalid value for num_devices module parameter. - */ -static const unsigned max_num_devices = 32; - -/* - * Stored at beginning of each compressed object. - * - * It stores back-reference to table entry which points to this - * object. This is required to support memory defragmentation. - */ -struct zobj_header { -#if 0 - u32 table_idx; -#endif -}; - -/*-- Configurable parameters */ - -/* Default zram disk size: 25% of total RAM */ -static const unsigned default_disksize_perc_ram = 25; - -/* - * Pages that compress to size greater than this are stored - * uncompressed in memory. - */ -static const unsigned max_zpage_size = PAGE_SIZE / 4 * 3; - -/* - * NOTE: max_zpage_size must be less than or equal to: - * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header) - * otherwise, xv_malloc() would always return failure. 
- */ - -/*-- End of configurable params */ - -#define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1 << SECTOR_SHIFT) -#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) -#define ZRAM_LOGICAL_BLOCK_SIZE 4096 - -/* Flags for zram pages (table[page_no].flags) */ -enum zram_pageflags { - /* Page is stored uncompressed */ - ZRAM_UNCOMPRESSED, - - /* Page consists entirely of zeros */ - ZRAM_ZERO, - - __NR_ZRAM_PAGEFLAGS, -}; - -/*-- Data structures */ - -/* Allocated for each disk page */ -struct table { - struct page *page; - u16 offset; - u8 count; /* object ref count (not yet used) */ - u8 flags; -} __attribute__((aligned(4))); - -struct zram_stats { - u64 compr_size; /* compressed size of pages stored */ - u64 num_reads; /* failed + successful */ - u64 num_writes; /* --do-- */ - u64 failed_reads; /* should NEVER! happen */ - u64 failed_writes; /* can happen when memory is too low */ - u64 invalid_io; /* non-page-aligned I/O requests */ - u64 notify_free; /* no. of swap slot free notifications */ - u32 pages_zero; /* no. of zero filled pages */ - u32 pages_stored; /* no. of pages currently stored */ - u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 pages_expand; /* % of incompressible pages */ -}; - -struct zram { - struct xv_pool *mem_pool; - void *compress_workmem; - void *compress_buffer; - struct table *table; - spinlock_t stat64_lock; /* protect 64-bit stats */ - struct mutex lock; /* protect compression buffers against - * concurrent writes */ - struct request_queue *queue; - struct gendisk *disk; - int init_done; - /* Prevent concurrent execution of device init and reset */ - struct mutex init_lock; - /* - * This is the limit on amount of *uncompressed* worth of data - * we can store in a disk. 
- */ - u64 disksize; /* bytes */ - - struct zram_stats stats; -}; - -extern struct zram *zram_devices; -extern unsigned int num_devices; -#ifdef CONFIG_SYSFS -extern struct attribute_group zram_disk_attr_group; -#endif - -extern int zram_init_device(struct zram *zram); -extern void zram_reset_device(struct zram *zram); - -#endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c deleted file mode 100644 index 8a23554..0000000 --- a/drivers/staging/zram/zram_sysfs.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com/ - */ - -#include <linux/device.h> -#include <linux/genhd.h> -#include <linux/mm.h> - -#include "zram_drv.h" - -static u64 zram_stat64_read(struct zram *zram, u64 *v) -{ - u64 val; - - spin_lock(&zram->stat64_lock); - val = *v; - spin_unlock(&zram->stat64_lock); - - return val; -} - -static struct zram *dev_to_zram(struct device *dev) -{ - int i; - struct zram *zram = NULL; - - for (i = 0; i < num_devices; i++) { - zram = &zram_devices[i]; - if (disk_to_dev(zram->disk) == dev) - break; - } - - return zram; -} - -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", zram->disksize); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - struct zram *zram = dev_to_zram(dev); - - if (zram->init_done) { - pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; - } - - ret = strict_strtoull(buf, 10, &zram->disksize); - if (ret) - return 
ret; - - zram->disksize = PAGE_ALIGN(zram->disksize); - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - return len; -} - -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->init_done); -} - -#ifdef CONFIG_ZRAM_FOR_ANDROID -extern int swapon(const char*specialfile, int swap_flags); - -static ssize_t initstate_store(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t len) -{ - int ret; - unsigned long do_init; - struct zram *zram = dev_to_zram(dev); - - if (zram->init_done) { - pr_info("the device is initialized device\n"); - return -EBUSY; - } - - ret = strict_strtoul(buf, 10, &do_init); - if (ret) - return ret; - if (!do_init) - return -EINVAL; - - zram_init_device(zram); - swapon("/dev/block/zram0", 0); - return len; -} -#else -static inline ssize_t initstate_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) -{ - return 0; -} -#endif /* CONFIG_ZRAM_FOR_ANDROID */ - - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - unsigned long do_reset; - struct zram *zram; - struct block_device *bdev; - - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - /* Do not reset an active device! 
*/ - if (bdev->bd_holders) - return -EBUSY; - - ret = strict_strtoul(buf, 10, &do_reset); - if (ret) - return ret; - - if (!do_reset) - return -EINVAL; - - /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); - - if (zram->init_done) - zram_reset_device(zram); - - return len; -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->stats.pages_zero); -} - -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); -} - -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.compr_size)); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - 
u64 val = 0; - struct zram *zram = dev_to_zram(dev); - - if (zram->init_done) { - val = xv_get_total_size_bytes(zram->mem_pool) + - ((u64)(zram->stats.pages_expand) << PAGE_SHIFT); - } - - return sprintf(buf, "%llu\n", val); -} - -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, - disksize_show, disksize_store); -static DEVICE_ATTR(initstate, S_IRUGO | S_IWUSR, initstate_show, initstate_store); -static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); -static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); -static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); - -static struct attribute *zram_disk_attrs[] = { - &dev_attr_disksize.attr, - &dev_attr_initstate.attr, - &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, - NULL, -}; - -struct attribute_group zram_disk_attr_group = { - .attrs = zram_disk_attrs, -}; |