path: root/drivers/cpufreq
author     codeworkx <daniel.hillenbrand@codeworkx.de>   2012-06-02 13:09:29 +0200
committer  codeworkx <daniel.hillenbrand@codeworkx.de>   2012-06-02 13:09:29 +0200
commit     c6da2cfeb05178a11c6d062a06f8078150ee492f (patch)
tree       f3b4021d252c52d6463a9b3c1bb7245e399b009c /drivers/cpufreq
parent     c6d7c4dbff353eac7919342ae6b3299a378160a6 (diff)
download   kernel_samsung_smdk4412-c6da2cfeb05178a11c6d062a06f8078150ee492f.zip
           kernel_samsung_smdk4412-c6da2cfeb05178a11c6d062a06f8078150ee492f.tar.gz
           kernel_samsung_smdk4412-c6da2cfeb05178a11c6d062a06f8078150ee492f.tar.bz2
samsung update 1
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--  drivers/cpufreq/Kconfig               |   95
-rw-r--r--  drivers/cpufreq/Makefile              |    5
-rw-r--r--  drivers/cpufreq/cpufreq.c             |    2
-rw-r--r--  drivers/cpufreq/cpufreq_adaptive.c    |  952
-rw-r--r--  drivers/cpufreq/cpufreq_interactive.c |  707
-rw-r--r--  drivers/cpufreq/cpufreq_ondemand.c    |  462
-rw-r--r--  drivers/cpufreq/cpufreq_pegasusq.c    | 1411
-rw-r--r--  drivers/cpufreq/cpufreq_stats.c       |   25
-rw-r--r--  drivers/cpufreq/dvfs_monitor.c        |  236
9 files changed, 3886 insertions, 9 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 9fb8485..8e089bd 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -99,6 +99,30 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
Be aware that not all cpufreq drivers support the conservative
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+ bool "interactive"
+ select CPU_FREQ_GOV_INTERACTIVE
+ help
+ Use the CPUFreq governor 'interactive' as default. This allows
+ you to get a full dynamic cpu frequency capable system by simply
+ loading your cpufreq low-level hardware driver, using the
+ 'interactive' governor for latency-sensitive workloads.
+
+config CPU_FREQ_DEFAULT_GOV_ADAPTIVE
+ bool "adaptive"
+ select CPU_FREQ_GOV_ADAPTIVE
+ help
+ Use the CPUFreq governor 'adaptive' as default. This allows
+ you to get a full dynamic cpu frequency capable system by simply
+ loading your cpufreq low-level hardware driver, using the
+ 'adaptive' governor for latency-sensitive workloads and demanding
+ performance.
+
+config CPU_FREQ_DEFAULT_GOV_PEGASUSQ
+ bool "pegasusq"
+ select CPU_FREQ_GOV_PEGASUSQ
+
endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -156,6 +180,45 @@ config CPU_FREQ_GOV_ONDEMAND
If in doubt, say N.
+config CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ bool "flexrate interface for 'ondemand' cpufreq policy governor"
+ depends on CPU_FREQ_GOV_ONDEMAND
+ help
+ Flexrate for the 'ondemand' governor provides an interface to request
+ faster polling temporarily. This lets ondemand react quickly to load
+ changes when there is a high probability of a load increase within a
+ short time, for example when a user input event occurs. It does not
+ increase the frequency unconditionally; it only allows ondemand to
+ react faster by temporarily decreasing the sampling rate. Flexrate
+ provides both a sysfs interface and an in-kernel interface (an
+ illustrative sketch follows this Kconfig hunk).
+
+config CPU_FREQ_GOV_ONDEMAND_FLEXRATE_MAX_DURATION
+ int "flexrate's maximum duration of sampling rate override"
+ range 5 500
+ depends on CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ default "100"
+ help
+ The maximum number of ondemand sampling intervals whose rate is
+ overridden by Flexrate for ondemand.
+
+config CPU_FREQ_GOV_INTERACTIVE
+ tristate "'interactive' cpufreq policy governor"
+ help
+ 'interactive' - This driver adds a dynamic cpufreq policy governor
+ designed for latency-sensitive workloads.
+
+ This governor attempts to reduce the latency of clock
+ increases so that the system is more responsive to
+ interactive workloads.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_interactive.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
config CPU_FREQ_GOV_CONSERVATIVE
tristate "'conservative' cpufreq governor"
depends on CPU_FREQ
@@ -179,6 +242,38 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
+config CPU_FREQ_GOV_ADAPTIVE
+ tristate "'adaptive' cpufreq policy governor"
+ help
+ 'adaptive' - This driver adds a dynamic cpufreq policy governor
+ designed for latency-sensitive workloads and also for demanding
+ performance.
+
+ This governor attempts to reduce the latency of clock
+ increases so that the system is more responsive to
+ interactive workloads at the lowest steady state. To reduce
+ power consumption at intermediate operating levels, frequency
+ increases are performed step by step to keep the system from
+ jumping straight to the maximum operating level.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_adaptive.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
+config CPU_FREQ_GOV_PEGASUSQ
+ tristate "'pegasusq' cpufreq policy governor"
+
+config CPU_FREQ_DVFS_MONITOR
+ bool "dvfs monitor"
+ depends on CPU_FREQ
+ help
+ This option adds a proc node for dvfs monitoring.
+ /proc/dvfs_mon
+
+
menu "x86 CPU frequency scaling drivers"
depends on X86
source "drivers/cpufreq/Kconfig.x86"
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index e2fc2d2..ed91c0d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -9,10 +9,15 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o
+obj-$(CONFIG_CPU_FREQ_GOV_ADAPTIVE) += cpufreq_adaptive.o
+obj-$(CONFIG_CPU_FREQ_GOV_PEGASUSQ) += cpufreq_pegasusq.o
# CPUfreq cross-arch helpers
obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
+obj-$(CONFIG_CPU_FREQ_DVFS_MONITOR) += dvfs_monitor.o
+
##################################################################################
# x86 drivers.
# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0a5bea9..9785cf7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
* systems as each CPU might be scaled differently. So, use the arch
* per-CPU loops_per_jiffy value wherever possible.
*/
-#ifndef CONFIG_SMP
+#if !defined CONFIG_SMP || defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_EXYNOS5)
static unsigned long l_p_j_ref;
static unsigned int l_p_j_ref_freq;
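
The cpufreq.c hunk above only widens the #if guard, so the loops_per_jiffy recalibration also runs on Exynos4/Exynos5 SMP builds. As a rough sketch of what that recalibration computes (the guarded kernel code itself is not shown in this diff, so treat the helper below as an assumption-based model rather than the kernel's implementation), the delay-loop count is the boot-time reference scaled by the frequency ratio:

    #include <stdio.h>

    /* Sketch: lpj grows linearly with clock speed, so scale the boot-time
     * reference (sampled at ref_khz) to the new frequency. kHz units. */
    static unsigned long scale_lpj(unsigned long ref_lpj,
                                   unsigned int ref_khz, unsigned int new_khz)
    {
            return (unsigned long)(((unsigned long long)ref_lpj * new_khz) /
                                   ref_khz);
    }

    int main(void)
    {
            /* e.g. 4,996,096 lpj calibrated at 1.0 GHz, CPU now at 1.4 GHz */
            printf("%lu\n", scale_lpj(4996096UL, 1000000U, 1400000U));
            return 0;
    }

With these illustrative numbers, a reference of 4,996,096 lpj calibrated at 1.0 GHz scales to 6,994,534 lpj at 1.4 GHz.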
diff --git a/drivers/cpufreq/cpufreq_adaptive.c b/drivers/cpufreq/cpufreq_adaptive.c
new file mode 100644
index 0000000..ad7f7de
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_adaptive.c
@@ -0,0 +1,952 @@
+/*
+ * drivers/cpufreq/cpufreq_adaptive.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ * Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/jiffies.h>
+#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+
+#include <mach/ppmu.h>
+
+/*
+ * dbs is used in this file as a short form for demand-based switching.
+ * It helps to keep variable names smaller and simpler.
+ */
+
+#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10)
+#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3)
+#define MICRO_FREQUENCY_UP_THRESHOLD (95)
+#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
+#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MAX_FREQUENCY_UP_THRESHOLD (100)
+#define MIN_ONDEMAND_THRESHOLD (4)
+/*
+ * The polling frequency of this governor depends on the capability of
+ * the processor. The default polling frequency is 1000 times the
+ * transition latency of the processor. The governor will work on any
+ * processor with a transition latency <= 10 ms, using an appropriate
+ * sampling rate.
+ * For CPUs with a transition latency > 10 ms (mostly drivers with
+ * CPUFREQ_ETERNAL) this governor will not work.
+ * All times here are in us.
+ */
+#define MIN_SAMPLING_RATE_RATIO (2)
+
+static unsigned int min_sampling_rate;
+
+#define LATENCY_MULTIPLIER (1000)
+#define MIN_LATENCY_MULTIPLIER (100)
+#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
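+/*
+ * Worked example of the constants above (illustrative numbers, not from a
+ * specific SoC): a driver reporting a 100,000 ns transition latency gives
+ * latency = 100 us, so the default sampling rate becomes
+ * max(min_sampling_rate, 100 * LATENCY_MULTIPLIER) = 100,000 us (100 ms),
+ * while min_sampling_rate itself is raised to at least
+ * MIN_LATENCY_MULTIPLIER * 100 = 10,000 us (see CPUFREQ_GOV_START below).
+ */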
+
+static void (*pm_idle_old)(void);
+static void do_dbs_timer(struct work_struct *work);
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_adaptive = {
+ .name = "adaptive",
+ .governor = cpufreq_governor_dbs,
+ .max_transition_latency = TRANSITION_LATENCY_LIMIT,
+ .owner = THIS_MODULE,
+};
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+struct cpu_dbs_info_s {
+ cputime64_t prev_cpu_idle;
+ cputime64_t prev_cpu_iowait;
+ cputime64_t prev_cpu_wall;
+ cputime64_t prev_cpu_nice;
+ struct cpufreq_policy *cur_policy;
+ struct delayed_work work;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int freq_hi_jiffies;
+ int cpu;
+ unsigned int sample_type:1;
+ bool ondemand;
+ /*
+ * percpu mutex that serializes governor limit change with
+ * do_dbs_timer invocation. We do not want do_dbs_timer to run
+ * when user is changing the governor or limits.
+ */
+ struct mutex timer_mutex;
+};
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
+
+static unsigned int dbs_enable; /* number of CPUs using this policy */
+
+/*
+ * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
+ * different CPUs. It protects dbs_enable in governor start/stop.
+ */
+static DEFINE_MUTEX(dbs_mutex);
+static struct task_struct *up_task;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_down_work;
+static cpumask_t up_cpumask;
+static spinlock_t up_cpumask_lock;
+static cpumask_t down_cpumask;
+static spinlock_t down_cpumask_lock;
+
+static DEFINE_PER_CPU(cputime64_t, idle_in_idle);
+static DEFINE_PER_CPU(cputime64_t, idle_exit_wall);
+
+static struct timer_list cpu_timer;
+static unsigned int target_freq;
+static DEFINE_MUTEX(short_timer_mutex);
+
+/* Go to max speed when CPU load at or above this value. */
+#define DEFAULT_GO_MAXSPEED_LOAD 60
+static unsigned long go_maxspeed_load;
+
+#define DEFAULT_KEEP_MINSPEED_LOAD 30
+static unsigned long keep_minspeed_load;
+
+#define DEFAULT_STEPUP_LOAD 10
+static unsigned long step_up_load;
+
+static struct dbs_tuners {
+ unsigned int sampling_rate;
+ unsigned int up_threshold;
+ unsigned int down_differential;
+ unsigned int ignore_nice;
+ unsigned int io_is_busy;
+} dbs_tuners_ins = {
+ .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+ .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
+ .ignore_nice = 0,
+};
+
+static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall)
+{
+ u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
+
+ if (iowait_time == -1ULL)
+ return 0;
+
+ return iowait_time;
+}
+
+static void adaptive_init_cpu(int cpu)
+{
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+ dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
+}
+
+/************************** sysfs interface ************************/
+
+static ssize_t show_sampling_rate_max(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ printk_once(KERN_INFO "CPUFREQ: adaptive sampling_rate_max "
+ "sysfs file is deprecated - used by: %s\n", current->comm);
+ return sprintf(buf, "%u\n", -1U);
+}
+
+static ssize_t show_sampling_rate_min(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", min_sampling_rate);
+}
+
+define_one_global_ro(sampling_rate_max);
+define_one_global_ro(sampling_rate_min);
+
+/* cpufreq_adaptive Governor Tunables */
+#define show_one(file_name, object) \
+static ssize_t show_##file_name \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
+}
+show_one(sampling_rate, sampling_rate);
+show_one(io_is_busy, io_is_busy);
+show_one(up_threshold, up_threshold);
+show_one(ignore_nice_load, ignore_nice);
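+/*
+ * For reference, the show_one() uses above expand to trivial sysfs show
+ * handlers; e.g. show_one(sampling_rate, sampling_rate) is equivalent to:
+ *
+ *   static ssize_t show_sampling_rate(struct kobject *kobj,
+ *                                     struct attribute *attr, char *buf)
+ *   {
+ *           return sprintf(buf, "%u\n", dbs_tuners_ins.sampling_rate);
+ *   }
+ */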
+
+/*** delete after deprecation time ***/
+
+#define DEPRECATION_MSG(file_name) \
+ printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \
+ "interface is deprecated - " #file_name "\n");
+
+#define show_one_old(file_name) \
+static ssize_t show_##file_name##_old \
+(struct cpufreq_policy *unused, char *buf) \
+{ \
+ printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \
+ "interface is deprecated - " #file_name "\n"); \
+ return show_##file_name(NULL, NULL, buf); \
+}
+
+/*** delete after deprecation time ***/
+
+static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ mutex_lock(&dbs_mutex);
+ dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+ mutex_unlock(&dbs_mutex);
+
+ return count;
+}
+
+static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ mutex_lock(&dbs_mutex);
+ dbs_tuners_ins.io_is_busy = !!input;
+ mutex_unlock(&dbs_mutex);
+
+ return count;
+}
+
+static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
+ input < MIN_FREQUENCY_UP_THRESHOLD) {
+ return -EINVAL;
+ }
+
+ mutex_lock(&dbs_mutex);
+ dbs_tuners_ins.up_threshold = input;
+ mutex_unlock(&dbs_mutex);
+
+ return count;
+}
+
+static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ unsigned int j;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 1)
+ input = 1;
+
+ mutex_lock(&dbs_mutex);
+ if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
+ mutex_unlock(&dbs_mutex);
+ return count;
+ }
+ dbs_tuners_ins.ignore_nice = input;
+
+ /* we need to re-evaluate prev_cpu_idle */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j,
+ &dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice)
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+
+ }
+ mutex_unlock(&dbs_mutex);
+
+ return count;
+}
+
+define_one_global_rw(sampling_rate);
+define_one_global_rw(io_is_busy);
+define_one_global_rw(up_threshold);
+define_one_global_rw(ignore_nice_load);
+
+static struct attribute *dbs_attributes[] = {
+ &sampling_rate_max.attr,
+ &sampling_rate_min.attr,
+ &sampling_rate.attr,
+ &up_threshold.attr,
+ &ignore_nice_load.attr,
+ &io_is_busy.attr,
+ NULL
+};
+
+static struct attribute_group dbs_attr_group = {
+ .attrs = dbs_attributes,
+ .name = "adaptive",
+};
+
+/*** delete after deprecation time ***/
+
+#define write_one_old(file_name) \
+static ssize_t store_##file_name##_old \
+(struct cpufreq_policy *unused, const char *buf, size_t count) \
+{ \
+ printk_once(KERN_INFO "CPUFREQ: Per core adaptive sysfs " \
+ "interface is deprecated - " #file_name "\n"); \
+ return store_##file_name(NULL, NULL, buf, count); \
+}
+
+static void cpufreq_adaptive_timer(unsigned long data)
+{
+ cputime64_t cur_idle;
+ cputime64_t cur_wall;
+ unsigned int delta_idle;
+ unsigned int delta_time;
+ int short_load;
+ unsigned int new_freq;
+ unsigned long flags;
+ struct cpu_dbs_info_s *this_dbs_info;
+ struct cpufreq_policy *policy;
+ unsigned int j;
+ unsigned int index;
+ unsigned int max_load = 0;
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
+
+ policy = this_dbs_info->cur_policy;
+
+ for_each_online_cpu(j) {
+ cur_idle = get_cpu_idle_time_us(j, &cur_wall);
+
+ delta_idle = (unsigned int) cputime64_sub(cur_idle,
+ per_cpu(idle_in_idle, j));
+ delta_time = (unsigned int) cputime64_sub(cur_wall,
+ per_cpu(idle_exit_wall, j));
+
+ /*
+ * If timer ran less than 1ms after short-term sample started, retry.
+ */
+ if (delta_time < 1000)
+ goto do_nothing;
+
+ if (delta_idle > delta_time)
+ short_load = 0;
+ else
+ short_load = 100 * (delta_time - delta_idle) / delta_time;
+
+ if (short_load > max_load)
+ max_load = short_load;
+ }
+
+ if (this_dbs_info->ondemand)
+ goto do_nothing;
+
+ if (max_load >= go_maxspeed_load)
+ new_freq = policy->max;
+ else
+ new_freq = policy->max * max_load / 100;
+
+ if ((max_load <= keep_minspeed_load) &&
+ (policy->cur == policy->min))
+ new_freq = policy->cur;
+
+ if (cpufreq_frequency_table_target(policy, this_dbs_info->freq_table,
+ new_freq, CPUFREQ_RELATION_L,
+ &index)) {
+ goto do_nothing;
+ }
+
+ new_freq = this_dbs_info->freq_table[index].frequency;
+
+ target_freq = new_freq;
+
+ if (new_freq < this_dbs_info->cur_policy->cur) {
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ cpumask_set_cpu(0, &down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+ queue_work(down_wq, &freq_scale_down_work);
+ } else {
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ cpumask_set_cpu(0, &up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ wake_up_process(up_task);
+ }
+
+ return;
+
+do_nothing:
+ for_each_online_cpu(j) {
+ per_cpu(idle_in_idle, j) =
+ get_cpu_idle_time_us(j,
+ &per_cpu(idle_exit_wall, j));
+ }
+ mod_timer(&cpu_timer, jiffies + 2);
+ schedule_delayed_work_on(0, &this_dbs_info->work, 10);
+
+ if (mutex_is_locked(&short_timer_mutex))
+ mutex_unlock(&short_timer_mutex);
+ return;
+}
+
+/*** delete after deprecation time ***/
+
+/************************** sysfs end ************************/
+
+static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
+{
+#ifndef CONFIG_ARCH_EXYNOS4
+ if (p->cur == p->max)
+ return;
+#endif
+ __cpufreq_driver_target(p, freq, CPUFREQ_RELATION_H);
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+ unsigned int max_load_freq;
+
+ struct cpufreq_policy *policy;
+ unsigned int j;
+
+ unsigned int index, new_freq;
+ unsigned int longterm_load = 0;
+
+ policy = this_dbs_info->cur_policy;
+
+ /*
+ * Every sampling_rate, we check whether the current idle time is less
+ * than 20% (default); if it is, we try to increase the frequency.
+ * Every sampling_rate, we also look for the lowest
+ * frequency which can sustain the load while keeping idle time over
+ * 30%. If such a frequency exists, we try to decrease to this frequency.
+ *
+ * Any frequency increase takes it to the maximum frequency.
+ * Frequency reduction happens in minimum steps of
+ * 5% (default) of the current frequency.
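+ *
+ * Worked example (illustrative numbers): with wall_time = 100,000 us and
+ * idle_time = 30,000 us, load = 100 * 70,000 / 100,000 = 70. With
+ * freq_avg = 800,000 kHz that gives load_freq = 56,000,000; against
+ * up_threshold = 80 at policy->cur = 500,000 kHz the increase test below
+ * compares 56,000,000 > 80 * 500,000 = 40,000,000, so frequency is raised.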
+ */
+
+ /* Get Absolute Load - in terms of freq */
+ max_load_freq = 0;
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+ unsigned int idle_time, wall_time, iowait_time;
+ unsigned int load, load_freq;
+ int freq_avg;
+
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+
+ cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time);
+ cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
+
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ j_dbs_info->prev_cpu_wall);
+ j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ j_dbs_info->prev_cpu_idle);
+ j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+ iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
+ j_dbs_info->prev_cpu_iowait);
+ j_dbs_info->prev_cpu_iowait = cur_iowait_time;
+
+ if (dbs_tuners_ins.ignore_nice) {
+ cputime64_t cur_nice;
+ unsigned long cur_nice_jiffies;
+
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
+ /*
+ * Assumption: nice time between sampling periods will
+ * be less than 2^32 jiffies for 32 bit sys
+ */
+ cur_nice_jiffies = (unsigned long)
+ cputime64_to_jiffies64(cur_nice);
+
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ idle_time += jiffies_to_usecs(cur_nice_jiffies);
+ }
+
+ /*
+ * For the purpose of adaptive, waiting for disk IO is an
+ * indication that you're performance critical, and not that
+ * the system is actually idle. So subtract the iowait time
+ * from the cpu idle time.
+ */
+
+ if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
+ idle_time -= iowait_time;
+
+ if (unlikely(!wall_time || wall_time < idle_time))
+ continue;
+
+ load = 100 * (wall_time - idle_time) / wall_time;
+
+ if (load > longterm_load)
+ longterm_load = load;
+
+ freq_avg = __cpufreq_driver_getavg(policy, j);
+ if (freq_avg <= 0)
+ freq_avg = policy->cur;
+
+ load_freq = load * freq_avg;
+
+ if (load_freq > max_load_freq)
+ max_load_freq = load_freq;
+ }
+
+ if (longterm_load >= MIN_ONDEMAND_THRESHOLD)
+ this_dbs_info->ondemand = true;
+ else
+ this_dbs_info->ondemand = false;
+
+ /* Check for frequency increase */
+ if (max_load_freq > (dbs_tuners_ins.up_threshold * policy->cur)) {
+ cpufreq_frequency_table_target(policy,
+ this_dbs_info->freq_table,
+ (policy->cur + step_up_load),
+ CPUFREQ_RELATION_L, &index);
+
+ new_freq = this_dbs_info->freq_table[index].frequency;
+ dbs_freq_increase(policy, new_freq);
+ return;
+ }
+
+ /* Check for frequency decrease */
+ /* if we cannot reduce the frequency anymore, break out early */
+#ifndef CONFIG_ARCH_EXYNOS4
+ if (policy->cur == policy->min)
+ return;
+#endif
+ /*
+ * The optimal frequency is the frequency that is the lowest that
+ * can support the current CPU usage without triggering the up
+ * policy. To be safe, we focus 10 points under the threshold.
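+ *
+ * Example (illustrative numbers): with max_load_freq = 40,000,000
+ * (load 50 at 800,000 kHz), up_threshold = 80 and down_differential = 10,
+ * the test 40,000,000 < 70 * 800,000 passes and freq_next becomes
+ * 40,000,000 / 70 = 571,428 kHz before clamping to policy->min.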
+ */
+ if (max_load_freq <
+ (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
+ policy->cur) {
+ unsigned int freq_next;
+ freq_next = max_load_freq /
+ (dbs_tuners_ins.up_threshold -
+ dbs_tuners_ins.down_differential);
+
+ if (freq_next < policy->min)
+ freq_next = policy->min;
+
+ __cpufreq_driver_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ }
+}
+
+static void do_dbs_timer(struct work_struct *work)
+{
+ struct cpu_dbs_info_s *dbs_info =
+ container_of(work, struct cpu_dbs_info_s, work.work);
+ unsigned int cpu = dbs_info->cpu;
+
+ int delay;
+
+ mutex_lock(&dbs_info->timer_mutex);
+
+ /* Common NORMAL_SAMPLE setup */
+ dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+ dbs_check_cpu(dbs_info);
+
+ /* We want all CPUs to do sampling nearly on
+ * same jiffy
+ */
+ delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ schedule_delayed_work_on(cpu, &dbs_info->work, delay);
+
+ mutex_unlock(&dbs_info->timer_mutex);
+}
+
+static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+ /* We want all CPUs to do sampling nearly on same jiffy */
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ dbs_info->sample_type = DBS_NORMAL_SAMPLE;
+ INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+ cancel_delayed_work_sync(&dbs_info->work);
+}
+
+/*
+ * Not all CPUs want IO time to be accounted as busy; this depends on how
+ * efficient idling at a higher frequency/voltage is.
+ * Pavel Machek says this is not so for various generations of AMD and old
+ * Intel systems.
+ * Mike Chan (android.com) claims this is also not true for ARM.
+ * Because of this, whitelist specific known series of CPUs by default, and
+ * leave all others up to the user.
+ */
+static int should_io_be_busy(void)
+{
+#if defined(CONFIG_X86)
+ /*
+ * For Intel, Core 2 (model 15) and later have an efficient idle.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model >= 15)
+ return 1;
+#endif
+ return 0;
+}
+
+static void cpufreq_adaptive_idle(void)
+{
+ int i;
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, 0);
+ struct cpufreq_policy *policy;
+
+ policy = dbs_info->cur_policy;
+
+ pm_idle_old();
+
+ if ((policy->cur == policy->min) ||
+ (policy->cur == policy->max)) {
+
+ if (timer_pending(&cpu_timer))
+ return;
+
+ if (mutex_trylock(&short_timer_mutex)) {
+ for_each_online_cpu(i) {
+ per_cpu(idle_in_idle, i) =
+ get_cpu_idle_time_us(i,
+ &per_cpu(idle_exit_wall, i));
+ }
+
+ mod_timer(&cpu_timer, jiffies + 2);
+ cancel_delayed_work(&dbs_info->work);
+ }
+ } else {
+ if (timer_pending(&cpu_timer))
+ del_timer(&cpu_timer);
+
+ }
+}
+
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ struct cpu_dbs_info_s *this_dbs_info;
+ unsigned int j;
+ int rc;
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if ((!cpu_online(cpu)) || (!policy->cur))
+ return -EINVAL;
+
+ mutex_lock(&dbs_mutex);
+
+ rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
+ if (rc) {
+ mutex_unlock(&dbs_mutex);
+ return rc;
+ }
+
+ dbs_enable++;
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ j_dbs_info->cur_policy = policy;
+
+ j_dbs_info->prev_cpu_idle = get_cpu_idle_time_us(j,
+ &j_dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice) {
+ j_dbs_info->prev_cpu_nice =
+ kstat_cpu(j).cpustat.nice;
+ }
+ }
+ this_dbs_info->cpu = cpu;
+ adaptive_init_cpu(cpu);
+
+ /*
+ * Start the timer-scheduled work when this governor
+ * is used for the first time
+ */
+ if (dbs_enable == 1) {
+ unsigned int latency;
+
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+ if (rc) {
+ mutex_unlock(&dbs_mutex);
+ return rc;
+ }
+
+ /* policy latency is in ns. Convert it to us first */
+ latency = policy->cpuinfo.transition_latency / 1000;
+ if (latency == 0)
+ latency = 1;
+ /* Bring kernel and HW constraints together */
+ min_sampling_rate = max(min_sampling_rate,
+ MIN_LATENCY_MULTIPLIER * latency);
+ dbs_tuners_ins.sampling_rate =
+ max(min_sampling_rate,
+ latency * LATENCY_MULTIPLIER);
+ dbs_tuners_ins.io_is_busy = should_io_be_busy();
+ }
+ mutex_unlock(&dbs_mutex);
+
+ mutex_init(&this_dbs_info->timer_mutex);
+ dbs_timer_init(this_dbs_info);
+
+ pm_idle_old = pm_idle;
+ pm_idle = cpufreq_adaptive_idle;
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ dbs_timer_exit(this_dbs_info);
+
+ mutex_lock(&dbs_mutex);
+ sysfs_remove_group(&policy->kobj, &dbs_attr_group);
+ mutex_destroy(&this_dbs_info->timer_mutex);
+ dbs_enable--;
+ mutex_unlock(&dbs_mutex);
+ if (!dbs_enable)
+ sysfs_remove_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+
+ pm_idle = pm_idle_old;
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&this_dbs_info->timer_mutex);
+ if (policy->max < this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->min, CPUFREQ_RELATION_L);
+ mutex_unlock(&this_dbs_info->timer_mutex);
+ break;
+ }
+ return 0;
+}
+
+static inline void cpufreq_adaptive_update_time(void)
+{
+ struct cpu_dbs_info_s *this_dbs_info;
+ struct cpufreq_policy *policy;
+ int j;
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
+ policy = this_dbs_info->cur_policy;
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+
+ cur_idle_time = get_cpu_idle_time_us(j, &cur_wall_time);
+ cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
+
+ j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+ j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+ j_dbs_info->prev_cpu_iowait = cur_iowait_time;
+
+ if (dbs_tuners_ins.ignore_nice)
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+
+ }
+
+}
+
+static int cpufreq_adaptive_up_task(void *data)
+{
+ unsigned long flags;
+ struct cpu_dbs_info_s *this_dbs_info;
+ struct cpufreq_policy *policy;
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
+ policy = this_dbs_info->cur_policy;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+
+ if (cpumask_empty(&up_cpumask)) {
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
+
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ }
+
+ set_current_state(TASK_RUNNING);
+
+ cpumask_clear(&up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ target_freq,
+ CPUFREQ_RELATION_H);
+ if (policy->cur != policy->max) {
+ mutex_lock(&this_dbs_info->timer_mutex);
+
+ schedule_delayed_work_on(0, &this_dbs_info->work, delay);
+ mutex_unlock(&this_dbs_info->timer_mutex);
+ cpufreq_adaptive_update_time();
+ }
+ if (mutex_is_locked(&short_timer_mutex))
+ mutex_unlock(&short_timer_mutex);
+ }
+
+ return 0;
+}
+
+static void cpufreq_adaptive_freq_down(struct work_struct *work)
+{
+ unsigned long flags;
+ struct cpu_dbs_info_s *this_dbs_info;
+ struct cpufreq_policy *policy;
+ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ cpumask_clear(&down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, 0);
+ policy = this_dbs_info->cur_policy;
+
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ target_freq,
+ CPUFREQ_RELATION_H);
+
+ if (policy->cur != policy->min) {
+ mutex_lock(&this_dbs_info->timer_mutex);
+
+ schedule_delayed_work_on(0, &this_dbs_info->work, delay);
+ mutex_unlock(&this_dbs_info->timer_mutex);
+ cpufreq_adaptive_update_time();
+ }
+
+ if (mutex_is_locked(&short_timer_mutex))
+ mutex_unlock(&short_timer_mutex);
+}
+
+static int __init cpufreq_gov_dbs_init(void)
+{
+ cputime64_t wall;
+ u64 idle_time;
+ int cpu = get_cpu();
+
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
+ keep_minspeed_load = DEFAULT_KEEP_MINSPEED_LOAD;
+ step_up_load = DEFAULT_STEPUP_LOAD;
+
+ idle_time = get_cpu_idle_time_us(cpu, &wall);
+ put_cpu();
+ if (idle_time != -1ULL) {
+ /* Idle micro accounting is supported. Use finer thresholds */
+ dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
+ dbs_tuners_ins.down_differential =
+ MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
+ /*
+ * In the nohz/micro-accounting case we set the minimum sampling rate
+ * independent of HZ, to a fixed (very low) value. The deferrable
+ * timer may skip some samples while idle/sleeping, as needed.
+ */
+ min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
+ } else {
+ /* For correct statistics, we need 10 ticks for each measure */
+ min_sampling_rate =
+ MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
+ }
+
+ init_timer(&cpu_timer);
+ cpu_timer.function = cpufreq_adaptive_timer;
+
+ up_task = kthread_create(cpufreq_adaptive_up_task, NULL,
+ "kadaptiveup");
+
+ if (IS_ERR(up_task))
+ return PTR_ERR(up_task);
+
+ sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
+ get_task_struct(up_task);
+
+ /* No rescuer thread, bind to CPU queuing the work for possibly
+ warm cache (probably doesn't matter much). */
+ down_wq = alloc_workqueue("kadaptive_down", 0, 1);
+
+ if (!down_wq)
+ goto err_freeuptask;
+
+ INIT_WORK(&freq_scale_down_work, cpufreq_adaptive_freq_down);
+
+
+ return cpufreq_register_governor(&cpufreq_gov_adaptive);
+err_freeuptask:
+ put_task_struct(up_task);
+ return -ENOMEM;
+}
+
+static void __exit cpufreq_gov_dbs_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_adaptive);
+}
+
+
+MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
+MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
+MODULE_DESCRIPTION("'cpufreq_adaptive' - A dynamic cpufreq governor for "
+ "Low Latency Frequency Transition capable processors");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ADAPTIVE
+fs_initcall(cpufreq_gov_dbs_init);
+#else
+module_init(cpufreq_gov_dbs_init);
+#endif
+module_exit(cpufreq_gov_dbs_exit);
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 0000000..45266d5
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,707 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+
+#include <asm/cputime.h>
+
+static atomic_t active_count = ATOMIC_INIT(0);
+
+struct cpufreq_interactive_cpuinfo {
+ struct timer_list cpu_timer;
+ int timer_idlecancel;
+ u64 time_in_idle;
+ u64 idle_exit_time;
+ u64 timer_run_time;
+ int idling;
+ u64 freq_change_time;
+ u64 freq_change_time_in_idle;
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int target_freq;
+ int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* Workqueues handle frequency scaling */
+static struct task_struct *up_task;
+static struct workqueue_struct *down_wq;
+static struct work_struct freq_scale_down_work;
+static cpumask_t up_cpumask;
+static spinlock_t up_cpumask_lock;
+static cpumask_t down_cpumask;
+static spinlock_t down_cpumask_lock;
+static struct mutex set_speed_lock;
+
+/* Hi speed to bump to from lo speed when load burst (default max) */
+static u64 hispeed_freq;
+
+/* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 95
+static unsigned long go_hispeed_load;
+
+/*
+ * The minimum amount of time to spend at a frequency before we can ramp down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME 20 * USEC_PER_MSEC
+static unsigned long min_sample_time;
+
+/*
+ * The sample rate of the timer used to increase frequency
+ */
+#define DEFAULT_TIMER_RATE 20 * USEC_PER_MSEC
+static unsigned long timer_rate;
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+ .name = "interactive",
+ .governor = cpufreq_governor_interactive,
+ .max_transition_latency = 10000000,
+ .owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+ unsigned int delta_idle;
+ unsigned int delta_time;
+ int cpu_load;
+ int load_since_change;
+ u64 time_in_idle;
+ u64 idle_exit_time;
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, data);
+ u64 now_idle;
+ unsigned int new_freq;
+ unsigned int index;
+ unsigned long flags;
+
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ goto exit;
+
+ /*
+ * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
+ * this lets idle exit know the current idle time sample has
+ * been processed, and idle exit can generate a new sample and
+ * re-arm the timer. This prevents a concurrent idle
+ * exit on that CPU from writing a new set of info at the same time
+ * the timer function runs (the timer function can't use that info
+ * until more time passes).
+ */
+ time_in_idle = pcpu->time_in_idle;
+ idle_exit_time = pcpu->idle_exit_time;
+ now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
+ smp_wmb();
+
+ /* If we raced with cancelling a timer, skip. */
+ if (!idle_exit_time)
+ goto exit;
+
+ delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
+ delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+ idle_exit_time);
+
+ /*
+ * If timer ran less than 1ms after short-term sample started, retry.
+ */
+ if (delta_time < 1000)
+ goto rearm;
+
+ if (delta_idle > delta_time)
+ cpu_load = 0;
+ else
+ cpu_load = 100 * (delta_time - delta_idle) / delta_time;
+
+ delta_idle = (unsigned int) cputime64_sub(now_idle,
+ pcpu->freq_change_time_in_idle);
+ delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
+ pcpu->freq_change_time);
+
+ if ((delta_time == 0) || (delta_idle > delta_time))
+ load_since_change = 0;
+ else
+ load_since_change =
+ 100 * (delta_time - delta_idle) / delta_time;
+
+ /*
+ * Choose greater of short-term load (since last idle timer
+ * started or timer function re-armed itself) or long-term load
+ * (since last frequency change).
+ */
+ if (load_since_change > cpu_load)
+ cpu_load = load_since_change;
+
+ if (cpu_load >= go_hispeed_load) {
+ if (pcpu->policy->cur == pcpu->policy->min)
+ new_freq = hispeed_freq;
+ else
+ new_freq = pcpu->policy->max * cpu_load / 100;
+ } else {
+ new_freq = pcpu->policy->cur * cpu_load / 100;
+ }
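+ /*
+ * Example with the defaults above (illustrative numbers): with
+ * go_hispeed_load = 95 and cpu_load = 60 at policy->cur = 1,000,000 kHz,
+ * new_freq = 1,000,000 * 60 / 100 = 600,000 kHz before the table lookup
+ * below; a load burst of 95 or more while running at policy->min jumps
+ * straight to hispeed_freq instead.
+ */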
+
+ if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+ new_freq, CPUFREQ_RELATION_H,
+ &index)) {
+ pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
+ (int) data);
+ goto rearm;
+ }
+
+ new_freq = pcpu->freq_table[index].frequency;
+
+ if (pcpu->target_freq == new_freq)
+ goto rearm_if_notmax;
+
+ /*
+ * Do not scale down unless we have been at this frequency for the
+ * minimum sample time.
+ */
+ if (new_freq < pcpu->target_freq) {
+ if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time)
+ < min_sample_time)
+ goto rearm;
+ }
+
+ if (new_freq < pcpu->target_freq) {
+ pcpu->target_freq = new_freq;
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ cpumask_set_cpu(data, &down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+ queue_work(down_wq, &freq_scale_down_work);
+ } else {
+ pcpu->target_freq = new_freq;
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ cpumask_set_cpu(data, &up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ wake_up_process(up_task);
+ }
+
+rearm_if_notmax:
+ /*
+ * Already set max speed and don't see a need to change that,
+ * wait until next idle to re-evaluate, don't need timer.
+ */
+ if (pcpu->target_freq == pcpu->policy->max)
+ goto exit;
+
+rearm:
+ if (!timer_pending(&pcpu->cpu_timer)) {
+ /*
+ * If already at min: if that CPU is idle, don't set timer.
+ * Else cancel the timer if that CPU goes idle. We don't
+ * need to re-evaluate speed until the next idle exit.
+ */
+ if (pcpu->target_freq == pcpu->policy->min) {
+ smp_rmb();
+
+ if (pcpu->idling)
+ goto exit;
+
+ pcpu->timer_idlecancel = 1;
+ }
+
+ pcpu->time_in_idle = get_cpu_idle_time_us(
+ data, &pcpu->idle_exit_time);
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+
+exit:
+ return;
+}
+
+static void cpufreq_interactive_idle_start(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+ int pending;
+
+ if (!pcpu->governor_enabled)
+ return;
+
+ pcpu->idling = 1;
+ smp_wmb();
+ pending = timer_pending(&pcpu->cpu_timer);
+
+ if (pcpu->target_freq != pcpu->policy->min) {
+#ifdef CONFIG_SMP
+ /*
+ * Entering idle while not at lowest speed. On some
+ * platforms this can hold the other CPU(s) at that speed
+ * even though the CPU is idle. Set a timer to re-evaluate
+ * speed so this idle CPU doesn't hold the other CPUs above
+ * min indefinitely. This should probably be a quirk of
+ * the CPUFreq driver.
+ */
+ if (!pending) {
+ pcpu->time_in_idle = get_cpu_idle_time_us(
+ smp_processor_id(), &pcpu->idle_exit_time);
+ pcpu->timer_idlecancel = 0;
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+#endif
+ } else {
+ /*
+ * If at min speed and entering idle after load has
+ * already been evaluated, and a timer has been set just in
+ * case the CPU suddenly goes busy, cancel that timer. The
+ * CPU didn't go busy; we'll recheck things upon idle exit.
+ */
+ if (pending && pcpu->timer_idlecancel) {
+ del_timer(&pcpu->cpu_timer);
+ /*
+ * Ensure last timer run time is after current idle
+ * sample start time, so next idle exit will always
+ * start a new idle sampling period.
+ */
+ pcpu->idle_exit_time = 0;
+ pcpu->timer_idlecancel = 0;
+ }
+ }
+
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+
+ pcpu->idling = 0;
+ smp_wmb();
+
+ /*
+ * Arm the timer for 1-2 ticks later if not already, and if the timer
+ * function has already processed the previous load sampling
+ * interval. (If the timer is not pending but has not processed
+ * the previous interval, it is probably racing with us on another
+ * CPU. Let it compute load based on the previous sample and then
+ * re-arm the timer for another interval when it's done, rather
+ * than updating the interval start time to be "now", which doesn't
+ * give the timer function enough time to make a decision on this
+ * run.)
+ */
+ if (timer_pending(&pcpu->cpu_timer) == 0 &&
+ pcpu->timer_run_time >= pcpu->idle_exit_time &&
+ pcpu->governor_enabled) {
+ pcpu->time_in_idle =
+ get_cpu_idle_time_us(smp_processor_id(),
+ &pcpu->idle_exit_time);
+ pcpu->timer_idlecancel = 0;
+ mod_timer(&pcpu->cpu_timer,
+ jiffies + usecs_to_jiffies(timer_rate));
+ }
+
+}
+
+static int cpufreq_interactive_up_task(void *data)
+{
+ unsigned int cpu;
+ cpumask_t tmp_mask;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+
+ if (cpumask_empty(&up_cpumask)) {
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
+
+ spin_lock_irqsave(&up_cpumask_lock, flags);
+ }
+
+ set_current_state(TASK_RUNNING);
+ tmp_mask = up_cpumask;
+ cpumask_clear(&up_cpumask);
+ spin_unlock_irqrestore(&up_cpumask_lock, flags);
+
+ for_each_cpu(cpu, &tmp_mask) {
+ unsigned int j;
+ unsigned int max_freq = 0;
+
+ pcpu = &per_cpu(cpuinfo, cpu);
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ continue;
+
+ mutex_lock(&set_speed_lock);
+
+ for_each_cpu(j, pcpu->policy->cpus) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, j);
+
+ if (pjcpu->target_freq > max_freq)
+ max_freq = pjcpu->target_freq;
+ }
+
+ if (max_freq != pcpu->policy->cur)
+ __cpufreq_driver_target(pcpu->policy,
+ max_freq,
+ CPUFREQ_RELATION_H);
+ mutex_unlock(&set_speed_lock);
+
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(cpu,
+ &pcpu->freq_change_time);
+ }
+ }
+
+ return 0;
+}
+
+static void cpufreq_interactive_freq_down(struct work_struct *work)
+{
+ unsigned int cpu;
+ cpumask_t tmp_mask;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ spin_lock_irqsave(&down_cpumask_lock, flags);
+ tmp_mask = down_cpumask;
+ cpumask_clear(&down_cpumask);
+ spin_unlock_irqrestore(&down_cpumask_lock, flags);
+
+ for_each_cpu(cpu, &tmp_mask) {
+ unsigned int j;
+ unsigned int max_freq = 0;
+
+ pcpu = &per_cpu(cpuinfo, cpu);
+ smp_rmb();
+
+ if (!pcpu->governor_enabled)
+ continue;
+
+ mutex_lock(&set_speed_lock);
+
+ for_each_cpu(j, pcpu->policy->cpus) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, j);
+
+ if (pjcpu->target_freq > max_freq)
+ max_freq = pjcpu->target_freq;
+ }
+
+ if (max_freq != pcpu->policy->cur)
+ __cpufreq_driver_target(pcpu->policy, max_freq,
+ CPUFREQ_RELATION_H);
+
+ mutex_unlock(&set_speed_lock);
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(cpu,
+ &pcpu->freq_change_time);
+ }
+}
+
+static ssize_t show_hispeed_freq(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%llu\n", hispeed_freq);
+}
+
+static ssize_t store_hispeed_freq(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ int ret;
+ u64 val;
+
+ ret = strict_strtoull(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ hispeed_freq = val;
+ return count;
+}
+
+static struct global_attr hispeed_freq_attr = __ATTR(hispeed_freq, 0644,
+ show_hispeed_freq, store_hispeed_freq);
+
+
+static ssize_t show_go_hispeed_load(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", go_hispeed_load);
+}
+
+static ssize_t store_go_hispeed_load(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ go_hispeed_load = val;
+ return count;
+}
+
+static struct global_attr go_hispeed_load_attr = __ATTR(go_hispeed_load, 0644,
+ show_go_hispeed_load, store_go_hispeed_load);
+
+static ssize_t show_min_sample_time(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ min_sample_time = val;
+ return count;
+}
+
+static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
+ show_min_sample_time, store_min_sample_time);
+
+static ssize_t show_timer_rate(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", timer_rate);
+}
+
+static ssize_t store_timer_rate(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ timer_rate = val;
+ return count;
+}
+
+static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
+ show_timer_rate, store_timer_rate);
+
+static struct attribute *interactive_attributes[] = {
+ &hispeed_freq_attr.attr,
+ &go_hispeed_load_attr.attr,
+ &min_sample_time_attr.attr,
+ &timer_rate_attr.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group = {
+ .attrs = interactive_attributes,
+ .name = "interactive",
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int rc;
+ unsigned int j;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct cpufreq_frequency_table *freq_table;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if (!cpu_online(policy->cpu))
+ return -EINVAL;
+
+ freq_table =
+ cpufreq_frequency_get_table(policy->cpu);
+
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->policy = policy;
+ pcpu->target_freq = policy->cur;
+ pcpu->freq_table = freq_table;
+ pcpu->freq_change_time_in_idle =
+ get_cpu_idle_time_us(j,
+ &pcpu->freq_change_time);
+ pcpu->governor_enabled = 1;
+ smp_wmb();
+ }
+
+ if (!hispeed_freq)
+ hispeed_freq = policy->max;
+
+ /*
+ * Do not register the idle hook and create sysfs
+ * entries if we have already done so.
+ */
+ if (atomic_inc_return(&active_count) > 1)
+ return 0;
+
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &interactive_attr_group);
+ if (rc)
+ return rc;
+
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->governor_enabled = 0;
+ smp_wmb();
+ del_timer_sync(&pcpu->cpu_timer);
+
+ /*
+ * Reset idle exit time since we may cancel the timer
+ * before it can run after the last idle exit time,
+ * to avoid tripping the check in idle exit for a timer
+ * that is trying to run.
+ */
+ pcpu->idle_exit_time = 0;
+ }
+
+ flush_work(&freq_scale_down_work);
+ if (atomic_dec_return(&active_count) > 0)
+ return 0;
+
+ sysfs_remove_group(cpufreq_global_kobject,
+ &interactive_attr_group);
+
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->min, CPUFREQ_RELATION_L);
+ break;
+ }
+ return 0;
+}
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ switch (val) {
+ case IDLE_START:
+ cpufreq_interactive_idle_start();
+ break;
+ case IDLE_END:
+ cpufreq_interactive_idle_end();
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+ .notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+static int __init cpufreq_interactive_init(void)
+{
+ unsigned int i;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+ min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+ timer_rate = DEFAULT_TIMER_RATE;
+
+ /* Initialize per-cpu timers */
+ for_each_possible_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+ init_timer(&pcpu->cpu_timer);
+ pcpu->cpu_timer.function = cpufreq_interactive_timer;
+ pcpu->cpu_timer.data = i;
+ }
+
+ up_task = kthread_create(cpufreq_interactive_up_task, NULL,
+ "kinteractiveup");
+ if (IS_ERR(up_task))
+ return PTR_ERR(up_task);
+
+ sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
+ get_task_struct(up_task);
+
+ /* No rescuer thread, bind to CPU queuing the work for possibly
+ warm cache (probably doesn't matter much). */
+ down_wq = alloc_workqueue("knteractive_down", 0, 1);
+
+ if (!down_wq)
+ goto err_freeuptask;
+
+ INIT_WORK(&freq_scale_down_work,
+ cpufreq_interactive_freq_down);
+
+ spin_lock_init(&up_cpumask_lock);
+ spin_lock_init(&down_cpumask_lock);
+ mutex_init(&set_speed_lock);
+
+ idle_notifier_register(&cpufreq_interactive_idle_nb);
+
+ return cpufreq_register_governor(&cpufreq_gov_interactive);
+
+err_freeuptask:
+ put_task_struct(up_task);
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+fs_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_interactive);
+ kthread_stop(up_task);
+ put_task_struct(up_task);
+ destroy_workqueue(down_wq);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+ "Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 891360e..68a15b6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -22,6 +22,7 @@
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>
+#include <linux/pm_qos_params.h>
/*
* dbs is used in this file as a shortform for demandbased switching
@@ -93,6 +94,10 @@ struct cpu_dbs_info_s {
* when user is changing the governor or limits.
*/
struct mutex timer_mutex;
+ bool activated; /* dbs_timer_init is in effect */
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ unsigned int flex_duration;
+#endif
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
@@ -102,6 +107,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
* dbs_mutex protects dbs_enable in governor start/stop.
*/
static DEFINE_MUTEX(dbs_mutex);
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+static DEFINE_MUTEX(flex_mutex);
+#endif
static struct dbs_tuners {
unsigned int sampling_rate;
@@ -111,12 +119,20 @@ static struct dbs_tuners {
unsigned int sampling_down_factor;
unsigned int powersave_bias;
unsigned int io_is_busy;
+ struct notifier_block dvfs_lat_qos_db;
+ unsigned int dvfs_lat_qos_wants;
+ unsigned int freq_step;
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ unsigned int flex_sampling_rate;
+ unsigned int flex_duration;
+#endif
} dbs_tuners_ins = {
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
.ignore_nice = 0,
.powersave_bias = 0,
+ .freq_step = 100,
};
static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
@@ -163,6 +179,23 @@ static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wal
}
/*
+ * Find right sampling rate based on sampling_rate and
+ * QoS requests on dvfs latency.
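+ *
+ * Example (illustrative numbers): with sampling_rate = 100,000 us and a
+ * DVFS-latency QoS request of 20,000 us, the effective rate is 20,000 us
+ * (never below min_sampling_rate); with no QoS request it is simply
+ * sampling_rate.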
+ */
+static unsigned int effective_sampling_rate(void)
+{
+ unsigned int effective;
+
+ if (dbs_tuners_ins.dvfs_lat_qos_wants)
+ effective = min(dbs_tuners_ins.dvfs_lat_qos_wants,
+ dbs_tuners_ins.sampling_rate);
+ else
+ effective = dbs_tuners_ins.sampling_rate;
+
+ return max(effective, min_sampling_rate);
+}
+
+/*
* Find right freq to be set now with powersave_bias on.
* Returns the freq_hi to be used right now and will set freq_hi_jiffies,
* freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
@@ -206,7 +239,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
dbs_info->freq_lo_jiffies = 0;
return freq_lo;
}
- jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+ jiffies_total = usecs_to_jiffies(effective_sampling_rate());
jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
jiffies_hi += ((freq_hi - freq_lo) / 2);
jiffies_hi /= (freq_hi - freq_lo);
@@ -255,6 +288,95 @@ show_one(up_threshold, up_threshold);
show_one(sampling_down_factor, sampling_down_factor);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);
+show_one(down_differential, down_differential);
+show_one(freq_step, freq_step);
+
+/**
+ * update_sampling_rate - update sampling rate, effective immediately if needed.
+ * @new_rate: new sampling rate. If it is 0, regard the sampling rate as
+ * unchanged and assume that the QoS request value has changed.
+ *
+ * If the new rate is smaller than the old, simply updating
+ * dbs_tuners_ins.sampling_rate might not be appropriate. For example,
+ * if the original sampling_rate was 1 second and the requested new sampling
+ * rate is 10 ms because the user needs an immediate reaction from the
+ * ondemand governor, but is not sure whether a higher frequency will be
+ * required, then the governor may change the sampling rate too late; up to
+ * 1 second later. Thus, if we are reducing the sampling rate, we need to
+ * make the new value effective immediately.
+ */
+static void update_sampling_rate(unsigned int new_rate)
+{
+ int cpu;
+ unsigned int effective;
+
+ if (new_rate)
+ dbs_tuners_ins.sampling_rate = max(new_rate, min_sampling_rate);
+
+ effective = effective_sampling_rate();
+
+ for_each_online_cpu(cpu) {
+ struct cpufreq_policy *policy;
+ struct cpu_dbs_info_s *dbs_info;
+ unsigned long next_sampling, appointed_at;
+
+ /*
+ * mutex_destroy(&dbs_info->timer_mutex) should not happen
+ * in this context. Apart from here, dbs_mutex is locked/unlocked
+ * only in the GOV_START and GOV_STOP paths.
+ */
+ mutex_lock(&dbs_mutex);
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ mutex_unlock(&dbs_mutex);
+ continue;
+ }
+ dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
+ cpufreq_cpu_put(policy);
+
+ /* timer_mutex is destroyed or will be destroyed soon */
+ if (!dbs_info->activated) {
+ mutex_unlock(&dbs_mutex);
+ continue;
+ }
+
+ mutex_lock(&dbs_info->timer_mutex);
+
+ if (!delayed_work_pending(&dbs_info->work)) {
+ mutex_unlock(&dbs_info->timer_mutex);
+ mutex_unlock(&dbs_mutex);
+ continue;
+ }
+
+ next_sampling = jiffies + usecs_to_jiffies(new_rate);
+ appointed_at = dbs_info->work.timer.expires;
+
+ if (time_before(next_sampling, appointed_at)) {
+ mutex_unlock(&dbs_info->timer_mutex);
+ cancel_delayed_work_sync(&dbs_info->work);
+ mutex_lock(&dbs_info->timer_mutex);
+
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work,
+ usecs_to_jiffies(effective));
+ }
+ mutex_unlock(&dbs_info->timer_mutex);
+
+ /*
+ * Cover the small possibility that dbs_timer_exit() has been
+ * called after checking dbs_info->activated above.
+ * If cancel_delayed_work_sync() has been called by
+ * dbs_timer_exit() before the schedule_delayed_work_on() in this
+ * function, the work must be cancelled again
+ * before releasing dbs_mutex, which will trigger mutex_destroy
+ * to be called.
+ */
+ if (!dbs_info->activated)
+ cancel_delayed_work_sync(&dbs_info->work);
+
+ mutex_unlock(&dbs_mutex);
+ }
+}
static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
const char *buf, size_t count)
@@ -264,7 +386,7 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
- dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+ update_sampling_rate(input);
return count;
}
@@ -367,12 +489,46 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
return count;
}
+static ssize_t store_down_differential(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.down_differential = min(input, 100u);
+ return count;
+}
+
+static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.freq_step = min(input, 100u);
+ return count;
+}
+
+
define_one_global_rw(sampling_rate);
define_one_global_rw(io_is_busy);
define_one_global_rw(up_threshold);
define_one_global_rw(sampling_down_factor);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(powersave_bias);
+define_one_global_rw(down_differential);
+define_one_global_rw(freq_step);
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+static struct global_attr flexrate_request;
+static struct global_attr flexrate_duration;
+static struct global_attr flexrate_enable;
+static struct global_attr flexrate_forcerate;
+static struct global_attr flexrate_num_effective_usage;
+#endif
static struct attribute *dbs_attributes[] = {
&sampling_rate_min.attr,
@@ -382,6 +538,15 @@ static struct attribute *dbs_attributes[] = {
&ignore_nice_load.attr,
&powersave_bias.attr,
&io_is_busy.attr,
+ &down_differential.attr,
+ &freq_step.attr,
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ &flexrate_request.attr,
+ &flexrate_duration.attr,
+ &flexrate_enable.attr,
+ &flexrate_forcerate.attr,
+ &flexrate_num_effective_usage.attr,
+#endif
NULL
};
@@ -396,8 +561,10 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
{
if (dbs_tuners_ins.powersave_bias)
freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+#if !defined(CONFIG_ARCH_EXYNOS4) && !defined(CONFIG_ARCH_EXYNOS5)
else if (p->cur == p->max)
return;
+#endif
__cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ?
CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
@@ -495,18 +662,22 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
/* Check for frequency increase */
if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
+ int inc = (policy->max * dbs_tuners_ins.freq_step) / 100;
+ int target = min(policy->max, policy->cur + inc);
/* If switching to max speed, apply sampling_down_factor */
- if (policy->cur < policy->max)
+ if (policy->cur < policy->max && target == policy->max)
this_dbs_info->rate_mult =
dbs_tuners_ins.sampling_down_factor;
- dbs_freq_increase(policy, policy->max);
+ dbs_freq_increase(policy, target);
return;
}
/* Check for frequency decrease */
+#if !defined(CONFIG_ARCH_EXYNOS4) && !defined(CONFIG_ARCH_EXYNOS5)
/* if we cannot reduce the frequency anymore, break out early */
if (policy->cur == policy->min)
return;
+#endif
/*
* The optimal frequency is the frequency that is the lowest that
@@ -563,7 +734,7 @@ static void do_dbs_timer(struct work_struct *work)
/* We want all CPUs to do sampling nearly on
* same jiffy
*/
- delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
+ delay = usecs_to_jiffies(effective_sampling_rate()
* dbs_info->rate_mult);
if (num_online_cpus() > 1)
@@ -574,6 +745,23 @@ static void do_dbs_timer(struct work_struct *work)
dbs_info->freq_lo, CPUFREQ_RELATION_H);
delay = dbs_info->freq_lo_jiffies;
}
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+ if (dbs_info->flex_duration) {
+ struct cpufreq_policy *policy = dbs_info->cur_policy;
+
+ mutex_lock(&flex_mutex);
+ delay = usecs_to_jiffies(dbs_tuners_ins.flex_sampling_rate);
+
+ /* If it's already max, we don't need to iterate fast */
+ if (policy->cur >= policy->max)
+ dbs_info->flex_duration = 1;
+
+ if (--dbs_info->flex_duration < dbs_tuners_ins.flex_duration) {
+ dbs_tuners_ins.flex_duration = dbs_info->flex_duration;
+ }
+ mutex_unlock(&flex_mutex);
+ }
+#endif /* CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE */
schedule_delayed_work_on(cpu, &dbs_info->work, delay);
mutex_unlock(&dbs_info->timer_mutex);
}
@@ -581,18 +769,20 @@ static void do_dbs_timer(struct work_struct *work)
static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
/* We want all CPUs to do sampling nearly on same jiffy */
- int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+ int delay = usecs_to_jiffies(effective_sampling_rate());
if (num_online_cpus() > 1)
delay -= jiffies % delay;
dbs_info->sample_type = DBS_NORMAL_SAMPLE;
INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
- schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, 10 * delay);
+ dbs_info->activated = true;
}
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
+ dbs_info->activated = false;
cancel_delayed_work_sync(&dbs_info->work);
}
@@ -711,11 +901,40 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
return 0;
}
+/**
+ * qos_dvfs_lat_notify - PM QoS Notifier for DVFS_LATENCY QoS Request
+ * @nb: notifier block struct
+ * @value: QoS value
+ * @dummy: unused
+ */
+static int qos_dvfs_lat_notify(struct notifier_block *nb, unsigned long value,
+ void *dummy)
+{
+ /*
+ * In the worst case, with the CPU load rising from just below
+ * up_threshold (up_threshold - e) to a sustained up_threshold + e,
+ * the ondemand governor reacts within sampling_rate * 2.
+ *
+ * Thus, based on this worst-case scenario, we use value / 2.
+ */
+ dbs_tuners_ins.dvfs_lat_qos_wants = value / 2;
+
+ /* Update sampling rate */
+ update_sampling_rate(0);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ondemand_qos_dvfs_lat_nb = {
+ .notifier_call = qos_dvfs_lat_notify,
+};
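+
+/*
+ * Example (illustrative): if a driver requests a DVFS response latency of
+ * 100000 us via PM_QOS_DVFS_RESPONSE_LATENCY, the notifier above stores
+ * 50000 us in dvfs_lat_qos_wants.  Assuming effective_sampling_rate()
+ * picks the smaller of the user-set sampling_rate and this QoS-derived
+ * value, a user sampling_rate of 200000 us would then be capped to
+ * 50000 us until the QoS request is released.
+ */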
+
static int __init cpufreq_gov_dbs_init(void)
{
cputime64_t wall;
u64 idle_time;
int cpu = get_cpu();
+ int err = 0;
idle_time = get_cpu_idle_time_us(cpu, &wall);
put_cpu();
@@ -736,14 +955,241 @@ static int __init cpufreq_gov_dbs_init(void)
MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
}
- return cpufreq_register_governor(&cpufreq_gov_ondemand);
+ err = pm_qos_add_notifier(PM_QOS_DVFS_RESPONSE_LATENCY,
+ &ondemand_qos_dvfs_lat_nb);
+ if (err)
+ return err;
+
+ err = cpufreq_register_governor(&cpufreq_gov_ondemand);
+ if (err) {
+ pm_qos_remove_notifier(PM_QOS_DVFS_RESPONSE_LATENCY,
+ &ondemand_qos_dvfs_lat_nb);
+ }
+
+ return err;
}
static void __exit cpufreq_gov_dbs_exit(void)
{
+ pm_qos_remove_notifier(PM_QOS_DVFS_RESPONSE_LATENCY,
+ &ondemand_qos_dvfs_lat_nb);
+
cpufreq_unregister_governor(&cpufreq_gov_ondemand);
}
+#ifdef CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE
+static unsigned int max_duration =
+ (CONFIG_CPU_FREQ_GOV_ONDEMAND_FLEXRATE_MAX_DURATION);
+#define DEFAULT_DURATION (5)
+static unsigned int sysfs_duration = DEFAULT_DURATION;
+static bool flexrate_enabled = true;
+static unsigned int forced_rate;
+static unsigned int flexrate_num_effective;
+
+static int cpufreq_ondemand_flexrate_do(struct cpufreq_policy *policy,
+ bool now)
+{
+ unsigned int cpu = policy->cpu;
+ bool using_ondemand;
+ struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+
+ WARN(!mutex_is_locked(&flex_mutex), "flex_mutex not locked\n");
+
+ dbs_info->flex_duration = dbs_tuners_ins.flex_duration;
+
+ if (now) {
+ flexrate_num_effective++;
+
+ mutex_lock(&dbs_mutex);
+ using_ondemand = dbs_enable && !strncmp(policy->governor->name, "ondemand", 8);
+ mutex_unlock(&dbs_mutex);
+
+ if (!using_ondemand)
+ return 0;
+
+ mutex_unlock(&flex_mutex);
+ mutex_lock(&dbs_info->timer_mutex);
+
+ /* Do It! */
+ cancel_delayed_work_sync(&dbs_info->work);
+ schedule_delayed_work_on(cpu, &dbs_info->work, 1);
+
+ mutex_unlock(&dbs_info->timer_mutex);
+ mutex_lock(&flex_mutex);
+ }
+
+ return 0;
+}
+
+int cpufreq_ondemand_flexrate_request(unsigned int rate_us,
+ unsigned int duration)
+{
+ int err = 0;
+
+ if (!flexrate_enabled)
+ return 0;
+
+ if (forced_rate)
+ rate_us = forced_rate;
+
+ mutex_lock(&flex_mutex);
+
+ /* Unnecessary requests are dropped */
+ if (rate_us >= dbs_tuners_ins.sampling_rate)
+ goto out;
+ if (rate_us >= dbs_tuners_ins.flex_sampling_rate &&
+ duration <= dbs_tuners_ins.flex_duration)
+ goto out;
+
+ duration = min(max_duration, duration);
+ if (rate_us > 0 && rate_us < min_sampling_rate)
+ rate_us = min_sampling_rate;
+
+ err = 1; /* Need update */
+
+ /* Cancel the active flexrate requests */
+ if (rate_us == 0 || duration == 0) {
+ dbs_tuners_ins.flex_duration = 0;
+ dbs_tuners_ins.flex_sampling_rate = 0;
+ goto out;
+ }
+
+ if (dbs_tuners_ins.flex_sampling_rate == 0 ||
+ dbs_tuners_ins.flex_sampling_rate > rate_us)
+ err = 2; /* Need to poll faster */
+
+ /* Set new flexrate per the request */
+ dbs_tuners_ins.flex_sampling_rate =
+ min(dbs_tuners_ins.flex_sampling_rate, rate_us);
+ dbs_tuners_ins.flex_duration =
+ max(dbs_tuners_ins.flex_duration, duration);
+out:
+ /* Apply new flexrate */
+ if (err > 0) {
+ bool now = (err == 2);
+ int cpu = 0;
+
+ /* TODO: For every CPU using ONDEMAND */
+ err = cpufreq_ondemand_flexrate_do(cpufreq_cpu_get(cpu), now);
+ }
+ mutex_unlock(&flex_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(cpufreq_ondemand_flexrate_request);
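+
+/*
+ * In-kernel usage sketch (illustrative): a driver expecting a short load
+ * burst, e.g. on a user input event, can ask ondemand to poll every 10 ms
+ * for the next 5 samples:
+ *
+ *	cpufreq_ondemand_flexrate_request(10000, 5);
+ *
+ * Requests slower than the current rate, or shorter than an already active
+ * request, are dropped by the checks above.
+ */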
+
+static ssize_t store_flexrate_request(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int rate;
+ int ret;
+
+ ret = sscanf(buf, "%u", &rate);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = cpufreq_ondemand_flexrate_request(rate, sysfs_duration);
+ if (ret)
+ return ret;
+ return count;
+}
+
+static ssize_t show_flexrate_request(struct kobject *a, struct attribute *b,
+ char *buf)
+{
+ return sprintf(buf, "Flexrate temporarily shortens the CPUFreq ondemand governor's polling interval.\n"
+ "Usage example:\n"
+ "# echo 8 > flexrate_duration\n"
+ "# echo 10000 > flexrate_request\n"
+ "With the second command, ondemand polls at a 10ms (10000us) interval 8 times.\n"
+ "Run \"cat flexrate_duration\" to see the current duration setting.\n");
+}
+
+static ssize_t store_flexrate_duration(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int duration;
+ int ret;
+
+ /* mutex not needed for flexrate_sysfs_duration */
+ ret = sscanf(buf, "%u", &duration);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (duration == 0)
+ duration = DEFAULT_DURATION;
+ if (duration > max_duration)
+ duration = max_duration;
+
+ sysfs_duration = duration;
+ return count;
+}
+
+static ssize_t show_flexrate_duration(struct kobject *a, struct attribute *b,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", sysfs_duration);
+}
+
+static ssize_t store_flexrate_enable(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 0)
+ flexrate_enabled = true;
+ else
+ flexrate_enabled = false;
+
+ return count;
+}
+
+static ssize_t show_flexrate_enable(struct kobject *a, struct attribute *b,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", !!flexrate_enabled);
+}
+
+static ssize_t store_flexrate_forcerate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int rate;
+ int ret;
+
+ ret = sscanf(buf, "%u", &rate);
+ if (ret != 1)
+ return -EINVAL;
+
+ forced_rate = rate;
+
+ pr_info("CAUTION: flexrate_forcerate is for debugging/benchmarking only.\n");
+ return count;
+}
+
+static ssize_t show_flexrate_forcerate(struct kobject *a, struct attribute *b,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", forced_rate);
+}
+
+static ssize_t show_flexrate_num_effective_usage(struct kobject *a,
+ struct attribute *b,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", flexrate_num_effective);
+}
+
+define_one_global_rw(flexrate_request);
+define_one_global_rw(flexrate_duration);
+define_one_global_rw(flexrate_enable);
+define_one_global_rw(flexrate_forcerate);
+define_one_global_ro(flexrate_num_effective_usage);
+#endif
+
MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
diff --git a/drivers/cpufreq/cpufreq_pegasusq.c b/drivers/cpufreq/cpufreq_pegasusq.c
new file mode 100644
index 0000000..4a90a01
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_pegasusq.c
@@ -0,0 +1,1411 @@
+/*
+ * drivers/cpufreq/cpufreq_pegasusq.c
+ *
+ * Copyright (C) 2011 Samsung Electronics co. ltd
+ * ByungChang Cha <bc.cha@samsung.com>
+ *
+ * Based on ondemand governor
+ * Copyright (C) 2001 Russell King
+ * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
+ * Jun Nakajima <jun.nakajima@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/jiffies.h>
+#include <linux/kernel_stat.h>
+#include <linux/mutex.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/suspend.h>
+#include <linux/reboot.h>
+
+#ifdef CONFIG_HAS_EARLYSUSPEND
+#include <linux/earlysuspend.h>
+#endif
+
+/*
+ * runqueue average
+ */
+
+#define RQ_AVG_TIMER_RATE 10
+
+struct runqueue_data {
+ unsigned int nr_run_avg;
+ unsigned int update_rate;
+ int64_t last_time;
+ int64_t total_time;
+ struct delayed_work work;
+ struct workqueue_struct *nr_run_wq;
+ spinlock_t lock;
+};
+
+static struct runqueue_data *rq_data;
+static void rq_work_fn(struct work_struct *work);
+
+static void start_rq_work(void)
+{
+ rq_data->nr_run_avg = 0;
+ rq_data->last_time = 0;
+ rq_data->total_time = 0;
+ if (rq_data->nr_run_wq == NULL)
+ rq_data->nr_run_wq =
+ create_singlethread_workqueue("nr_run_avg");
+
+ queue_delayed_work(rq_data->nr_run_wq, &rq_data->work,
+ msecs_to_jiffies(rq_data->update_rate));
+ return;
+}
+
+static void stop_rq_work(void)
+{
+ if (rq_data->nr_run_wq)
+ cancel_delayed_work(&rq_data->work);
+ return;
+}
+
+static int __init init_rq_avg(void)
+{
+ rq_data = kzalloc(sizeof(struct runqueue_data), GFP_KERNEL);
+ if (rq_data == NULL) {
+ pr_err("%s cannot allocate memory\n", __func__);
+ return -ENOMEM;
+ }
+ spin_lock_init(&rq_data->lock);
+ rq_data->update_rate = RQ_AVG_TIMER_RATE;
+ INIT_DELAYED_WORK_DEFERRABLE(&rq_data->work, rq_work_fn);
+
+ return 0;
+}
+
+static void rq_work_fn(struct work_struct *work)
+{
+ int64_t time_diff = 0;
+ int64_t nr_run = 0;
+ unsigned long flags = 0;
+ int64_t cur_time = ktime_to_ns(ktime_get());
+
+ spin_lock_irqsave(&rq_data->lock, flags);
+
+ if (rq_data->last_time == 0)
+ rq_data->last_time = cur_time;
+ if (rq_data->nr_run_avg == 0)
+ rq_data->total_time = 0;
+
+ nr_run = nr_running() * 100;
+ time_diff = cur_time - rq_data->last_time;
+ do_div(time_diff, 1000 * 1000);
+
+ if (time_diff != 0 && rq_data->total_time != 0) {
+ nr_run = (nr_run * time_diff) +
+ (rq_data->nr_run_avg * rq_data->total_time);
+ do_div(nr_run, rq_data->total_time + time_diff);
+ }
+ rq_data->nr_run_avg = nr_run;
+ rq_data->total_time += time_diff;
+ rq_data->last_time = cur_time;
+
+ if (rq_data->update_rate != 0)
+ queue_delayed_work(rq_data->nr_run_wq, &rq_data->work,
+ msecs_to_jiffies(rq_data->update_rate));
+
+ spin_unlock_irqrestore(&rq_data->lock, flags);
+}
+
+static unsigned int get_nr_run_avg(void)
+{
+ unsigned int nr_run_avg;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&rq_data->lock, flags);
+ nr_run_avg = rq_data->nr_run_avg;
+ rq_data->nr_run_avg = 0;
+ spin_unlock_irqrestore(&rq_data->lock, flags);
+
+ return nr_run_avg;
+}
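+
+/*
+ * Worked example (illustrative): rq_work_fn() keeps a time-weighted average
+ * of nr_running() * 100.  If the average so far is 150 (1.5 runnable tasks)
+ * over total_time = 30 ms and the current sample is 300 over a 10 ms
+ * time_diff, the new average is
+ *
+ *	(300 * 10 + 150 * 30) / (30 + 10) = 187	(about 1.87 tasks)
+ *
+ * get_nr_run_avg() returns this value and resets it, so each caller sees
+ * the average accumulated since the previous call.
+ */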
+
+
+/*
+ * dbs is used in this file as shorthand for demand-based switching.
+ * It helps to keep the variable names smaller and simpler.
+ */
+
+#define DEF_SAMPLING_DOWN_FACTOR (2)
+#define MAX_SAMPLING_DOWN_FACTOR (100000)
+#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (5)
+#define DEF_FREQUENCY_UP_THRESHOLD (85)
+#define DEF_FREQUENCY_MIN_SAMPLE_RATE (10000)
+#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MAX_FREQUENCY_UP_THRESHOLD (100)
+#define DEF_SAMPLING_RATE (50000)
+#define MIN_SAMPLING_RATE (10000)
+#define MAX_HOTPLUG_RATE (40u)
+
+#define DEF_MAX_CPU_LOCK (0)
+#define DEF_CPU_UP_FREQ (500000)
+#define DEF_CPU_DOWN_FREQ (200000)
+#define DEF_UP_NR_CPUS (1)
+#define DEF_CPU_UP_RATE (10)
+#define DEF_CPU_DOWN_RATE (20)
+#define DEF_FREQ_STEP (40)
+#define DEF_START_DELAY (0)
+
+#define UP_THRESHOLD_AT_MIN_FREQ (40)
+#define FREQ_FOR_RESPONSIVENESS (500000)
+
+#define HOTPLUG_DOWN_INDEX (0)
+#define HOTPLUG_UP_INDEX (1)
+
+#ifdef CONFIG_MACH_MIDAS
+static int hotplug_rq[4][2] = {
+ {0, 100}, {100, 200}, {200, 300}, {300, 0}
+};
+
+static int hotplug_freq[4][2] = {
+ {0, 500000},
+ {200000, 500000},
+ {200000, 500000},
+ {200000, 0}
+};
+#else
+static int hotplug_rq[4][2] = {
+ {0, 100}, {100, 200}, {200, 300}, {300, 0}
+};
+
+static int hotplug_freq[4][2] = {
+ {0, 500000},
+ {200000, 500000},
+ {200000, 500000},
+ {200000, 0}
+};
+#endif
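+
+/*
+ * Reading the tables above (illustrative): hotplug_freq[n - 1][1] and
+ * hotplug_rq[n - 1][1] hold the frequency (kHz) and runqueue-average
+ * thresholds for bringing another core online while n cores are running;
+ * index [n - 1][0] holds the thresholds for taking one offline.  With the
+ * defaults, a second core comes up when, over the last cpu_up_rate samples,
+ * the frequency stays at or above 500 MHz and the runqueue average stays
+ * above 1.00 task (rq value 100).
+ */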
+
+static unsigned int min_sampling_rate;
+
+static void do_dbs_timer(struct work_struct *work);
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event);
+
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ
+static
+#endif
+struct cpufreq_governor cpufreq_gov_pegasusq = {
+ .name = "pegasusq",
+ .governor = cpufreq_governor_dbs,
+ .owner = THIS_MODULE,
+};
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+struct cpu_dbs_info_s {
+ cputime64_t prev_cpu_idle;
+ cputime64_t prev_cpu_iowait;
+ cputime64_t prev_cpu_wall;
+ cputime64_t prev_cpu_nice;
+ struct cpufreq_policy *cur_policy;
+ struct delayed_work work;
+ struct work_struct up_work;
+ struct work_struct down_work;
+ struct cpufreq_frequency_table *freq_table;
+ unsigned int rate_mult;
+ int cpu;
+ /*
+ * percpu mutex that serializes governor limit change with
+ * do_dbs_timer invocation. We do not want do_dbs_timer to run
+ * when user is changing the governor or limits.
+ */
+ struct mutex timer_mutex;
+};
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
+
+struct workqueue_struct *dvfs_workqueue;
+
+static unsigned int dbs_enable; /* number of CPUs using this policy */
+
+/*
+ * dbs_mutex protects dbs_enable in governor start/stop.
+ */
+static DEFINE_MUTEX(dbs_mutex);
+
+static struct dbs_tuners {
+ unsigned int sampling_rate;
+ unsigned int up_threshold;
+ unsigned int down_differential;
+ unsigned int ignore_nice;
+ unsigned int sampling_down_factor;
+ unsigned int io_is_busy;
+ /* pegasusq tuners */
+ unsigned int freq_step;
+ unsigned int cpu_up_rate;
+ unsigned int cpu_down_rate;
+ unsigned int cpu_up_freq;
+ unsigned int cpu_down_freq;
+ unsigned int up_nr_cpus;
+ unsigned int max_cpu_lock;
+ atomic_t hotplug_lock;
+ unsigned int dvfs_debug;
+ unsigned int max_freq;
+ unsigned int min_freq;
+#ifdef CONFIG_HAS_EARLYSUSPEND
+ int early_suspend;
+#endif
+} dbs_tuners_ins = {
+ .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+ .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
+ .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
+ .ignore_nice = 0,
+ .freq_step = DEF_FREQ_STEP,
+ .cpu_up_rate = DEF_CPU_UP_RATE,
+ .cpu_down_rate = DEF_CPU_DOWN_RATE,
+ .cpu_up_freq = DEF_CPU_UP_FREQ,
+ .cpu_down_freq = DEF_CPU_DOWN_FREQ,
+ .up_nr_cpus = DEF_UP_NR_CPUS,
+ .max_cpu_lock = DEF_MAX_CPU_LOCK,
+ .hotplug_lock = ATOMIC_INIT(0),
+ .dvfs_debug = 0,
+#ifdef CONFIG_HAS_EARLYSUSPEND
+ .early_suspend = -1,
+#endif
+};
+
+
+/*
+ * CPU hotplug lock interface
+ */
+
+static atomic_t g_hotplug_count = ATOMIC_INIT(0);
+static atomic_t g_hotplug_lock = ATOMIC_INIT(0);
+
+static void apply_hotplug_lock(void)
+{
+ int online, possible, lock, flag;
+ struct work_struct *work;
+ struct cpu_dbs_info_s *dbs_info;
+
+ /* turn CPUs on or off */
+ dbs_info = &per_cpu(od_cpu_dbs_info, 0); /* from CPU0 */
+ online = num_online_cpus();
+ possible = num_possible_cpus();
+ lock = atomic_read(&g_hotplug_lock);
+ flag = lock - online;
+
+ if (flag == 0)
+ return;
+
+ work = flag > 0 ? &dbs_info->up_work : &dbs_info->down_work;
+
+ pr_debug("%s online %d possible %d lock %d flag %d %d\n",
+ __func__, online, possible, lock, flag, (int)abs(flag));
+
+ queue_work_on(dbs_info->cpu, dvfs_workqueue, work);
+}
+
+int cpufreq_pegasusq_cpu_lock(int num_core)
+{
+ int prev_lock;
+
+ if (num_core < 1 || num_core > num_possible_cpus())
+ return -EINVAL;
+
+ prev_lock = atomic_read(&g_hotplug_lock);
+
+ if (prev_lock != 0 && prev_lock < num_core)
+ return -EINVAL;
+ else if (prev_lock == num_core)
+ atomic_inc(&g_hotplug_count);
+
+ atomic_set(&g_hotplug_lock, num_core);
+ atomic_set(&g_hotplug_count, 1);
+ apply_hotplug_lock();
+
+ return 0;
+}
+
+int cpufreq_pegasusq_cpu_unlock(int num_core)
+{
+ int prev_lock = atomic_read(&g_hotplug_lock);
+
+ if (prev_lock < num_core)
+ return 0;
+ else if (prev_lock == num_core)
+ atomic_dec(&g_hotplug_count);
+
+ if (atomic_read(&g_hotplug_count) == 0)
+ atomic_set(&g_hotplug_lock, 0);
+
+ return 0;
+}
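+
+/*
+ * Usage sketch (illustrative): another driver can pin the number of online
+ * CPUs to two while pegasusq is active, e.g. for a latency-critical phase,
+ * and release the pin afterwards:
+ *
+ *	cpufreq_pegasusq_cpu_lock(2);
+ *	...
+ *	cpufreq_pegasusq_cpu_unlock(2);
+ *
+ * The lock fails with -EINVAL if a smaller core count is already locked.
+ */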
+
+
+/*
+ * History of CPU usage
+ */
+struct cpu_usage {
+ unsigned int freq;
+ unsigned int load[NR_CPUS];
+ unsigned int rq_avg;
+};
+
+struct cpu_usage_history {
+ struct cpu_usage usage[MAX_HOTPLUG_RATE];
+ unsigned int num_hist;
+};
+
+struct cpu_usage_history *hotplug_history;
+
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+ cputime64_t *wall)
+{
+ cputime64_t idle_time;
+ cputime64_t cur_wall_time;
+ cputime64_t busy_time;
+
+ cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
+ kstat_cpu(cpu).cpustat.system);
+
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
+ busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+
+ idle_time = cputime64_sub(cur_wall_time, busy_time);
+ if (wall)
+ *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+
+ return (cputime64_t)jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
+{
+ u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+ if (idle_time == -1ULL)
+ return get_cpu_idle_time_jiffy(cpu, wall);
+
+ return idle_time;
+}
+
+static inline cputime64_t get_cpu_iowait_time(unsigned int cpu,
+ cputime64_t *wall)
+{
+ u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);
+
+ if (iowait_time == -1ULL)
+ return 0;
+
+ return iowait_time;
+}
+
+/************************** sysfs interface ************************/
+
+static ssize_t show_sampling_rate_min(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", min_sampling_rate);
+}
+
+define_one_global_ro(sampling_rate_min);
+
+/* cpufreq_pegasusq Governor Tunables */
+#define show_one(file_name, object) \
+static ssize_t show_##file_name \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
+}
+show_one(sampling_rate, sampling_rate);
+show_one(io_is_busy, io_is_busy);
+show_one(up_threshold, up_threshold);
+show_one(sampling_down_factor, sampling_down_factor);
+show_one(ignore_nice_load, ignore_nice);
+show_one(down_differential, down_differential);
+show_one(freq_step, freq_step);
+show_one(cpu_up_rate, cpu_up_rate);
+show_one(cpu_down_rate, cpu_down_rate);
+show_one(cpu_up_freq, cpu_up_freq);
+show_one(cpu_down_freq, cpu_down_freq);
+show_one(up_nr_cpus, up_nr_cpus);
+show_one(max_cpu_lock, max_cpu_lock);
+show_one(dvfs_debug, dvfs_debug);
+static ssize_t show_hotplug_lock(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", atomic_read(&g_hotplug_lock));
+}
+
+#define show_hotplug_param(file_name, num_core, up_down) \
+static ssize_t show_##file_name##_##num_core##_##up_down \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", file_name[num_core - 1][up_down]); \
+}
+
+#define store_hotplug_param(file_name, num_core, up_down) \
+static ssize_t store_##file_name##_##num_core##_##up_down \
+(struct kobject *kobj, struct attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ unsigned int input; \
+ int ret; \
+ ret = sscanf(buf, "%u", &input); \
+ if (ret != 1) \
+ return -EINVAL; \
+ file_name[num_core - 1][up_down] = input; \
+ return count; \
+}
+
+show_hotplug_param(hotplug_freq, 1, 1);
+show_hotplug_param(hotplug_freq, 2, 0);
+show_hotplug_param(hotplug_freq, 2, 1);
+show_hotplug_param(hotplug_freq, 3, 0);
+show_hotplug_param(hotplug_freq, 3, 1);
+show_hotplug_param(hotplug_freq, 4, 0);
+
+show_hotplug_param(hotplug_rq, 1, 1);
+show_hotplug_param(hotplug_rq, 2, 0);
+show_hotplug_param(hotplug_rq, 2, 1);
+show_hotplug_param(hotplug_rq, 3, 0);
+show_hotplug_param(hotplug_rq, 3, 1);
+show_hotplug_param(hotplug_rq, 4, 0);
+
+store_hotplug_param(hotplug_freq, 1, 1);
+store_hotplug_param(hotplug_freq, 2, 0);
+store_hotplug_param(hotplug_freq, 2, 1);
+store_hotplug_param(hotplug_freq, 3, 0);
+store_hotplug_param(hotplug_freq, 3, 1);
+store_hotplug_param(hotplug_freq, 4, 0);
+
+store_hotplug_param(hotplug_rq, 1, 1);
+store_hotplug_param(hotplug_rq, 2, 0);
+store_hotplug_param(hotplug_rq, 2, 1);
+store_hotplug_param(hotplug_rq, 3, 0);
+store_hotplug_param(hotplug_rq, 3, 1);
+store_hotplug_param(hotplug_rq, 4, 0);
+
+define_one_global_rw(hotplug_freq_1_1);
+define_one_global_rw(hotplug_freq_2_0);
+define_one_global_rw(hotplug_freq_2_1);
+define_one_global_rw(hotplug_freq_3_0);
+define_one_global_rw(hotplug_freq_3_1);
+define_one_global_rw(hotplug_freq_4_0);
+
+define_one_global_rw(hotplug_rq_1_1);
+define_one_global_rw(hotplug_rq_2_0);
+define_one_global_rw(hotplug_rq_2_1);
+define_one_global_rw(hotplug_rq_3_0);
+define_one_global_rw(hotplug_rq_3_1);
+define_one_global_rw(hotplug_rq_4_0);
+
+static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
+ return count;
+}
+
+static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ dbs_tuners_ins.io_is_busy = !!input;
+ return count;
+}
+
+static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
+ input < MIN_FREQUENCY_UP_THRESHOLD) {
+ return -EINVAL;
+ }
+ dbs_tuners_ins.up_threshold = input;
+ return count;
+}
+
+static ssize_t store_sampling_down_factor(struct kobject *a,
+ struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input, j;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+
+ if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
+ return -EINVAL;
+ dbs_tuners_ins.sampling_down_factor = input;
+
+ /* Reset down sampling multiplier in case it was active */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ dbs_info->rate_mult = 1;
+ }
+ return count;
+}
+
+static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+
+ unsigned int j;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (input > 1)
+ input = 1;
+
+ if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
+ return count;
+ }
+ dbs_tuners_ins.ignore_nice = input;
+
+ /* we need to re-evaluate prev_cpu_idle */
+ for_each_online_cpu(j) {
+ struct cpu_dbs_info_s *dbs_info;
+ dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ dbs_info->prev_cpu_idle =
+ get_cpu_idle_time(j, &dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice)
+ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ }
+ return count;
+}
+
+static ssize_t store_down_differential(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.down_differential = min(input, 100u);
+ return count;
+}
+
+static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.freq_step = min(input, 100u);
+ return count;
+}
+
+static ssize_t store_cpu_up_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.cpu_up_rate = min(input, MAX_HOTPLUG_RATE);
+ return count;
+}
+
+static ssize_t store_cpu_down_rate(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.cpu_down_rate = min(input, MAX_HOTPLUG_RATE);
+ return count;
+}
+
+static ssize_t store_cpu_up_freq(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.cpu_up_freq = min(input, dbs_tuners_ins.max_freq);
+ return count;
+}
+
+static ssize_t store_cpu_down_freq(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.cpu_down_freq = max(input, dbs_tuners_ins.min_freq);
+ return count;
+}
+
+static ssize_t store_up_nr_cpus(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.up_nr_cpus = min(input, num_possible_cpus());
+ return count;
+}
+
+static ssize_t store_max_cpu_lock(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.max_cpu_lock = min(input, num_possible_cpus());
+ return count;
+}
+
+static ssize_t store_hotplug_lock(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ int prev_lock;
+
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ input = min(input, num_possible_cpus());
+ prev_lock = atomic_read(&dbs_tuners_ins.hotplug_lock);
+
+ if (prev_lock)
+ cpufreq_pegasusq_cpu_unlock(prev_lock);
+
+ if (input == 0) {
+ atomic_set(&dbs_tuners_ins.hotplug_lock, 0);
+ return count;
+ }
+
+ ret = cpufreq_pegasusq_cpu_lock(input);
+ if (ret) {
+ printk(KERN_ERR "[HOTPLUG] already locked with smaller value %d < %d\n",
+ atomic_read(&g_hotplug_lock), input);
+ return ret;
+ }
+
+ atomic_set(&dbs_tuners_ins.hotplug_lock, input);
+
+ return count;
+}
+
+static ssize_t store_dvfs_debug(struct kobject *a, struct attribute *b,
+ const char *buf, size_t count)
+{
+ unsigned int input;
+ int ret;
+ ret = sscanf(buf, "%u", &input);
+ if (ret != 1)
+ return -EINVAL;
+ dbs_tuners_ins.dvfs_debug = input > 0;
+ return count;
+}
+
+define_one_global_rw(sampling_rate);
+define_one_global_rw(io_is_busy);
+define_one_global_rw(up_threshold);
+define_one_global_rw(sampling_down_factor);
+define_one_global_rw(ignore_nice_load);
+define_one_global_rw(down_differential);
+define_one_global_rw(freq_step);
+define_one_global_rw(cpu_up_rate);
+define_one_global_rw(cpu_down_rate);
+define_one_global_rw(cpu_up_freq);
+define_one_global_rw(cpu_down_freq);
+define_one_global_rw(up_nr_cpus);
+define_one_global_rw(max_cpu_lock);
+define_one_global_rw(hotplug_lock);
+define_one_global_rw(dvfs_debug);
+
+static struct attribute *dbs_attributes[] = {
+ &sampling_rate_min.attr,
+ &sampling_rate.attr,
+ &up_threshold.attr,
+ &sampling_down_factor.attr,
+ &ignore_nice_load.attr,
+ &io_is_busy.attr,
+ &down_differential.attr,
+ &freq_step.attr,
+ &cpu_up_rate.attr,
+ &cpu_down_rate.attr,
+ &cpu_up_freq.attr,
+ &cpu_down_freq.attr,
+ &up_nr_cpus.attr,
+ /* priority: hotplug_lock > max_cpu_lock */
+ &max_cpu_lock.attr,
+ &hotplug_lock.attr,
+ &dvfs_debug.attr,
+ &hotplug_freq_1_1.attr,
+ &hotplug_freq_2_0.attr,
+ &hotplug_freq_2_1.attr,
+ &hotplug_freq_3_0.attr,
+ &hotplug_freq_3_1.attr,
+ &hotplug_freq_4_0.attr,
+ &hotplug_rq_1_1.attr,
+ &hotplug_rq_2_0.attr,
+ &hotplug_rq_2_1.attr,
+ &hotplug_rq_3_0.attr,
+ &hotplug_rq_3_1.attr,
+ &hotplug_rq_4_0.attr,
+ NULL
+};
+
+static struct attribute_group dbs_attr_group = {
+ .attrs = dbs_attributes,
+ .name = "pegasusq",
+};
+
+/************************** sysfs end ************************/
+
+static void cpu_up_work(struct work_struct *work)
+{
+ int cpu;
+ int online = num_online_cpus();
+ int nr_up = dbs_tuners_ins.up_nr_cpus;
+ int hotplug_lock = atomic_read(&g_hotplug_lock);
+ if (hotplug_lock)
+ nr_up = hotplug_lock - online;
+
+ if (online == 1) {
+ printk(KERN_ERR "CPU_UP 3\n");
+ cpu_up(num_possible_cpus() - 1);
+ nr_up -= 1;
+ }
+
+ for_each_cpu_not(cpu, cpu_online_mask) {
+ if (nr_up-- == 0)
+ break;
+ if (cpu == 0)
+ continue;
+ printk(KERN_ERR "CPU_UP %d\n", cpu);
+ cpu_up(cpu);
+ }
+}
+
+static void cpu_down_work(struct work_struct *work)
+{
+ int cpu;
+ int online = num_online_cpus();
+ int nr_down = 1;
+ int hotplug_lock = atomic_read(&g_hotplug_lock);
+
+ if (hotplug_lock)
+ nr_down = online - hotplug_lock;
+
+ for_each_online_cpu(cpu) {
+ if (cpu == 0)
+ continue;
+ printk(KERN_ERR "CPU_DOWN %d\n", cpu);
+ cpu_down(cpu);
+ if (--nr_down == 0)
+ break;
+ }
+}
+
+static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
+{
+#ifndef CONFIG_ARCH_EXYNOS4
+ if (p->cur == p->max)
+ return;
+#endif
+
+ __cpufreq_driver_target(p, freq, CPUFREQ_RELATION_L);
+}
+
+/*
+ * Print hotplug debugging info.
+ * @which: 1 = UP, 0 = DOWN
+ */
+static void debug_hotplug_check(int which, int rq_avg, int freq,
+ struct cpu_usage *usage)
+{
+ int cpu;
+ printk(KERN_ERR "CHECK %s rq %d.%02d freq %d [", which ? "up" : "down",
+ rq_avg / 100, rq_avg % 100, freq);
+ for_each_online_cpu(cpu) {
+ printk(KERN_ERR "(%d, %d), ", cpu, usage->load[cpu]);
+ }
+ printk(KERN_ERR "]\n");
+}
+
+static int check_up(void)
+{
+ int num_hist = hotplug_history->num_hist;
+ struct cpu_usage *usage;
+ int freq, rq_avg;
+ int i;
+ int up_rate = dbs_tuners_ins.cpu_up_rate;
+ int up_freq, up_rq;
+ int min_freq = INT_MAX;
+ int min_rq_avg = INT_MAX;
+ int online;
+ int hotplug_lock = atomic_read(&g_hotplug_lock);
+
+ if (hotplug_lock > 0)
+ return 0;
+
+ online = num_online_cpus();
+ up_freq = hotplug_freq[online - 1][HOTPLUG_UP_INDEX];
+ up_rq = hotplug_rq[online - 1][HOTPLUG_UP_INDEX];
+
+ if (online == num_possible_cpus())
+ return 0;
+ if (dbs_tuners_ins.max_cpu_lock != 0
+ && online >= dbs_tuners_ins.max_cpu_lock)
+ return 0;
+
+ if (num_hist == 0 || num_hist % up_rate)
+ return 0;
+
+ for (i = num_hist - 1; i >= num_hist - up_rate; --i) {
+ usage = &hotplug_history->usage[i];
+
+ freq = usage->freq;
+ rq_avg = usage->rq_avg;
+
+ min_freq = min(min_freq, freq);
+ min_rq_avg = min(min_rq_avg, rq_avg);
+
+ if (dbs_tuners_ins.dvfs_debug)
+ debug_hotplug_check(1, rq_avg, freq, usage);
+ }
+
+ if (min_freq >= up_freq && min_rq_avg > up_rq) {
+ printk(KERN_ERR "[HOTPLUG IN] %s %d>=%d && %d>%d\n",
+ __func__, min_freq, up_freq, min_rq_avg, up_rq);
+ hotplug_history->num_hist = 0;
+ return 1;
+ }
+ return 0;
+}
+
+static int check_down(void)
+{
+ int num_hist = hotplug_history->num_hist;
+ struct cpu_usage *usage;
+ int freq, rq_avg;
+ int i;
+ int down_rate = dbs_tuners_ins.cpu_down_rate;
+ int down_freq, down_rq;
+ int max_freq = 0;
+ int max_rq_avg = 0;
+ int online;
+ int hotplug_lock = atomic_read(&g_hotplug_lock);
+
+ if (hotplug_lock > 0)
+ return 0;
+
+ online = num_online_cpus();
+ down_freq = hotplug_freq[online - 1][HOTPLUG_DOWN_INDEX];
+ down_rq = hotplug_rq[online - 1][HOTPLUG_DOWN_INDEX];
+
+ if (online == 1)
+ return 0;
+
+ if (dbs_tuners_ins.max_cpu_lock != 0
+ && online > dbs_tuners_ins.max_cpu_lock)
+ return 1;
+
+ if (num_hist == 0 || num_hist % down_rate)
+ return 0;
+
+ for (i = num_hist - 1; i >= num_hist - down_rate; --i) {
+ usage = &hotplug_history->usage[i];
+
+ freq = usage->freq;
+ rq_avg = usage->rq_avg;
+
+ max_freq = max(max_freq, freq);
+ max_rq_avg = max(max_rq_avg, rq_avg);
+
+ if (dbs_tuners_ins.dvfs_debug)
+ debug_hotplug_check(0, rq_avg, freq, usage);
+ }
+
+ if (max_freq <= down_freq && max_rq_avg <= down_rq) {
+ printk(KERN_ERR "[HOTPLUG OUT] %s %d<=%d && %d<%d\n",
+ __func__, max_freq, down_freq, max_rq_avg, down_rq);
+ hotplug_history->num_hist = 0;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+ unsigned int max_load_freq;
+
+ struct cpufreq_policy *policy;
+ unsigned int j;
+ int num_hist = hotplug_history->num_hist;
+ int max_hotplug_rate = max(dbs_tuners_ins.cpu_up_rate,
+ dbs_tuners_ins.cpu_down_rate);
+ int up_threshold = dbs_tuners_ins.up_threshold;
+
+ policy = this_dbs_info->cur_policy;
+
+ hotplug_history->usage[num_hist].freq = policy->cur;
+ hotplug_history->usage[num_hist].rq_avg = get_nr_run_avg();
+ ++hotplug_history->num_hist;
+
+ /* Get Absolute Load - in terms of freq */
+ max_load_freq = 0;
+
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+ cputime64_t prev_wall_time, prev_idle_time, prev_iowait_time;
+ unsigned int idle_time, wall_time, iowait_time;
+ unsigned int load, load_freq;
+ int freq_avg;
+
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ prev_wall_time = j_dbs_info->prev_cpu_wall;
+ prev_idle_time = j_dbs_info->prev_cpu_idle;
+ prev_iowait_time = j_dbs_info->prev_cpu_iowait;
+
+ cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+ cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
+
+ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
+ prev_wall_time);
+ j_dbs_info->prev_cpu_wall = cur_wall_time;
+
+ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
+ prev_idle_time);
+ j_dbs_info->prev_cpu_idle = cur_idle_time;
+
+ iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
+ prev_iowait_time);
+ j_dbs_info->prev_cpu_iowait = cur_iowait_time;
+
+ if (dbs_tuners_ins.ignore_nice) {
+ cputime64_t cur_nice;
+ unsigned long cur_nice_jiffies;
+
+ cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
+ j_dbs_info->prev_cpu_nice);
+ /*
+ * Assumption: nice time between sampling periods will
+ * be less than 2^32 jiffies for 32 bit sys
+ */
+ cur_nice_jiffies = (unsigned long)
+ cputime64_to_jiffies64(cur_nice);
+
+ j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+ idle_time += jiffies_to_usecs(cur_nice_jiffies);
+ }
+
+ if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
+ idle_time -= iowait_time;
+
+ if (unlikely(!wall_time || wall_time < idle_time))
+ continue;
+
+ load = 100 * (wall_time - idle_time) / wall_time;
+ hotplug_history->usage[num_hist].load[j] = load;
+
+ freq_avg = __cpufreq_driver_getavg(policy, j);
+ if (freq_avg <= 0)
+ freq_avg = policy->cur;
+
+ load_freq = load * freq_avg;
+ if (load_freq > max_load_freq)
+ max_load_freq = load_freq;
+ }
+
+ /* Check for CPU hotplug */
+ if (check_up()) {
+ queue_work_on(this_dbs_info->cpu, dvfs_workqueue,
+ &this_dbs_info->up_work);
+ } else if (check_down()) {
+ queue_work_on(this_dbs_info->cpu, dvfs_workqueue,
+ &this_dbs_info->down_work);
+ }
+ if (hotplug_history->num_hist == max_hotplug_rate)
+ hotplug_history->num_hist = 0;
+
+ /* Check for frequency increase */
+ if (policy->cur < FREQ_FOR_RESPONSIVENESS) {
+ up_threshold = UP_THRESHOLD_AT_MIN_FREQ;
+ }
+
+ if (max_load_freq > up_threshold * policy->cur) {
+ int inc = (policy->max * dbs_tuners_ins.freq_step) / 100;
+ int target = min(policy->max, policy->cur + inc);
+ /* If switching to max speed, apply sampling_down_factor */
+ if (policy->cur < policy->max && target == policy->max)
+ this_dbs_info->rate_mult =
+ dbs_tuners_ins.sampling_down_factor;
+ dbs_freq_increase(policy, target);
+ return;
+ }
+
+ /* Check for frequency decrease */
+#ifndef CONFIG_ARCH_EXYNOS4
+ /* if we cannot reduce the frequency anymore, break out early */
+ if (policy->cur == policy->min)
+ return;
+#endif
+
+ /*
+ * The optimal frequency is the frequency that is the lowest that
+ * can support the current CPU usage without triggering the up
+ * policy. To be safe, we focus DOWN_DIFFERENTIAL points under
+ * the threshold.
+ */
+ if (max_load_freq <
+ (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
+ policy->cur) {
+ unsigned int freq_next;
+ unsigned int down_thres;
+
+ freq_next = max_load_freq /
+ (dbs_tuners_ins.up_threshold -
+ dbs_tuners_ins.down_differential);
+
+ /* No longer fully busy, reset rate_mult */
+ this_dbs_info->rate_mult = 1;
+
+ if (freq_next < policy->min)
+ freq_next = policy->min;
+
+
+ down_thres = UP_THRESHOLD_AT_MIN_FREQ
+ - dbs_tuners_ins.down_differential;
+
+ if (freq_next < FREQ_FOR_RESPONSIVENESS
+ && (max_load_freq / freq_next) > down_thres)
+ freq_next = FREQ_FOR_RESPONSIVENESS;
+
+ if (policy->cur == freq_next)
+ return;
+
+ __cpufreq_driver_target(policy, freq_next,
+ CPUFREQ_RELATION_L);
+ }
+}
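+
+/*
+ * Worked example (illustrative): with policy->max = 1400000 kHz,
+ * policy->cur = 800000 kHz and the default freq_step of 40, an up-threshold
+ * breach gives inc = 1400000 * 40 / 100 = 560000, so the target becomes
+ * min(1400000, 800000 + 560000) = 1360000 kHz.  sampling_down_factor is
+ * applied only when the target equals policy->max, stretching the next
+ * sampling period via rate_mult.
+ */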
+
+static void do_dbs_timer(struct work_struct *work)
+{
+ struct cpu_dbs_info_s *dbs_info =
+ container_of(work, struct cpu_dbs_info_s, work.work);
+ unsigned int cpu = dbs_info->cpu;
+ int delay;
+
+ mutex_lock(&dbs_info->timer_mutex);
+
+ dbs_check_cpu(dbs_info);
+ /* We want all CPUs to do sampling at nearly the
+ * same jiffy
+ */
+ delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
+ * dbs_info->rate_mult);
+
+ if (num_online_cpus() > 1)
+ delay -= jiffies % delay;
+
+ queue_delayed_work_on(cpu, dvfs_workqueue, &dbs_info->work, delay);
+ mutex_unlock(&dbs_info->timer_mutex);
+}
+
+static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+ /* We want all CPUs to do sampling at nearly the same jiffy */
+ int delay = usecs_to_jiffies(DEF_START_DELAY * 1000 * 1000
+ + dbs_tuners_ins.sampling_rate);
+ if (num_online_cpus() > 1)
+ delay -= jiffies % delay;
+
+ INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
+ INIT_WORK(&dbs_info->up_work, cpu_up_work);
+ INIT_WORK(&dbs_info->down_work, cpu_down_work);
+
+ queue_delayed_work_on(dbs_info->cpu, dvfs_workqueue,
+ &dbs_info->work, delay + 2 * HZ);
+}
+
+static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+ cancel_delayed_work_sync(&dbs_info->work);
+ cancel_work_sync(&dbs_info->up_work);
+ cancel_work_sync(&dbs_info->down_work);
+}
+
+static int pm_notifier_call(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ static unsigned int prev_hotplug_lock;
+ switch (event) {
+ case PM_SUSPEND_PREPARE:
+ prev_hotplug_lock = atomic_read(&g_hotplug_lock);
+ atomic_set(&g_hotplug_lock, 1);
+ apply_hotplug_lock();
+ pr_debug("%s enter suspend\n", __func__);
+ return NOTIFY_OK;
+ case PM_POST_RESTORE:
+ case PM_POST_SUSPEND:
+ atomic_set(&g_hotplug_lock, prev_hotplug_lock);
+ if (prev_hotplug_lock)
+ apply_hotplug_lock();
+ prev_hotplug_lock = 0;
+ pr_debug("%s exit suspend\n", __func__);
+ return NOTIFY_OK;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pm_notifier = {
+ .notifier_call = pm_notifier_call,
+};
+
+static int reboot_notifier_call(struct notifier_block *this,
+ unsigned long code, void *_cmd)
+{
+ atomic_set(&g_hotplug_lock, 1);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block reboot_notifier = {
+ .notifier_call = reboot_notifier_call,
+};
+
+#ifdef CONFIG_HAS_EARLYSUSPEND
+static struct early_suspend early_suspend;
+unsigned int prev_freq_step;
+unsigned int prev_sampling_rate;
+static void cpufreq_pegasusq_early_suspend(struct early_suspend *h)
+{
+ dbs_tuners_ins.early_suspend =
+ atomic_read(&g_hotplug_lock);
+ prev_freq_step = dbs_tuners_ins.freq_step;
+ prev_sampling_rate = dbs_tuners_ins.sampling_rate;
+ dbs_tuners_ins.freq_step = 20;
+ dbs_tuners_ins.sampling_rate *= 4;
+ atomic_set(&g_hotplug_lock, 1);
+ apply_hotplug_lock();
+ stop_rq_work();
+}
+static void cpufreq_pegasusq_late_resume(struct early_suspend *h)
+{
+ atomic_set(&g_hotplug_lock, dbs_tuners_ins.early_suspend);
+ dbs_tuners_ins.early_suspend = -1;
+ dbs_tuners_ins.freq_step = prev_freq_step;
+ dbs_tuners_ins.sampling_rate = prev_sampling_rate;
+ apply_hotplug_lock();
+ start_rq_work();
+}
+#endif
+
+static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ unsigned int cpu = policy->cpu;
+ struct cpu_dbs_info_s *this_dbs_info;
+ unsigned int j;
+ int rc;
+
+ this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if ((!cpu_online(cpu)) || (!policy->cur))
+ return -EINVAL;
+
+ dbs_tuners_ins.max_freq = policy->max;
+ dbs_tuners_ins.min_freq = policy->min;
+ hotplug_history->num_hist = 0;
+ start_rq_work();
+
+ mutex_lock(&dbs_mutex);
+
+ dbs_enable++;
+ for_each_cpu(j, policy->cpus) {
+ struct cpu_dbs_info_s *j_dbs_info;
+ j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
+ j_dbs_info->cur_policy = policy;
+
+ j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
+ &j_dbs_info->prev_cpu_wall);
+ if (dbs_tuners_ins.ignore_nice) {
+ j_dbs_info->prev_cpu_nice =
+ kstat_cpu(j).cpustat.nice;
+ }
+ }
+ this_dbs_info->cpu = cpu;
+ this_dbs_info->rate_mult = 1;
+ /*
+ * Start the timer/schedule work when this governor
+ * is used for the first time
+ */
+ if (dbs_enable == 1) {
+ rc = sysfs_create_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+ if (rc) {
+ mutex_unlock(&dbs_mutex);
+ return rc;
+ }
+
+ min_sampling_rate = MIN_SAMPLING_RATE;
+ dbs_tuners_ins.sampling_rate = DEF_SAMPLING_RATE;
+ dbs_tuners_ins.io_is_busy = 0;
+ }
+ mutex_unlock(&dbs_mutex);
+
+ register_reboot_notifier(&reboot_notifier);
+
+ mutex_init(&this_dbs_info->timer_mutex);
+ dbs_timer_init(this_dbs_info);
+
+#ifdef CONFIG_HAS_EARLYSUSPEND
+ register_early_suspend(&early_suspend);
+#endif
+ break;
+
+ case CPUFREQ_GOV_STOP:
+#ifdef CONFIG_HAS_EARLYSUSPEND
+ unregister_early_suspend(&early_suspend);
+#endif
+
+ dbs_timer_exit(this_dbs_info);
+
+ mutex_lock(&dbs_mutex);
+ mutex_destroy(&this_dbs_info->timer_mutex);
+
+ unregister_reboot_notifier(&reboot_notifier);
+
+ dbs_enable--;
+ mutex_unlock(&dbs_mutex);
+
+ stop_rq_work();
+
+ if (!dbs_enable)
+ sysfs_remove_group(cpufreq_global_kobject,
+ &dbs_attr_group);
+
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ mutex_lock(&this_dbs_info->timer_mutex);
+
+ if (policy->max < this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->max,
+ CPUFREQ_RELATION_H);
+ else if (policy->min > this_dbs_info->cur_policy->cur)
+ __cpufreq_driver_target(this_dbs_info->cur_policy,
+ policy->min,
+ CPUFREQ_RELATION_L);
+
+ mutex_unlock(&this_dbs_info->timer_mutex);
+ break;
+ }
+ return 0;
+}
+
+static int __init cpufreq_gov_dbs_init(void)
+{
+ int ret;
+
+ ret = init_rq_avg();
+ if (ret)
+ return ret;
+
+ hotplug_history = kzalloc(sizeof(struct cpu_usage_history), GFP_KERNEL);
+ if (!hotplug_history) {
+ pr_err("%s cannot create hotplug history array\n", __func__);
+ ret = -ENOMEM;
+ goto err_hist;
+ }
+
+ dvfs_workqueue = create_workqueue("kpegasusq");
+ if (!dvfs_workqueue) {
+ pr_err("%s cannot create workqueue\n", __func__);
+ ret = -ENOMEM;
+ goto err_queue;
+ }
+
+ ret = cpufreq_register_governor(&cpufreq_gov_pegasusq);
+ if (ret)
+ goto err_reg;
+
+#ifdef CONFIG_HAS_EARLYSUSPEND
+ early_suspend.level = EARLY_SUSPEND_LEVEL_DISABLE_FB;
+ early_suspend.suspend = cpufreq_pegasusq_early_suspend;
+ early_suspend.resume = cpufreq_pegasusq_late_resume;
+#endif
+
+ return ret;
+
+err_reg:
+ destroy_workqueue(dvfs_workqueue);
+err_queue:
+ kfree(hotplug_history);
+err_hist:
+ kfree(rq_data);
+ return ret;
+}
+
+static void __exit cpufreq_gov_dbs_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_pegasusq);
+ destroy_workqueue(dvfs_workqueue);
+ kfree(hotplug_history);
+ kfree(rq_data);
+}
+
+MODULE_AUTHOR("ByungChang Cha <bc.cha@samsung.com>");
+MODULE_DESCRIPTION("'cpufreq_pegasusq' - A dynamic cpufreq/cpuhotplug governor");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PEGASUSQ
+fs_initcall(cpufreq_gov_dbs_init);
+#else
+module_init(cpufreq_gov_dbs_init);
+#endif
+module_exit(cpufreq_gov_dbs_exit);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index faf7c52..c315ec9 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -317,6 +317,27 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
return 0;
}
+static int cpufreq_stats_create_table_cpu(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *table;
+ int ret = -ENODEV;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return -ENODEV;
+
+ table = cpufreq_frequency_get_table(cpu);
+ if (!table)
+ goto out;
+
+ ret = cpufreq_stats_create_table(policy, table);
+
+out:
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+
static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -335,6 +356,10 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
case CPU_DEAD_FROZEN:
cpufreq_stats_free_table(cpu);
break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ cpufreq_stats_create_table_cpu(cpu);
+ break;
}
return NOTIFY_OK;
}
diff --git a/drivers/cpufreq/dvfs_monitor.c b/drivers/cpufreq/dvfs_monitor.c
new file mode 100644
index 0000000..e1e02b4
--- /dev/null
+++ b/drivers/cpufreq/dvfs_monitor.c
@@ -0,0 +1,236 @@
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/atomic.h>
+#include <linux/tick.h>
+#include <linux/uaccess.h>
+
+struct cpufreq_load_data {
+ cputime64_t prev_idle;
+ cputime64_t prev_wall;
+ unsigned char load;
+};
+
+struct dvfs_data {
+ atomic_t opened;
+ atomic_t num_events;
+ unsigned char cpus[NR_CPUS];
+ unsigned int prev_freq[NR_CPUS];
+ unsigned int freq[NR_CPUS];
+ struct cpufreq_load_data load_data[NR_CPUS];
+ wait_queue_head_t wait_queue;
+ spinlock_t load_lock;
+};
+
+static struct dvfs_data *dvfs_info;
+
+static void init_dvfs_mon(void)
+{
+ int cpu;
+ int cur_freq = cpufreq_get(0);
+
+ for_each_possible_cpu(cpu) {
+ dvfs_info->cpus[cpu] = cpu_online(cpu);
+ dvfs_info->freq[cpu] = cur_freq;
+ }
+ atomic_set(&dvfs_info->num_events, 1);
+}
+
+static void calculate_load(void)
+{
+ int cpu;
+ cputime64_t cur_wall, cur_idle;
+ cputime64_t prev_wall, prev_idle;
+ unsigned int wall_time, idle_time;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dvfs_info->load_lock, flags);
+ for_each_online_cpu(cpu) {
+ cur_idle = get_cpu_idle_time_us(cpu, &cur_wall);
+ prev_idle = dvfs_info->load_data[cpu].prev_idle;
+ prev_wall = dvfs_info->load_data[cpu].prev_wall;
+
+ dvfs_info->load_data[cpu].prev_idle = cur_idle;
+ dvfs_info->load_data[cpu].prev_wall = cur_wall;
+
+ idle_time = (unsigned int)cputime64_sub(cur_idle, prev_idle);
+ wall_time = (unsigned int)cputime64_sub(cur_wall, prev_wall);
+
+ if (!wall_time || wall_time < idle_time) {
+ pr_err("%s walltime < idletime\n", __func__);
+ dvfs_info->load_data[cpu].load = 0;
+ continue;
+ }
+
+ dvfs_info->load_data[cpu].load = (wall_time - idle_time) * 100
+ / wall_time;
+ }
+ spin_unlock_irqrestore(&dvfs_info->load_lock, flags);
+ return;
+}
+
+static int dvfs_monitor_trans(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ if (freq->new == freq->old)
+ return 0;
+
+ dvfs_info->prev_freq[freq->cpu] = freq->old;
+ dvfs_info->freq[freq->cpu] = freq->new;
+
+ calculate_load();
+
+ atomic_inc(&dvfs_info->num_events);
+ wake_up_interruptible(&dvfs_info->wait_queue);
+
+ return 0;
+}
+
+static int __cpuinit dvfs_monitor_hotplug(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int cpu_status = 0;
+
+ switch (action) {
+ case CPU_ONLINE:
+ cpu_status = 1;
+ break;
+ case CPU_DOWN_PREPARE:
+ cpu_status = 0;
+ break;
+ default:
+ return NOTIFY_OK;
+ }
+
+ dvfs_info->cpus[cpu] = cpu_status;
+ atomic_inc(&dvfs_info->num_events);
+ calculate_load();
+ wake_up_interruptible(&dvfs_info->wait_queue);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block notifier_trans_block = {
+ .notifier_call = dvfs_monitor_trans,
+};
+
+static struct notifier_block notifier_hotplug_block __refdata = {
+ .notifier_call = dvfs_monitor_hotplug,
+ .priority = 1,
+};
+
+static int dvfs_mon_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ if (atomic_xchg(&dvfs_info->opened, 1) != 0)
+ return -EBUSY;
+
+ init_dvfs_mon();
+ ret = cpufreq_register_notifier(&notifier_trans_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret)
+ return ret;
+
+ register_hotcpu_notifier(&notifier_hotplug_block);
+
+ return 0;
+}
+
+static int dvfs_mon_release(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ atomic_dec(&dvfs_info->opened);
+ ret = cpufreq_unregister_notifier(&notifier_trans_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ unregister_hotcpu_notifier(&notifier_hotplug_block);
+
+ return ret;
+}
+
+static ssize_t dvfs_mon_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned long long t;
+ unsigned long nanosec_rem;
+ int freq, prev_freq;
+ char cpu_status[NR_CPUS * 8 + 1];
+ char output[NR_CPUS * 8 + 64];
+ char temp[16];
+ int i;
+ int len;
+
+ wait_event_interruptible(dvfs_info->wait_queue,
+ atomic_read(&dvfs_info->num_events));
+
+ atomic_set(&dvfs_info->num_events, 0);
+
+ /* for now, assume that all cores run at the same speed */
+ freq = dvfs_info->freq[0];
+ prev_freq = dvfs_info->prev_freq[0];
+ dvfs_info->prev_freq[0] = freq;
+
+ memset(cpu_status, 0, sizeof(cpu_status));
+ for (i = 0; i != num_possible_cpus(); ++i) {
+ unsigned char load = dvfs_info->cpus[i] ?
+ dvfs_info->load_data[i].load : 0;
+ snprintf(temp, sizeof(temp), "(%d,%3d),", dvfs_info->cpus[i], load);
+ strcat(cpu_status, temp);
+ }
+
+ t = cpu_clock(0);
+ nanosec_rem = do_div(t, 1000000000);
+
+ /* format into a kernel buffer and copy the result to user space */
+ len = snprintf(output, sizeof(output), "%lu.%06lu,%s%d,%d\n",
+ (unsigned long) t, nanosec_rem / 1000,
+ cpu_status, prev_freq, freq);
+ if ((size_t)len > count)
+ len = count;
+ if (copy_to_user(buf, output, len))
+ return -EFAULT;
+
+ return len;
+}
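+
+/*
+ * Example read (illustrative): on a dual-core system a line returned by
+ * /proc/dvfs_mon might look like
+ *
+ *	123.456789,(1, 37),(0,  0),800000,1200000
+ *
+ * i.e. a timestamp, one "(online,load)" pair per possible CPU, then the
+ * previous and the new frequency in kHz.
+ */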
+
+static const struct file_operations dvfs_mon_operations = {
+ .read = dvfs_mon_read,
+ .open = dvfs_mon_open,
+ .release = dvfs_mon_release,
+};
+
+static int __init dvfs_monitor_init(void)
+{
+ dvfs_info = kzalloc(sizeof(struct dvfs_data), GFP_KERNEL);
+ if (dvfs_info == NULL) {
+ pr_err("[DVFS_MON] cannot allocate memory\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&dvfs_info->load_lock);
+
+ init_waitqueue_head(&dvfs_info->wait_queue);
+
+ proc_create("dvfs_mon", S_IRUSR, NULL, &dvfs_mon_operations);
+
+ return 0;
+}
+late_initcall(dvfs_monitor_init);
+
+static void __exit dvfs_monitor_exit(void)
+{
+ kfree(dvfs_info);
+ return;
+}
+module_exit(dvfs_monitor_exit);
+
+MODULE_AUTHOR("ByungChang Cha <bc.cha@samsung.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DVFS Monitoring proc file");