PM / devfreq: Introduce a memory-latency governor
Use performance counters to detect the memory latency sensitivity of CPU workloads and vote for higher DDR frequency if required. Change-Id: Ie77a3523bc5713fc0315bd0abc3913f485a96e0e Signed-off-by: Rohit Gupta <rohgup@codeaurora.org> [avajid@codeaurora.org: updated attr definitions, removed exclude_idle flag and made minor styling changes] Signed-off-by: Amir Vajid <avajid@codeaurora.org>
This commit is contained in:
@@ -83,6 +83,15 @@ config QCOM_BIMC_BWMON
|
||||
has the capability to raise an IRQ when the count exceeds a
|
||||
programmable limit.
|
||||
|
||||
config ARM_MEMLAT_MON
|
||||
tristate "ARM CPU Memory Latency monitor hardware"
|
||||
depends on ARCH_QCOM
|
||||
help
|
||||
The PMU present on these ARM cores allows for the use of counters to
|
||||
monitor the memory latency characteristics of an ARM CPU workload.
|
||||
This driver uses these counters to implement the APIs needed by
|
||||
the mem_latency devfreq governor.
|
||||
|
||||
config DEVFREQ_GOV_QCOM_BW_HWMON
|
||||
tristate "HW monitor based governor for device BW"
|
||||
depends on QCOM_BIMC_BWMON
|
||||
@@ -102,6 +111,16 @@ config DEVFREQ_GOV_QCOM_CACHE_HWMON
|
||||
it can conflict with existing profiling tools. This governor is
|
||||
unlikely to be useful for other devices.
|
||||
|
||||
config DEVFREQ_GOV_MEMLAT
|
||||
tristate "HW monitor based governor for memory latency"
|
||||
depends on ARM_MEMLAT_MON
|
||||
help
|
||||
HW monitor based governor for device to DDR bandwidth voting.
|
||||
This governor sets the CPU BW vote based on stats obtained from memlat
|
||||
monitor if it determines that a workload is memory latency bound. Since
|
||||
this uses target specific counters it can conflict with existing profiling
|
||||
tools.
|
||||
|
||||
comment "DEVFREQ Drivers"
|
||||
|
||||
config ARM_EXYNOS_BUS_DEVFREQ
|
||||
|
@@ -7,8 +7,10 @@ obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o
|
||||
obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o
|
||||
obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o
|
||||
obj-$(CONFIG_QCOM_BIMC_BWMON) += bimc-bwmon.o
|
||||
obj-$(CONFIG_ARM_MEMLAT_MON) += arm-memlat-mon.o
|
||||
obj-$(CONFIG_DEVFREQ_GOV_QCOM_BW_HWMON) += governor_bw_hwmon.o
|
||||
obj-$(CONFIG_DEVFREQ_GOV_QCOM_CACHE_HWMON) += governor_cache_hwmon.o
|
||||
obj-$(CONFIG_DEVFREQ_GOV_MEMLAT) += governor_memlat.o
|
||||
|
||||
# DEVFREQ Drivers
|
||||
obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
|
||||
|
314
drivers/devfreq/arm-memlat-mon.c
Normal file
314
drivers/devfreq/arm-memlat-mon.c
Normal file
@@ -0,0 +1,314 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2014-2017, 2019, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "arm-memlat-mon: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_irq.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
#include <linux/cpu.h>
|
||||
#include "governor.h"
|
||||
#include "governor_memlat.h"
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
enum ev_index {
|
||||
INST_IDX,
|
||||
CM_IDX,
|
||||
CYC_IDX,
|
||||
NUM_EVENTS
|
||||
};
|
||||
#define INST_EV 0x08
|
||||
#define L2DM_EV 0x17
|
||||
#define CYC_EV 0x11
|
||||
|
||||
struct event_data {
|
||||
struct perf_event *pevent;
|
||||
unsigned long prev_count;
|
||||
};
|
||||
|
||||
struct cpu_pmu_stats {
|
||||
struct event_data events[NUM_EVENTS];
|
||||
ktime_t prev_ts;
|
||||
};
|
||||
|
||||
struct cpu_grp_info {
|
||||
cpumask_t cpus;
|
||||
unsigned int event_ids[NUM_EVENTS];
|
||||
struct cpu_pmu_stats *cpustats;
|
||||
struct memlat_hwmon hw;
|
||||
};
|
||||
|
||||
/*
 * Per-CPU slots are indexed by the CPU's offset from the first CPU in the
 * group's mask. Arguments are parenthesised so that arbitrary expressions
 * can be passed safely.
 */
#define to_cpustats(cpu_grp, cpu) \
	(&(cpu_grp)->cpustats[(cpu) - cpumask_first(&(cpu_grp)->cpus)])
#define to_devstats(cpu_grp, cpu) \
	(&(cpu_grp)->hw.core_stats[(cpu) - cpumask_first(&(cpu_grp)->cpus)])
/* Recover the enclosing cpu_grp_info from its embedded memlat_hwmon. */
#define to_cpu_grp(hwmon) container_of(hwmon, struct cpu_grp_info, hw)
|
||||
|
||||
|
||||
static unsigned long compute_freq(struct cpu_pmu_stats *cpustats,
|
||||
unsigned long cyc_cnt)
|
||||
{
|
||||
ktime_t ts;
|
||||
unsigned int diff;
|
||||
unsigned long freq = 0;
|
||||
|
||||
ts = ktime_get();
|
||||
diff = ktime_to_us(ktime_sub(ts, cpustats->prev_ts));
|
||||
if (!diff)
|
||||
diff = 1;
|
||||
cpustats->prev_ts = ts;
|
||||
freq = cyc_cnt;
|
||||
do_div(freq, diff);
|
||||
|
||||
return freq;
|
||||
}
|
||||
|
||||
#define MAX_COUNT_LIM 0xFFFFFFFFFFFFFFFF
|
||||
static inline unsigned long read_event(struct event_data *event)
|
||||
{
|
||||
unsigned long ev_count;
|
||||
u64 total, enabled, running;
|
||||
|
||||
total = perf_event_read_value(event->pevent, &enabled, &running);
|
||||
ev_count = total - event->prev_count;
|
||||
event->prev_count = total;
|
||||
return ev_count;
|
||||
}
|
||||
|
||||
static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
|
||||
{
|
||||
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
|
||||
struct dev_stats *devstats = to_devstats(cpu_grp, cpu);
|
||||
unsigned long cyc_cnt;
|
||||
|
||||
devstats->inst_count = read_event(&cpustats->events[INST_IDX]);
|
||||
devstats->mem_count = read_event(&cpustats->events[CM_IDX]);
|
||||
cyc_cnt = read_event(&cpustats->events[CYC_IDX]);
|
||||
devstats->freq = compute_freq(cpustats, cyc_cnt);
|
||||
}
|
||||
|
||||
static unsigned long get_cnt(struct memlat_hwmon *hw)
|
||||
{
|
||||
int cpu;
|
||||
struct cpu_grp_info *cpu_grp = to_cpu_grp(hw);
|
||||
|
||||
for_each_cpu(cpu, &cpu_grp->cpus)
|
||||
read_perf_counters(cpu, cpu_grp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void delete_events(struct cpu_pmu_stats *cpustats)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
|
||||
cpustats->events[i].prev_count = 0;
|
||||
perf_event_release_kernel(cpustats->events[i].pevent);
|
||||
}
|
||||
}
|
||||
|
||||
static void stop_hwmon(struct memlat_hwmon *hw)
|
||||
{
|
||||
int cpu;
|
||||
struct cpu_grp_info *cpu_grp = to_cpu_grp(hw);
|
||||
struct dev_stats *devstats;
|
||||
|
||||
for_each_cpu(cpu, &cpu_grp->cpus) {
|
||||
delete_events(to_cpustats(cpu_grp, cpu));
|
||||
|
||||
/* Clear governor data */
|
||||
devstats = to_devstats(cpu_grp, cpu);
|
||||
devstats->inst_count = 0;
|
||||
devstats->mem_count = 0;
|
||||
devstats->freq = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static struct perf_event_attr *alloc_attr(void)
|
||||
{
|
||||
struct perf_event_attr *attr;
|
||||
|
||||
attr = kzalloc(sizeof(struct perf_event_attr), GFP_KERNEL);
|
||||
if (!attr)
|
||||
return attr;
|
||||
|
||||
attr->type = PERF_TYPE_RAW;
|
||||
attr->size = sizeof(struct perf_event_attr);
|
||||
attr->pinned = 1;
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
|
||||
{
|
||||
struct perf_event *pevent;
|
||||
struct perf_event_attr *attr;
|
||||
int err, i;
|
||||
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
|
||||
|
||||
/* Allocate an attribute for event initialization */
|
||||
attr = alloc_attr();
|
||||
if (!attr)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
|
||||
attr->config = cpu_grp->event_ids[i];
|
||||
pevent = perf_event_create_kernel_counter(attr, cpu, NULL,
|
||||
NULL, NULL);
|
||||
if (IS_ERR(pevent))
|
||||
goto err_out;
|
||||
cpustats->events[i].pevent = pevent;
|
||||
perf_event_enable(pevent);
|
||||
}
|
||||
|
||||
kfree(attr);
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
err = PTR_ERR(pevent);
|
||||
kfree(attr);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int start_hwmon(struct memlat_hwmon *hw)
|
||||
{
|
||||
int cpu, ret = 0;
|
||||
struct cpu_grp_info *cpu_grp = to_cpu_grp(hw);
|
||||
|
||||
for_each_cpu(cpu, &cpu_grp->cpus) {
|
||||
ret = set_events(cpu_grp, cpu);
|
||||
if (ret < 0) {
|
||||
pr_warn("Perf event init failed on CPU%d: %d\n", cpu,
|
||||
ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_mask_from_dev_handle(struct platform_device *pdev,
|
||||
cpumask_t *mask)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct device_node *dev_phandle;
|
||||
struct device *cpu_dev;
|
||||
int cpu, i = 0;
|
||||
int ret = -ENOENT;
|
||||
|
||||
dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++);
|
||||
while (dev_phandle) {
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpu_dev = get_cpu_device(cpu);
|
||||
if (cpu_dev && cpu_dev->of_node == dev_phandle) {
|
||||
cpumask_set_cpu(cpu, mask);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dev_phandle = of_parse_phandle(dev->of_node,
|
||||
"qcom,cpulist", i++);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int arm_memlat_mon_driver_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct memlat_hwmon *hw;
|
||||
struct cpu_grp_info *cpu_grp;
|
||||
int cpu, ret;
|
||||
u32 event_id;
|
||||
|
||||
cpu_grp = devm_kzalloc(dev, sizeof(*cpu_grp), GFP_KERNEL);
|
||||
if (!cpu_grp)
|
||||
return -ENOMEM;
|
||||
hw = &cpu_grp->hw;
|
||||
|
||||
hw->dev = dev;
|
||||
hw->of_node = of_parse_phandle(dev->of_node, "qcom,target-dev", 0);
|
||||
if (!hw->of_node) {
|
||||
dev_err(dev, "Couldn't find a target device\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (get_mask_from_dev_handle(pdev, &cpu_grp->cpus)) {
|
||||
dev_err(dev, "CPU list is empty\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
hw->num_cores = cpumask_weight(&cpu_grp->cpus);
|
||||
hw->core_stats = devm_kzalloc(dev, hw->num_cores *
|
||||
sizeof(*(hw->core_stats)), GFP_KERNEL);
|
||||
if (!hw->core_stats)
|
||||
return -ENOMEM;
|
||||
|
||||
cpu_grp->cpustats = devm_kzalloc(dev, hw->num_cores *
|
||||
sizeof(*(cpu_grp->cpustats)), GFP_KERNEL);
|
||||
if (!cpu_grp->cpustats)
|
||||
return -ENOMEM;
|
||||
|
||||
cpu_grp->event_ids[CYC_IDX] = CYC_EV;
|
||||
|
||||
ret = of_property_read_u32(dev->of_node, "qcom,cachemiss-ev",
|
||||
&event_id);
|
||||
if (ret < 0) {
|
||||
dev_dbg(dev, "Cache Miss event not specified. Using def:0x%x\n",
|
||||
L2DM_EV);
|
||||
event_id = L2DM_EV;
|
||||
}
|
||||
cpu_grp->event_ids[CM_IDX] = event_id;
|
||||
|
||||
ret = of_property_read_u32(dev->of_node, "qcom,inst-ev", &event_id);
|
||||
if (ret < 0) {
|
||||
dev_dbg(dev, "Inst event not specified. Using def:0x%x\n",
|
||||
INST_EV);
|
||||
event_id = INST_EV;
|
||||
}
|
||||
cpu_grp->event_ids[INST_IDX] = event_id;
|
||||
|
||||
for_each_cpu(cpu, &cpu_grp->cpus)
|
||||
to_devstats(cpu_grp, cpu)->id = cpu;
|
||||
|
||||
hw->start_hwmon = &start_hwmon;
|
||||
hw->stop_hwmon = &stop_hwmon;
|
||||
hw->get_cnt = &get_cnt;
|
||||
|
||||
ret = register_memlat(dev, hw);
|
||||
if (ret < 0) {
|
||||
pr_err("Mem Latency Gov registration failed: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Device-tree compatibles handled by this driver. */
static const struct of_device_id memlat_match_table[] = {
	{ .compatible = "qcom,arm-memlat-mon" },
	{}
};
/* Export the table so a modular build can be autoloaded on a DT match. */
MODULE_DEVICE_TABLE(of, memlat_match_table);
|
||||
|
||||
static struct platform_driver arm_memlat_mon_driver = {
|
||||
.probe = arm_memlat_mon_driver_probe,
|
||||
.driver = {
|
||||
.name = "arm-memlat-mon",
|
||||
.of_match_table = memlat_match_table,
|
||||
},
|
||||
};
|
||||
|
||||
module_platform_driver(arm_memlat_mon_driver);
|
411
drivers/devfreq/governor_memlat.c
Normal file
411
drivers/devfreq/governor_memlat.c
Normal file
@@ -0,0 +1,411 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2015-2017, 2019, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "mem_lat: " fmt
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/devfreq.h>
|
||||
#include "governor.h"
|
||||
#include "governor_memlat.h"
|
||||
|
||||
#include <trace/events/power.h>
|
||||
|
||||
struct memlat_node {
|
||||
unsigned int ratio_ceil;
|
||||
bool mon_started;
|
||||
bool already_zero;
|
||||
struct list_head list;
|
||||
void *orig_data;
|
||||
struct memlat_hwmon *hw;
|
||||
struct devfreq_governor *gov;
|
||||
struct attribute_group *attr_grp;
|
||||
};
|
||||
|
||||
static LIST_HEAD(memlat_list);
|
||||
static DEFINE_MUTEX(list_lock);
|
||||
|
||||
static int use_cnt;
|
||||
static DEFINE_MUTEX(state_lock);
|
||||
|
||||
#define show_attr(name) \
|
||||
static ssize_t name##_show(struct device *dev, \
|
||||
struct device_attribute *attr, char *buf) \
|
||||
{ \
|
||||
struct devfreq *df = to_devfreq(dev); \
|
||||
struct memlat_node *hw = df->data; \
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", hw->name); \
|
||||
}
|
||||
|
||||
#define store_attr(name, _min, _max) \
|
||||
static ssize_t name##_store(struct device *dev, \
|
||||
struct device_attribute *attr, const char *buf, \
|
||||
size_t count) \
|
||||
{ \
|
||||
struct devfreq *df = to_devfreq(dev); \
|
||||
struct memlat_node *hw = df->data; \
|
||||
int ret; \
|
||||
unsigned int val; \
|
||||
ret = kstrtouint(buf, 10, &val); \
|
||||
if (ret < 0) \
|
||||
return ret; \
|
||||
val = max(val, _min); \
|
||||
val = min(val, _max); \
|
||||
hw->name = val; \
|
||||
return count; \
|
||||
}
|
||||
|
||||
static ssize_t freq_map_show(struct device *dev, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct devfreq *df = to_devfreq(dev);
|
||||
struct memlat_node *n = df->data;
|
||||
struct core_dev_map *map = n->hw->freq_map;
|
||||
unsigned int cnt = 0;
|
||||
|
||||
cnt += scnprintf(buf, PAGE_SIZE, "Core freq (MHz)\tDevice BW\n");
|
||||
|
||||
while (map->core_mhz && cnt < PAGE_SIZE) {
|
||||
cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%15u\t%9u\n",
|
||||
map->core_mhz, map->target_freq);
|
||||
map++;
|
||||
}
|
||||
if (cnt < PAGE_SIZE)
|
||||
cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "\n");
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(freq_map);
|
||||
|
||||
static unsigned long core_to_dev_freq(struct memlat_node *node,
|
||||
unsigned long coref)
|
||||
{
|
||||
struct memlat_hwmon *hw = node->hw;
|
||||
struct core_dev_map *map = hw->freq_map;
|
||||
unsigned long freq = 0;
|
||||
|
||||
if (!map)
|
||||
goto out;
|
||||
|
||||
while (map->core_mhz && map->core_mhz < coref)
|
||||
map++;
|
||||
if (!map->core_mhz)
|
||||
map--;
|
||||
freq = map->target_freq;
|
||||
|
||||
out:
|
||||
pr_debug("freq: %lu -> dev: %lu\n", coref, freq);
|
||||
return freq;
|
||||
}
|
||||
|
||||
static struct memlat_node *find_memlat_node(struct devfreq *df)
|
||||
{
|
||||
struct memlat_node *node, *found = NULL;
|
||||
|
||||
mutex_lock(&list_lock);
|
||||
list_for_each_entry(node, &memlat_list, list)
|
||||
if (node->hw->dev == df->dev.parent ||
|
||||
node->hw->of_node == df->dev.parent->of_node) {
|
||||
found = node;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&list_lock);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static int start_monitor(struct devfreq *df)
|
||||
{
|
||||
struct memlat_node *node = df->data;
|
||||
struct memlat_hwmon *hw = node->hw;
|
||||
struct device *dev = df->dev.parent;
|
||||
int ret;
|
||||
|
||||
ret = hw->start_hwmon(hw);
|
||||
|
||||
if (ret < 0) {
|
||||
dev_err(dev, "Unable to start HW monitor! (%d)\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
devfreq_monitor_start(df);
|
||||
|
||||
node->mon_started = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stop_monitor(struct devfreq *df)
|
||||
{
|
||||
struct memlat_node *node = df->data;
|
||||
struct memlat_hwmon *hw = node->hw;
|
||||
|
||||
node->mon_started = false;
|
||||
|
||||
devfreq_monitor_stop(df);
|
||||
hw->stop_hwmon(hw);
|
||||
}
|
||||
|
||||
static int gov_start(struct devfreq *df)
|
||||
{
|
||||
int ret = 0;
|
||||
struct device *dev = df->dev.parent;
|
||||
struct memlat_node *node;
|
||||
struct memlat_hwmon *hw;
|
||||
|
||||
node = find_memlat_node(df);
|
||||
if (!node) {
|
||||
dev_err(dev, "Unable to find HW monitor!\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
hw = node->hw;
|
||||
|
||||
hw->df = df;
|
||||
node->orig_data = df->data;
|
||||
df->data = node;
|
||||
|
||||
if (start_monitor(df))
|
||||
goto err_start;
|
||||
|
||||
ret = sysfs_create_group(&df->dev.kobj, node->attr_grp);
|
||||
if (ret < 0)
|
||||
goto err_sysfs;
|
||||
|
||||
return 0;
|
||||
|
||||
err_sysfs:
|
||||
stop_monitor(df);
|
||||
err_start:
|
||||
df->data = node->orig_data;
|
||||
node->orig_data = NULL;
|
||||
hw->df = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void gov_stop(struct devfreq *df)
|
||||
{
|
||||
struct memlat_node *node = df->data;
|
||||
struct memlat_hwmon *hw = node->hw;
|
||||
|
||||
sysfs_remove_group(&df->dev.kobj, node->attr_grp);
|
||||
stop_monitor(df);
|
||||
df->data = node->orig_data;
|
||||
node->orig_data = NULL;
|
||||
hw->df = NULL;
|
||||
}
|
||||
|
||||
static int devfreq_memlat_get_freq(struct devfreq *df,
|
||||
unsigned long *freq)
|
||||
{
|
||||
int i, lat_dev = 0;
|
||||
struct memlat_node *node = df->data;
|
||||
struct memlat_hwmon *hw = node->hw;
|
||||
unsigned long max_freq = 0;
|
||||
unsigned int ratio;
|
||||
|
||||
hw->get_cnt(hw);
|
||||
|
||||
for (i = 0; i < hw->num_cores; i++) {
|
||||
ratio = hw->core_stats[i].inst_count;
|
||||
|
||||
if (hw->core_stats[i].mem_count)
|
||||
ratio /= hw->core_stats[i].mem_count;
|
||||
|
||||
if (!hw->core_stats[i].inst_count
|
||||
|| !hw->core_stats[i].freq)
|
||||
continue;
|
||||
|
||||
trace_memlat_dev_meas(dev_name(df->dev.parent),
|
||||
hw->core_stats[i].id,
|
||||
hw->core_stats[i].inst_count,
|
||||
hw->core_stats[i].mem_count,
|
||||
hw->core_stats[i].freq, ratio);
|
||||
|
||||
if (ratio <= node->ratio_ceil
|
||||
&& hw->core_stats[i].freq > max_freq) {
|
||||
lat_dev = i;
|
||||
max_freq = hw->core_stats[i].freq;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_freq)
|
||||
max_freq = core_to_dev_freq(node, max_freq);
|
||||
|
||||
if (max_freq || !node->already_zero) {
|
||||
trace_memlat_dev_update(dev_name(df->dev.parent),
|
||||
hw->core_stats[lat_dev].id,
|
||||
hw->core_stats[lat_dev].inst_count,
|
||||
hw->core_stats[lat_dev].mem_count,
|
||||
hw->core_stats[lat_dev].freq,
|
||||
max_freq);
|
||||
}
|
||||
|
||||
node->already_zero = !max_freq;
|
||||
|
||||
*freq = max_freq;
|
||||
return 0;
|
||||
}
|
||||
|
||||
show_attr(ratio_ceil);
|
||||
store_attr(ratio_ceil, 1U, 20000U);
|
||||
static DEVICE_ATTR_RW(ratio_ceil);
|
||||
|
||||
static struct attribute *dev_attr[] = {
|
||||
&dev_attr_ratio_ceil.attr,
|
||||
&dev_attr_freq_map.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group dev_attr_group = {
|
||||
.name = "mem_latency",
|
||||
.attrs = dev_attr,
|
||||
};
|
||||
|
||||
#define MIN_MS 10U
|
||||
#define MAX_MS 500U
|
||||
static int devfreq_memlat_ev_handler(struct devfreq *df,
|
||||
unsigned int event, void *data)
|
||||
{
|
||||
int ret;
|
||||
unsigned int sample_ms;
|
||||
|
||||
switch (event) {
|
||||
case DEVFREQ_GOV_START:
|
||||
sample_ms = df->profile->polling_ms;
|
||||
sample_ms = max(MIN_MS, sample_ms);
|
||||
sample_ms = min(MAX_MS, sample_ms);
|
||||
df->profile->polling_ms = sample_ms;
|
||||
|
||||
ret = gov_start(df);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
dev_dbg(df->dev.parent,
|
||||
"Enabled Memory Latency governor\n");
|
||||
break;
|
||||
|
||||
case DEVFREQ_GOV_STOP:
|
||||
gov_stop(df);
|
||||
dev_dbg(df->dev.parent,
|
||||
"Disabled Memory Latency governor\n");
|
||||
break;
|
||||
|
||||
case DEVFREQ_GOV_INTERVAL:
|
||||
sample_ms = *(unsigned int *)data;
|
||||
sample_ms = max(MIN_MS, sample_ms);
|
||||
sample_ms = min(MAX_MS, sample_ms);
|
||||
devfreq_interval_update(df, &sample_ms);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct devfreq_governor devfreq_gov_memlat = {
|
||||
.name = "mem_latency",
|
||||
.get_target_freq = devfreq_memlat_get_freq,
|
||||
.event_handler = devfreq_memlat_ev_handler,
|
||||
};
|
||||
|
||||
#define NUM_COLS 2
|
||||
static struct core_dev_map *init_core_dev_map(struct device *dev,
|
||||
char *prop_name)
|
||||
{
|
||||
int len, nf, i, j;
|
||||
u32 data;
|
||||
struct core_dev_map *tbl;
|
||||
int ret;
|
||||
|
||||
if (!of_find_property(dev->of_node, prop_name, &len))
|
||||
return NULL;
|
||||
len /= sizeof(data);
|
||||
|
||||
if (len % NUM_COLS || len == 0)
|
||||
return NULL;
|
||||
nf = len / NUM_COLS;
|
||||
|
||||
tbl = devm_kzalloc(dev, (nf + 1) * sizeof(struct core_dev_map),
|
||||
GFP_KERNEL);
|
||||
if (!tbl)
|
||||
return NULL;
|
||||
|
||||
for (i = 0, j = 0; i < nf; i++, j += 2) {
|
||||
ret = of_property_read_u32_index(dev->of_node, prop_name, j,
|
||||
&data);
|
||||
if (ret < 0)
|
||||
return NULL;
|
||||
tbl[i].core_mhz = data / 1000;
|
||||
|
||||
ret = of_property_read_u32_index(dev->of_node, prop_name, j + 1,
|
||||
&data);
|
||||
if (ret < 0)
|
||||
return NULL;
|
||||
tbl[i].target_freq = data;
|
||||
pr_debug("Entry%d CPU:%u, Dev:%u\n", i, tbl[i].core_mhz,
|
||||
tbl[i].target_freq);
|
||||
}
|
||||
tbl[i].core_mhz = 0;
|
||||
|
||||
return tbl;
|
||||
}
|
||||
|
||||
int register_memlat(struct device *dev, struct memlat_hwmon *hw)
|
||||
{
|
||||
int ret = 0;
|
||||
struct memlat_node *node;
|
||||
|
||||
if (!hw->dev && !hw->of_node)
|
||||
return -EINVAL;
|
||||
|
||||
node = devm_kzalloc(dev, sizeof(*node), GFP_KERNEL);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
node->gov = &devfreq_gov_memlat;
|
||||
node->attr_grp = &dev_attr_group;
|
||||
|
||||
node->ratio_ceil = 10;
|
||||
node->hw = hw;
|
||||
|
||||
hw->freq_map = init_core_dev_map(dev, "qcom,core-dev-table");
|
||||
if (!hw->freq_map) {
|
||||
dev_err(dev, "Couldn't find the core-dev freq table!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&list_lock);
|
||||
list_add_tail(&node->list, &memlat_list);
|
||||
mutex_unlock(&list_lock);
|
||||
|
||||
mutex_lock(&state_lock);
|
||||
if (!use_cnt)
|
||||
ret = devfreq_add_governor(&devfreq_gov_memlat);
|
||||
if (!ret)
|
||||
use_cnt++;
|
||||
mutex_unlock(&state_lock);
|
||||
|
||||
if (!ret)
|
||||
dev_info(dev, "Memory Latency governor registered.\n");
|
||||
else
|
||||
dev_err(dev, "Memory Latency governor registration failed!\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION("HW monitor based dev DDR bandwidth voting driver");
|
||||
MODULE_LICENSE("GPL v2");
|
81
drivers/devfreq/governor_memlat.h
Normal file
81
drivers/devfreq/governor_memlat.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2015-2017, 2019, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _GOVERNOR_MEMLAT_H
|
||||
#define _GOVERNOR_MEMLAT_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/devfreq.h>
|
||||
|
||||
/**
|
||||
* struct dev_stats - Device stats
|
||||
* @inst_count: Number of instructions executed.
|
||||
* @mem_count: Number of memory accesses made.
|
||||
* @freq: Effective frequency of the device in the
|
||||
* last interval.
|
||||
*/
|
||||
struct dev_stats {
|
||||
int id;
|
||||
unsigned long inst_count;
|
||||
unsigned long mem_count;
|
||||
unsigned long freq;
|
||||
};
|
||||
|
||||
struct core_dev_map {
|
||||
unsigned int core_mhz;
|
||||
unsigned int target_freq;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct memlat_hwmon - Memory Latency HW monitor info
|
||||
* @start_hwmon: Start the HW monitoring
|
||||
* @stop_hwmon: Stop the HW monitoring
|
||||
* @get_cnt: Return the number of instructions executed,
|
||||
* memory accesses and effective frequency
|
||||
* @dev: Pointer to device that this HW monitor can
|
||||
* monitor.
|
||||
* @of_node: OF node of device that this HW monitor can
|
||||
* monitor.
|
||||
* @df: Devfreq node that this HW monitor is being
|
||||
* used for. NULL when not actively in use and
|
||||
* non-NULL when in use.
|
||||
* @num_cores: Number of cores that are monitored by the
|
||||
* hardware monitor.
|
||||
* @core_stats: Array containing instruction count, memory
|
||||
* accesses and effective frequency for each core.
|
||||
*
|
||||
* One of dev or of_node needs to be specified for a successful registration.
|
||||
*
|
||||
*/
|
||||
struct memlat_hwmon {
|
||||
int (*start_hwmon)(struct memlat_hwmon *hw);
|
||||
void (*stop_hwmon)(struct memlat_hwmon *hw);
|
||||
unsigned long (*get_cnt)(struct memlat_hwmon *hw);
|
||||
struct device *dev;
|
||||
struct device_node *of_node;
|
||||
|
||||
unsigned int num_cores;
|
||||
struct dev_stats *core_stats;
|
||||
|
||||
struct devfreq *df;
|
||||
struct core_dev_map *freq_map;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEVFREQ_GOV_MEMLAT
|
||||
int register_memlat(struct device *dev, struct memlat_hwmon *hw);
|
||||
int update_memlat(struct memlat_hwmon *hw);
|
||||
#else
|
||||
static inline int register_memlat(struct device *dev,
|
||||
struct memlat_hwmon *hw)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int update_memlat(struct memlat_hwmon *hw)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _GOVERNOR_MEMLAT_H */
|
@@ -628,6 +628,74 @@ TRACE_EVENT(cache_hwmon_update,
|
||||
TP_printk("dev=%s freq=%lu", __get_str(name), __entry->freq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(memlat_dev_meas,
|
||||
|
||||
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
|
||||
unsigned long mem, unsigned long freq, unsigned int ratio),
|
||||
|
||||
TP_ARGS(name, dev_id, inst, mem, freq, ratio),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(unsigned int, dev_id)
|
||||
__field(unsigned long, inst)
|
||||
__field(unsigned long, mem)
|
||||
__field(unsigned long, freq)
|
||||
__field(unsigned int, ratio)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->dev_id = dev_id;
|
||||
__entry->inst = inst;
|
||||
__entry->mem = mem;
|
||||
__entry->freq = freq;
|
||||
__entry->ratio = ratio;
|
||||
),
|
||||
|
||||
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, ratio=%u",
|
||||
__get_str(name),
|
||||
__entry->dev_id,
|
||||
__entry->inst,
|
||||
__entry->mem,
|
||||
__entry->freq,
|
||||
__entry->ratio)
|
||||
);
|
||||
|
||||
TRACE_EVENT(memlat_dev_update,
|
||||
|
||||
TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
|
||||
unsigned long mem, unsigned long freq, unsigned long vote),
|
||||
|
||||
TP_ARGS(name, dev_id, inst, mem, freq, vote),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__string(name, name)
|
||||
__field(unsigned int, dev_id)
|
||||
__field(unsigned long, inst)
|
||||
__field(unsigned long, mem)
|
||||
__field(unsigned long, freq)
|
||||
__field(unsigned long, vote)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__assign_str(name, name);
|
||||
__entry->dev_id = dev_id;
|
||||
__entry->inst = inst;
|
||||
__entry->mem = mem;
|
||||
__entry->freq = freq;
|
||||
__entry->vote = vote;
|
||||
),
|
||||
|
||||
TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, vote=%lu",
|
||||
__get_str(name),
|
||||
__entry->dev_id,
|
||||
__entry->inst,
|
||||
__entry->mem,
|
||||
__entry->freq,
|
||||
__entry->vote)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_POWER_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
Reference in New Issue
Block a user