Merge branch 'x86/cache' into perf/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2018-10-23 12:30:19 +02:00
316 changed files with 2969 additions and 1834 deletions

View File

@@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c)
static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
if (c->x86 == 6) {
/* Duron Rev A0 */
if (c->x86_model == 3 && c->x86_stepping == 0)
size = 64;

View File

@@ -485,9 +485,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
size_t tsize;
if (is_llc_occupancy_enabled()) {
d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
sizeof(unsigned long),
GFP_KERNEL);
d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
@@ -496,7 +494,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_total) {
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
}
@@ -504,7 +502,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_local) {
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
return -ENOMEM;
}
@@ -610,9 +608,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
/*
* rdt_domain "d" is going to be freed below, so clear
* its pointer from pseudo_lock_region struct.
*/
if (d->plr)
d->plr->d = NULL;
kfree(d->ctrl_val);
kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);

View File

@@ -529,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
u32 _cbm, int closid, bool exclusive);
unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
u32 cbm);
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);

View File

@@ -404,8 +404,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
rdtgrp->plr->d->id, rdtgrp->plr->cbm);
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->r->name,
rdtgrp->plr->d->id,
rdtgrp->plr->cbm);
}
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {

View File

@@ -789,25 +789,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
/**
* rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
* @d: RDT domain
* @_cbm: CBM to test
* @cbm: CBM to test
*
* @d represents a cache instance and @_cbm a capacity bitmask that is
* considered for it. Determine if @_cbm overlaps with any existing
* @d represents a cache instance and @cbm a capacity bitmask that is
* considered for it. Determine if @cbm overlaps with any existing
* pseudo-locked region on @d.
*
* Return: true if @_cbm overlaps with pseudo-locked region on @d, false
* @cbm is unsigned long, even if only 32 bits are used, to make the
* bitmap functions work correctly.
*
* Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
unsigned long *cbm = (unsigned long *)&_cbm;
unsigned long *cbm_b;
unsigned int cbm_len;
unsigned long cbm_b;
if (d->plr) {
cbm_len = d->plr->r->cache.cbm_len;
cbm_b = (unsigned long *)&d->plr->cbm;
if (bitmap_intersects(cbm, cbm_b, cbm_len))
cbm_b = d->plr->cbm;
if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
return true;
}
return false;
@@ -1172,6 +1174,11 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
goto out;
}
if (!plr->d) {
ret = -ENODEV;
goto out;
}
plr->thread_done = 0;
cpu = cpumask_first(&plr->d->cpu_mask);
if (!cpu_online(cpu)) {
@@ -1236,7 +1243,7 @@ static ssize_t pseudo_lock_measure_trigger(struct file *file,
buf[buf_size] = '\0';
ret = kstrtoint(buf, 10, &sel);
if (ret == 0) {
if (sel != 1)
if (sel != 1 && sel != 2 && sel != 3)
return -EINVAL;
ret = debugfs_file_get(file->f_path.dentry);
if (ret)
@@ -1492,6 +1499,11 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
plr = rdtgrp->plr;
if (!plr->d) {
mutex_unlock(&rdtgroup_mutex);
return -ENODEV;
}
/*
* Task is required to run with affinity to the cpus associated
* with the pseudo-locked region. If this is not the case the task

View File

@@ -268,17 +268,27 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
struct cpumask *mask;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
else
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
mask = &rdtgrp->plr->d->cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
}
} else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
}
} else {
ret = -ENOENT;
}
@@ -961,7 +971,78 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
}
/**
* rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* rdt_cdp_peer_get - Retrieve CDP peer if it exists
* @r: RDT resource to which RDT domain @d belongs
* @d: Cache instance for which a CDP peer is requested
* @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
* Used to return the result.
* @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
* Used to return the result.
*
* RDT resources are managed independently and by extension the RDT domains
* (RDT resource instances) are managed independently also. The Code and
* Data Prioritization (CDP) RDT resources, while managed independently,
* could refer to the same underlying hardware. For example,
* RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
*
* When provided with an RDT resource @r and an instance of that RDT
* resource @d rdt_cdp_peer_get() will return if there is a peer RDT
* resource and the exact instance that shares the same hardware.
*
* Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
* If a CDP peer was found, @r_cdp will point to the peer RDT resource
* and @d_cdp will point to the peer RDT domain.
*/
static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
struct rdt_resource **r_cdp,
struct rdt_domain **d_cdp)
{
struct rdt_resource *_r_cdp = NULL;
struct rdt_domain *_d_cdp = NULL;
int ret = 0;
switch (r->rid) {
case RDT_RESOURCE_L3DATA:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
break;
case RDT_RESOURCE_L3CODE:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
break;
case RDT_RESOURCE_L2DATA:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
break;
case RDT_RESOURCE_L2CODE:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
break;
default:
ret = -ENOENT;
goto out;
}
/*
* When a new CPU comes online and CDP is enabled then the new
* RDT domains (if any) associated with both CDP RDT resources
* are added in the same CPU online routine while the
* rdtgroup_mutex is held. It should thus not happen for one
* RDT domain to exist and be associated with its RDT CDP
* resource but there is no RDT domain associated with the
* peer RDT CDP resource. Hence the WARN.
*/
_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
if (WARN_ON(!_d_cdp)) {
_r_cdp = NULL;
ret = -EINVAL;
}
out:
*r_cdp = _r_cdp;
*d_cdp = _d_cdp;
return ret;
}
/**
* __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
@@ -975,33 +1056,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
* is false then overlaps with any resource group or hardware entities
* will be considered.
*
* @cbm is unsigned long, even if only 32 bits are used, to make the
* bitmap functions work correctly.
*
* Return: false if CBM does not overlap, true if it does.
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
u32 _cbm, int closid, bool exclusive)
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
unsigned long *cbm = (unsigned long *)&_cbm;
unsigned long *ctrl_b;
enum rdtgrp_mode mode;
unsigned long ctrl_b;
u32 *ctrl;
int i;
/* Check for any overlap with regions used by hardware directly */
if (!exclusive) {
if (bitmap_intersects(cbm,
(unsigned long *)&r->cache.shareable_bits,
r->cache.cbm_len))
ctrl_b = r->cache.shareable_bits;
if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
return true;
}
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
for (i = 0; i < closids_supported(); i++, ctrl++) {
ctrl_b = (unsigned long *)ctrl;
ctrl_b = *ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
mode != RDT_MODE_PSEUDO_LOCKSETUP) {
if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
if (exclusive) {
if (mode == RDT_MODE_EXCLUSIVE)
return true;
@@ -1015,6 +1097,41 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
return false;
}
/**
* rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
* @closid: Intended closid for @cbm.
* @exclusive: Only check if overlaps with exclusive resource groups
*
* Resources that can be allocated using a CBM can use the CBM to control
* the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
* for overlap. Overlap test is not limited to the specific resource for
* which the CBM is intended though - when dealing with CDP resources that
* share the underlying hardware the overlap check should be performed on
* the CDP resource sharing the hardware also.
*
* Refer to description of __rdtgroup_cbm_overlaps() for the details of the
* overlap test.
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
struct rdt_resource *r_cdp;
struct rdt_domain *d_cdp;
if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
return true;
if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
return false;
return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
}
/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
*
@@ -1138,15 +1255,18 @@ out:
* computed by first dividing the total cache size by the CBM length to
* determine how many bytes each bit in the bitmask represents. The result
* is multiplied with the number of bits set in the bitmask.
*
* @cbm is unsigned long, even if only 32 bits are used to make the
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
struct rdt_domain *d, u32 cbm)
struct rdt_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
int num_b, i;
num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->cache_level) {
@@ -1172,6 +1292,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
int ret = 0;
bool sep;
u32 ctrl;
@@ -1182,11 +1303,18 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
seq_printf(s, "%*s:", max_name_width,
rdtgrp->plr->r->name);
size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
}
goto out;
}
@@ -1216,7 +1344,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
return 0;
return ret;
}
/* rdtgroup information files for one cache resource. */
@@ -2350,13 +2478,16 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
*/
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
struct rdt_resource *r_cdp = NULL;
struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;
struct rdt_resource *r;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
struct rdt_domain *d;
u32 peer_ctl, *ctrl;
int i, ret;
u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
/*
@@ -2366,6 +2497,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA)
continue;
list_for_each_entry(d, &r->domains, list) {
rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
@@ -2375,9 +2507,19 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
break;
used_b |= *ctrl;
/*
* If CDP is active include peer
* domain's usage to ensure there
* is no overlap with an exclusive
* group.
*/
if (d_cdp)
peer_ctl = d_cdp->ctrl_val[i];
else
peer_ctl = 0;
used_b |= *ctrl | peer_ctl;
if (mode == RDT_MODE_SHAREABLE)
d->new_ctrl |= *ctrl;
d->new_ctrl |= *ctrl | peer_ctl;
}
}
if (d->plr && d->plr->cbm > 0)
@@ -2390,9 +2532,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
* modify the CBM based on system availability.
*/
cbm_ensure_valid(&d->new_ctrl, r);
if (bitmap_weight((unsigned long *) &d->new_ctrl,
r->cache.cbm_len) <
r->cache.min_cbm_bits) {
/*
* Assign the u32 CBM to an unsigned long to ensure
* that bitmap_weight() does not access out-of-bound
* memory.
*/
tmp_cbm = d->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
r->cache.min_cbm_bits) {
rdt_last_cmd_printf("no space on %s:%d\n",
r->name, d->id);
return -ENOSPC;
@@ -2795,6 +2942,13 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
seq_puts(seq, ",cdp");
if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
seq_puts(seq, ",cdpl2");
if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
seq_puts(seq, ",mba_MBps");
return 0;
}

View File

@@ -26,6 +26,7 @@
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
#include <asm/uv/uv.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1433,6 +1434,9 @@ void __init tsc_early_init(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
/* Don't change UV TSC multi-chassis synchronization */
if (is_early_uv_system())
return;
if (!determine_cpu_tsc_frequencies(true))
return;
loops_per_jiffy = get_loops_per_jiffy();