Merge 1e6d1d9646 ("Merge tag 'x86_core_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") into android-mainline

Steps on the way to 5.10-rc1

Change-Id: Ifce134a16bfdfa5f729c6b32b50bc06e11781997
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman
2020-10-21 08:44:33 +02:00
19 changed files with 247 additions and 206 deletions

View File

@@ -577,7 +577,7 @@
 			loops can be debugged more effectively on production
 			systems.
-	clearcpuid=BITNUM [X86]
+	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily

View File

@@ -138,6 +138,18 @@ with respect to allocation:
 		non-linear. This field is purely informational
 		only.
+"thread_throttle_mode":
+		Indicator on Intel systems of how tasks running on threads
+		of a physical core are throttled in cases where they
+		request different memory bandwidth percentages:
+
+		"max":
+			the smallest percentage is applied
+			to all threads
+		"per-thread":
+			bandwidth percentages are directly applied to
+			the threads running on the core
 
 If RDT monitoring is available there will be an "L3_MON" directory
 with the following files:
@@ -364,8 +376,10 @@ to the next control step available on the hardware.
 The bandwidth throttling is a core specific mechanism on some of Intel
 SKUs. Using a high bandwidth and a low bandwidth setting on two threads
-sharing a core will result in both threads being throttled to use the
-low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
+sharing a core may result in both threads being throttled to use the
+low bandwidth (see "thread_throttle_mode").
+
+The fact that Memory bandwidth allocation(MBA) may be a core
 specific mechanism where as memory bandwidth monitoring(MBM) is done at
 the package level may lead to confusion when users try to apply control
 via the MBA and then monitor the bandwidth to see if the controls are
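
[Editor's note: for illustration only, not part of the patch. A minimal userspace sketch of how a tool might consult the new "thread_throttle_mode" file; the mount point, messages and error handling are assumptions, not taken from the commit.]

/*
 * Report how the running system throttles per-core memory bandwidth.
 * Assumes resctrl is mounted at /sys/fs/resctrl.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
    FILE *f = fopen("/sys/fs/resctrl/info/MB/thread_throttle_mode", "r");
    char mode[32] = "";

    if (!f) {
        /* Older kernels or systems without MBA do not expose this file. */
        puts("thread_throttle_mode not available");
        return 0;
    }
    if (fgets(mode, sizeof(mode), f))
        mode[strcspn(mode, "\n")] = '\0';
    fclose(f);

    if (!strcmp(mode, "per-thread"))
        puts("each thread's bandwidth percentage is applied directly");
    else
        puts("the core is throttled to the smallest requested percentage (\"max\")");
    return 0;
}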

View File

@@ -6,7 +6,6 @@
 #include <asm/percpu.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
-#include <asm/inst.h>
 
 /*

View File

@@ -288,6 +288,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */

View File

@@ -23,6 +23,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/pgtable.h>
 
+#include <asm/cmdline.h>
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
@@ -1220,6 +1221,59 @@ static void detect_nopl(void)
 #endif
 }
 
+/*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+	char arg[128];
+	char *argptr = arg;
+	int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+	if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+		setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+		pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+		setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+	if (arglen <= 0)
+		return;
+
+	pr_info("Clearing CPUID bits:");
+	do {
+		res = get_option(&argptr, &bit);
+		if (res == 0 || res == 3)
+			break;
+
+		/* If the argument was too long, the last bit may be cut off */
+		if (res == 1 && arglen >= sizeof(arg))
+			break;
+
+		if (bit >= 0 && bit < NCAPINTS * 32) {
+			pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+			setup_clear_cpu_cap(bit);
+		}
+	} while (res == 2);
+	pr_cont("\n");
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	get_cpu_cap(c);
 	get_cpu_address_sizes(c);
 	setup_force_cpu_cap(X86_FEATURE_CPUID);
+	cpu_parse_early_param();
 
 	if (this_cpu->c_early_init)
 		this_cpu->c_early_init(c);
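
[Editor's note: for illustration only, not part of the patch. The new cpu_parse_early_param() walks a comma-separated "clearcpuid=" list with the kernel's get_option() helper. Below is a standalone userspace approximation of that loop; the capability word count and the bit numbers in main() are illustrative assumptions.]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCAPINTS 19	/* illustrative: capability words as of this era */

/* Mimic the clearcpuid= list walk using strtok() instead of get_option(). */
static void parse_clearcpuid(const char *arg)
{
    char buf[128];
    char *tok;

    strncpy(buf, arg, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = '\0';

    for (tok = strtok(buf, ","); tok; tok = strtok(NULL, ",")) {
        long bit = strtol(tok, NULL, 10);

        if (bit >= 0 && bit < NCAPINTS * 32)
            printf("would clear CPUID bit %ld\n", bit);
        else
            printf("ignoring out-of-range value %ld\n", bit);
    }
}

int main(void)
{
    parse_clearcpuid("154,155,900");	/* example input only */
    return 0;
}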

View File

@@ -70,6 +70,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
 	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
+	{ X86_FEATURE_PER_THREAD_MBA,		X86_FEATURE_MBA       },
 	{}
 };

View File

@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "MB", .name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA), .domains = domain_init(RDT_RESOURCE_MBA),
.cache_level = 3, .cache_level = 3,
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u", .format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB, .fflags = RFTYPE_RES_MB,
}, },
@@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 {
 	union cpuid_0x10_3_eax eax;
 	union cpuid_0x10_x_edx edx;
-	u32 ebx, ecx;
+	u32 ebx, ecx, max_delay;
 
 	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
 	r->num_closid = edx.split.cos_max + 1;
-	r->membw.max_delay = eax.split.max_delay + 1;
+	max_delay = eax.split.max_delay + 1;
 	r->default_ctrl = MAX_MBA_BW;
+	r->membw.arch_needs_linear = true;
 	if (ecx & MBA_IS_LINEAR) {
 		r->membw.delay_linear = true;
-		r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
-		r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
+		r->membw.min_bw = MAX_MBA_BW - max_delay;
+		r->membw.bw_gran = MAX_MBA_BW - max_delay;
 	} else {
 		if (!rdt_get_mb_table(r))
 			return false;
+		r->membw.arch_needs_linear = false;
 	}
 	r->data_width = 3;
 
+	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
+		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
+	else
+		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
+
+	thread_throttle_mode_init();
 
 	r->alloc_capable = true;
 	r->alloc_enabled = true;
@@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 	/* AMD does not use delay */
 	r->membw.delay_linear = false;
+	r->membw.arch_needs_linear = false;
+
+	/*
+	 * AMD does not use memory delay throttle model to control
+	 * the allocation like Intel does.
+	 */
+	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
 	r->membw.min_bw = 0;
 	r->membw.bw_gran = 1;
 	/* Max value is 2048, Data width should be 4 in decimal */
@@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
 	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
 }
 
-static int get_cache_id(int cpu, int level)
-{
-	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
-	int i;
-
-	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == level)
-			return ci->info_list[i].id;
-	}
-
-	return -1;
-}
-
 static void
 mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 {
@@ -556,13 +558,13 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
  */
 static void domain_add_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cache_id(cpu, r->cache_level);
+	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
 	struct list_head *add_pos = NULL;
 	struct rdt_domain *d;
 
 	d = rdt_find_domain(r, id, &add_pos);
 	if (IS_ERR(d)) {
-		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
 		return;
 	}
@@ -602,12 +604,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cache_id(cpu, r->cache_level);
+	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
 	struct rdt_domain *d;
 
 	d = rdt_find_domain(r, id, NULL);
 	if (IS_ERR_OR_NULL(d)) {
-		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
 		return;
 	}
@@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
 		    r->rid == RDT_RESOURCE_L3CODE ||
 		    r->rid == RDT_RESOURCE_L2 ||
 		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE)
-			r->cbm_validate = cbm_validate_intel;
-		else if (r->rid == RDT_RESOURCE_MBA) {
+		    r->rid == RDT_RESOURCE_L2CODE) {
+			r->cache.arch_has_sparse_bitmaps = false;
+			r->cache.arch_has_empty_bitmaps = false;
+		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
 			r->msr_update = mba_wrmsr_intel;
-			r->parse_ctrlval = parse_bw_intel;
 		}
 	}
 }
@@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
 		    r->rid == RDT_RESOURCE_L3CODE ||
 		    r->rid == RDT_RESOURCE_L2 ||
 		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE)
-			r->cbm_validate = cbm_validate_amd;
-		else if (r->rid == RDT_RESOURCE_MBA) {
+		    r->rid == RDT_RESOURCE_L2CODE) {
+			r->cache.arch_has_sparse_bitmaps = true;
+			r->cache.arch_has_empty_bitmaps = true;
+		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_BW_BASE;
 			r->msr_update = mba_wrmsr_amd;
-			r->parse_ctrlval = parse_bw_amd;
 		}
 	}
 }

View File

@@ -21,53 +21,6 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and maximum bandwidth values specified by
- * the hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate_amd(char *buf, unsigned long *data,
-			    struct rdt_resource *r)
-{
-	unsigned long bw;
-	int ret;
-
-	ret = kstrtoul(buf, 10, &bw);
-	if (ret) {
-		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
-		return false;
-	}
-
-	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
-		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
-				    r->membw.min_bw, r->default_ctrl);
-		return false;
-	}
-
-	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
-	return true;
-}
-
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
-		 struct rdt_domain *d)
-{
-	unsigned long bw_val;
-
-	if (d->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	if (!bw_validate_amd(data->buf, &bw_val, r))
-		return -EINVAL;
-
-	d->new_ctrl = bw_val;
-	d->have_new_ctrl = true;
-
-	return 0;
-}
-
 /*
  * Check whether MBA bandwidth percentage value is correct. The value is
  * checked against the minimum and max bandwidth values specified by the
@@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	/*
 	 * Only linear delay values is supported for current Intel SKUs.
 	 */
-	if (!r->membw.delay_linear) {
+	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
 		rdt_last_cmd_puts("No support for non-linear MB domains\n");
 		return false;
 	}
@@ -104,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	return true;
 }
 
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
-		   struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d)
 {
 	unsigned long bw_val;
@@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
 }
 
 /*
- * Check whether a cache bit mask is valid. The SDM says:
+ * Check whether a cache bit mask is valid.
+ * For Intel the SDM says:
  *	Please note that all (and only) contiguous '1' combinations
  *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
  * Additionally Haswell requires at least two bits set.
+ * AMD allows non-contiguous bitmasks.
  */
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
+static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
 {
 	unsigned long first_bit, zero_bit, val;
 	unsigned int cbm_len = r->cache.cbm_len;
@@ -140,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 		return false;
 	}
 
-	if (val == 0 || val > r->default_ctrl) {
+	if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
+	    val > r->default_ctrl) {
 		rdt_last_cmd_puts("Mask out of range\n");
 		return false;
 	}
@@ -148,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 	first_bit = find_first_bit(&val, cbm_len);
 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
-	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+	/* Are non-contiguous bitmaps allowed? */
+	if (!r->cache.arch_has_sparse_bitmaps &&
+	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
 		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
 		return false;
 	}
@@ -163,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 	return true;
 }
 
-/*
- * Check whether a cache bit mask is valid. AMD allows non-contiguous
- * bitmasks
- */
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
-{
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul(buf, 16, &val);
-	if (ret) {
-		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
-		return false;
-	}
-
-	if (val > r->default_ctrl) {
-		rdt_last_cmd_puts("Mask out of range\n");
-		return false;
-	}
-
-	*data = val;
-	return true;
-}
-
 /*
  * Read one cache bit mask (hex). Check that it is valid for the current
  * resource type.
@@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
 		return -EINVAL;
 	}
 
-	if (!r->cbm_validate(data->buf, &cbm_val, r))
+	if (!cbm_validate(data->buf, &cbm_val, r))
 		return -EINVAL;
 
 	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
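
[Editor's note: for illustration only, not part of the patch. The unified cbm_validate() above rejects non-contiguous masks unless the architecture flags allow them. A self-contained userspace sketch of the contiguity test expressed with plain integer arithmetic; the sample mask values are arbitrary.]

#include <stdbool.h>
#include <stdio.h>

/* True if the set bits in val form one contiguous block. */
static bool cbm_is_contiguous(unsigned long val)
{
    if (val == 0)
        return false;
    /* Drop trailing zeros, then check that the remainder is all ones. */
    while (!(val & 1))
        val >>= 1;
    return (val & (val + 1)) == 0;
}

int main(void)
{
    printf("0x3c   -> %d\n", cbm_is_contiguous(0x3c));	/* contiguous: 1 */
    printf("0xf00f -> %d\n", cbm_is_contiguous(0xf00f));	/* sparse: 0 */
    return 0;
}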

View File

@@ -283,7 +283,6 @@ struct rftype {
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
  * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
- * @chunks_bw	Total local data moved. Used for bandwidth calculation
  * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
  * @prev_bw	The most recent bandwidth in MBps
  * @delta_bw	Difference between the current and previous bandwidth
@@ -292,7 +291,6 @@ struct rftype {
 struct mbm_state {
 	u64	chunks;
 	u64	prev_msr;
-	u64	chunks_bw;
 	u64	prev_bw_msr;
 	u32	prev_bw;
 	u32	delta_bw;
@@ -360,6 +358,8 @@ struct msr_param {
  *			in a cache bit mask
  * @shareable_bits:	Bitmask of shareable resource with other
  *			executing entities
+ * @arch_has_sparse_bitmaps:	True if a bitmap like f00f is valid.
+ * @arch_has_empty_bitmaps:	True if the '0' bitmap is valid.
  */
 struct rdt_cache {
 	unsigned int	cbm_len;
@@ -367,25 +367,43 @@ struct rdt_cache {
 	unsigned int	cbm_idx_mult;
 	unsigned int	cbm_idx_offset;
 	unsigned int	shareable_bits;
+	bool		arch_has_sparse_bitmaps;
+	bool		arch_has_empty_bitmaps;
+};
+
+/**
+ * enum membw_throttle_mode - System's memory bandwidth throttling mode
+ * @THREAD_THROTTLE_UNDEFINED:	Not relevant to the system
+ * @THREAD_THROTTLE_MAX:	Memory bandwidth is throttled at the core
+ *				always using smallest bandwidth percentage
+ *				assigned to threads, aka "max throttling"
+ * @THREAD_THROTTLE_PER_THREAD:	Memory bandwidth is throttled at the thread
+ */
+enum membw_throttle_mode {
+	THREAD_THROTTLE_UNDEFINED = 0,
+	THREAD_THROTTLE_MAX,
+	THREAD_THROTTLE_PER_THREAD,
 };
 
 /**
  * struct rdt_membw - Memory bandwidth allocation related data
- * @max_delay:		Max throttle delay. Delay is the hardware
- *			representation for memory bandwidth.
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
+ * @arch_needs_linear:	True if we can't configure non-linear resources
+ * @throttle_mode:	Bandwidth throttling mode when threads request
+ *			different memory bandwidths
  * @mba_sc:		True if MBA software controller(mba_sc) is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
-	u32		max_delay;
-	u32		min_bw;
-	u32		bw_gran;
-	u32		delay_linear;
-	bool		mba_sc;
-	u32		*mb_map;
+	u32			min_bw;
+	u32			bw_gran;
+	u32			delay_linear;
+	bool			arch_needs_linear;
+	enum membw_throttle_mode throttle_mode;
+	bool			mba_sc;
+	u32			*mb_map;
 };
 
 static inline bool is_llc_occupancy_enabled(void)
@@ -437,7 +455,6 @@ struct rdt_parse_data {
  * @cache:		Cache allocation related data
  * @format_str:		Per resource format string to show domain value
  * @parse_ctrlval:	Per resource function pointer to parse control values
- * @cbm_validate	Cache bitmask validate function
  * @evt_list:		List of monitoring events
  * @num_rmid:		Number of RMIDs available
  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
@@ -464,7 +481,6 @@ struct rdt_resource {
 	int (*parse_ctrlval)(struct rdt_parse_data *data,
 			     struct rdt_resource *r,
 			     struct rdt_domain *d);
-	bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r);
 	struct list_head	evt_list;
 	int			num_rmid;
 	unsigned int		mon_scale;
@@ -474,10 +490,8 @@ struct rdt_resource {
 int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
 	      struct rdt_domain *d);
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
-		   struct rdt_domain *d);
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
-		 struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d);
 
 extern struct mutex rdtgroup_mutex;
@@ -609,8 +623,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+void __init thread_throttle_mode_init(void);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */

View File

@@ -279,8 +279,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 		return;
 
 	chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
-	m->chunks_bw += chunks;
-	m->chunks = m->chunks_bw;
+	m->chunks += chunks;
 	cur_bw = (chunks * r->mon_scale) >> 20;
 
 	if (m->delta_comp)
@@ -478,19 +477,13 @@ void cqm_handle_limbo(struct work_struct *work)
 	mutex_lock(&rdtgroup_mutex);
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
-
-	d = get_domain_from_cpu(cpu, r);
-	if (!d) {
-		pr_warn_once("Failure to get domain for limbo worker\n");
-		goto out_unlock;
-	}
+	d = container_of(work, struct rdt_domain, cqm_limbo.work);
 
 	__check_limbo(d, false);
 
 	if (has_busy_rmid(r, d))
 		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
 
-out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }
@@ -520,10 +513,7 @@ void mbm_handle_overflow(struct work_struct *work)
 		goto out_unlock;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
-
-	d = get_domain_from_cpu(cpu, r);
-	if (!d)
-		goto out_unlock;
+	d = container_of(work, struct rdt_domain, mbm_over.work);
 
 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
 		mbm_update(r, d, prgrp->mon.rmid);
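
[Editor's note: for illustration only, not part of the patch. The limbo and overflow handlers above now recover the domain from the work item itself via container_of() instead of re-looking it up by CPU. A standalone sketch of that pattern; the structure and field names are invented for the example.]

#include <stddef.h>
#include <stdio.h>

/* Recover the enclosing structure from a pointer to one of its members. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_domain {
    int id;
    int work;	/* stands in for the embedded work_struct */
};

static void handler(int *work_ptr)
{
    struct demo_domain *d = container_of(work_ptr, struct demo_domain, work);

    printf("handler runs for domain %d\n", d->id);
}

int main(void)
{
    struct demo_domain dom = { .id = 3 };

    handler(&dom.work);	/* only the member pointer is passed around */
    return 0;
}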

View File

@@ -592,6 +592,18 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 	return ret;
 }
 
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_alloc_capable &&
+	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_mon_capable &&
+	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+}
+
 /**
  * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
  * @r: Resource group
@@ -607,8 +619,7 @@ int rdtgroup_tasks_assigned(struct rdtgroup *r)
 	rcu_read_lock();
 	for_each_process_thread(p, t) {
-		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
-		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
+		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 			ret = 1;
 			break;
 		}
@@ -706,8 +717,7 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 	rcu_read_lock();
 	for_each_process_thread(p, t) {
-		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
-		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
+		if (is_closid_match(t, r) || is_rmid_match(t, r))
 			seq_printf(s, "%d\n", t->pid);
 	}
 	rcu_read_unlock();
@@ -1017,6 +1027,19 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
+					 struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
+		seq_puts(seq, "per-thread\n");
+	else
+		seq_puts(seq, "max\n");
+
+	return 0;
+}
+
 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 				       char *buf, size_t nbytes, loff_t off)
 {
@@ -1513,6 +1536,17 @@ static struct rftype res_common_files[] = {
 		.seq_show	= rdt_delay_linear_show,
 		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
 	},
+	/*
+	 * Platform specific which (if any) capabilities are provided by
+	 * thread_throttle_mode. Defer "fflags" initialization to platform
+	 * discovery.
+	 */
+	{
+		.name		= "thread_throttle_mode",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_thread_throttle_mode_show,
+	},
 	{
 		.name		= "max_threshold_occupancy",
 		.mode		= 0644,
@@ -1583,7 +1617,7 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
 	lockdep_assert_held(&rdtgroup_mutex);
 
 	for (rft = rfts; rft < rfts + len; rft++) {
-		if ((fflags & rft->fflags) == rft->fflags) {
+		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
 			ret = rdtgroup_add_file(kn, rft);
 			if (ret)
 				goto error;
@@ -1600,6 +1634,33 @@ error:
 	return ret;
 }
 
+static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
+{
+	struct rftype *rfts, *rft;
+	int len;
+
+	rfts = res_common_files;
+	len = ARRAY_SIZE(res_common_files);
+
+	for (rft = rfts; rft < rfts + len; rft++) {
+		if (!strcmp(rft->name, name))
+			return rft;
+	}
+
+	return NULL;
+}
+
+void __init thread_throttle_mode_init(void)
+{
+	struct rftype *rft;
+
+	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
+	if (!rft)
+		return;
+
+	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
+}
+
 /**
  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
  * @r: The resource group with which the file is associated.
@@ -2245,18 +2306,6 @@ static int reset_all_ctrls(struct rdt_resource *r)
 	return 0;
 }
 
-static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (rdt_alloc_capable &&
-	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
-}
-
-static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (rdt_mon_capable &&
-	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
-}
-
 /*
  * Move tasks from one to the other group. If @from is NULL, then all tasks
  * in the systems are moved unconditionally (used for teardown).
@@ -3196,7 +3245,7 @@ int __init rdtgroup_init(void)
 	 * It may also be ok since that would enable debugging of RDT before
 	 * resctrl is mounted.
 	 * The reason why the debugfs directory is created here and not in
-	 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
+	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
 	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
 	 * (the lockdep class of inode->i_rwsem). Other filesystem
 	 * interactions (eg. SyS_getdents) have the lock ordering:

View File

@@ -35,6 +35,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_CDP_L3,		CPUID_ECX,  2, 0x00000010, 1 },
 	{ X86_FEATURE_CDP_L2,		CPUID_ECX,  2, 0x00000010, 2 },
 	{ X86_FEATURE_MBA,		CPUID_EBX,  3, 0x00000010, 0 },
+	{ X86_FEATURE_PER_THREAD_MBA,	CPUID_ECX,  0, 0x00000010, 3 },
 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
 	{ X86_FEATURE_PROC_FEEDBACK,	CPUID_EDX, 11, 0x80000007, 0 },

View File

@@ -115,7 +115,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
 
 	if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
-		printk("%sCode: Bad RIP value.\n", loglvl);
+		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+		       loglvl, prologue);
 	} else {
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
 		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,

View File

@@ -5,7 +5,6 @@
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
-#include <asm/cmdline.h>
 
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -237,52 +236,12 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 }
 
-/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
-	char arg[32];
-	char *argptr = arg;
-	int bit;
-
-#ifdef CONFIG_X86_32
-	if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
-		setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
-		pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
-#endif
-
-	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
-		setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
-	if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
-				sizeof(arg)) &&
-	    get_option(&argptr, &bit) &&
-	    bit >= 0 &&
-	    bit < NCAPINTS * 32)
-		setup_clear_cpu_cap(bit);
-}
-
 /*
  * Called on the boot CPU once per system bootup, to set up the initial
  * FPU state that is later cloned into all processes:
  */
 void __init fpu__init_system(struct cpuinfo_x86 *c)
 {
-	fpu__init_parse_early_param();
 	fpu__init_system_early_generic(c);
 
 	/*

View File

@@ -24,7 +24,6 @@
 #include <asm/irqdomain.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
-#include <asm/io_apic.h>
 #include <asm/proto.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820/api.h>

View File

@@ -3,6 +3,7 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
@@ -119,4 +120,24 @@ int acpi_find_last_cache_level(unsigned int cpu);
 const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
 
+/*
+ * Get the id of the cache associated with @cpu at level @level.
+ * cpuhp lock must be held.
+ */
+static inline int get_cpu_cacheinfo_id(int cpu, int level)
+{
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+	int i;
+
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level == level) {
+			if (ci->info_list[i].attributes & CACHE_ID)
+				return ci->info_list[i].id;
+			return -1;
+		}
+	}
+
+	return -1;
+}
+
 #endif /* _LINUX_CACHEINFO_H */

View File

@@ -38,7 +38,7 @@
 #endif
 
 /*
- * TIF flags handled in syscall_enter_from_usermode()
+ * TIF flags handled in syscall_enter_from_user_mode()
  */
 #ifndef ARCH_SYSCALL_ENTER_WORK
 # define ARCH_SYSCALL_ENTER_WORK	(0)

View File

@@ -2,6 +2,8 @@
 #ifndef _RESCTRL_H
 #define _RESCTRL_H
 
+#include <linux/pid.h>
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,

View File

@@ -208,7 +208,7 @@ static inline bool report_single_step(unsigned long ti_work)
 /*
  * If TIF_SYSCALL_EMU is set, then the only reason to report is when
  * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
- * instruction has been already reported in syscall_enter_from_usermode().
+ * instruction has been already reported in syscall_enter_from_user_mode().
  */
 #define SYSEMU_STEP	(_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)