Merge branch 'linus' into x86/urgent

Required to queue a dependent fix.
This commit is contained in:
Thomas Gleixner
2018-06-22 21:20:35 +02:00
12422 changed files with 511415 additions and 618243 deletions

View File

@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o

View File

@@ -9,6 +9,7 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
#include <asm/cpu.h>
#include <asm/spec-ctrl.h>
#include <asm/smp.h>
@@ -298,7 +299,6 @@ static int nearby_node(int apicid)
}
#endif
#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
@@ -328,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -346,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
* We may have multiple LLCs if L3 caches exist, so check if we
* have an L3 cache by looking at the L3 cache CPUID leaf.
* In case leaf B is available, use it to derive
* topology information.
*/
if (cpuid_edx(0x80000006)) {
if (c->x86 == 0x17) {
/*
* LLC is at the core complex level.
* Core complex id is ApicId[3].
*/
per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
} else {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
}
}
err = detect_extended_topology(c);
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
cacheinfo_amd_init_llc_id(c, cpu, node_id);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -376,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -384,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -395,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
amd_get_topology(c);
#endif
}
u16 amd_get_nb_id(int cpu)
{
u16 id = 0;
#ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
#endif
return id;
return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
@@ -864,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
}

View File

@@ -529,18 +529,15 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
* a completely different MSR and bit dependent on family.
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
* use a completely different MSR and bit dependent on family.
*/
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
x86_amd_ssb_disable();
else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
break;
case X86_VENDOR_AMD:
x86_amd_ssb_disable();
break;
}
}

View File

@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
#include "cpu.h"
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
* have an L3 cache by looking at the L3 cache CPUID leaf.
*/
if (!cpuid_edx(0x80000006))
return;
if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
} else if (c->x86 == 0x17 &&
c->x86_model >= 0 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
} else {
/*
* LLC ID is calculated from the number of threads sharing the
* cache.
* */
u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
u32 llc_index = find_num_cache_leaves(c) - 1;
cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
if (eax)
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
if (num_sharing_cache) {
int bits = get_count_order(num_sharing_cache) - 1;
per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
}
}
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;
if (!l2)
cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,

View File

@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
msr_ctl = vmx_msr_high | vmx_msr_low;
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
set_cpu_cap(c, X86_FEATURE_VNMI);
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
vmx_msr_low, vmx_msr_high);
msr_ctl2 = vmx_msr_high | vmx_msr_low;
if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
(msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
set_cpu_cap(c, X86_FEATURE_EPT);
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
set_cpu_cap(c, X86_FEATURE_VPID);
}
}
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
init_intel_cacheinfo(c);
detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
if (c->cpuid_level > 9) {
unsigned int eax = cpuid_eax(10);
/*
* Check for version and the number of counters
* Version(eax[7:0]) can't be 0;
* Counters(eax[15:8]) should be greater than 1;
*/
if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
if (cpu_has(c, X86_FEATURE_VMX))
centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32

View File

@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
void detect_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
c->x86_max_cores = 1;
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return;
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
c->x86_max_cores = (eax >> 26) + 1;
}
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -783,6 +803,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_STIBP);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
set_cpu_cap(c, X86_FEATURE_SSBD);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
}
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -972,7 +998,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
!(ia32_cap & ARCH_CAP_SSB_NO))
!(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (x86_match_cpu(cpu_no_meltdown))
@@ -1044,6 +1071,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
/*
* Later in the boot process pgtable_l5_enabled() relies on
* cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
* enabled by this point we need to clear the feature bit to avoid
* false-positives at the later stage.
*
* pgtable_l5_enabled() can be false here for several reasons:
* - 5-level paging is disabled compile-time;
* - it's 32-bit kernel;
* - machine doesn't support 5-level paging;
* - user specified 'no5lvl' in kernel command line.
*/
if (!pgtable_l5_enabled())
setup_clear_cpu_cap(X86_FEATURE_LA57);
}
void __init early_cpu_init(void)
@@ -1557,7 +1599,7 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_CC_STACKPROTECTOR
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif

View File

@@ -47,6 +47,16 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern u32 get_scattered_cpuid_leaf(unsigned int level,
unsigned int sub_leaf,
enum cpuid_regs_idx reg);
extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
extern int detect_extended_topology(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);

View File

@@ -456,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
/*
* find out the number of processor cores on the die
*/
static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return 1;
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return (eax >> 26) + 1;
else
return 1;
}
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
@@ -656,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
early_init_intel(c);
intel_workarounds(c);
@@ -674,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
c->x86_max_cores = intel_num_cpu_cores(c);
detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
l2 = init_intel_cacheinfo(c);
/* Detect legacy cache sizes if init_intel_cacheinfo did not */
if (l2 == 0) {
cpu_detect_cache_sizes(c);
l2 = c->x86_cache_size;
}
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -699,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
unsigned int l1;
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
@@ -727,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {

View File

@@ -33,8 +33,8 @@
#include <asm/intel_rdt_sched.h>
#include "intel_rdt.h"
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
#define MBA_MAX_MBPS U32_MAX
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -178,7 +178,7 @@ struct rdt_resource rdt_resources_all[] = {
.msr_update = mba_wrmsr,
.cache_level = 3,
.parse_ctrlval = parse_bw,
.format_str = "%d=%*d",
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
};
@@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void)
rdt_alloc_capable = true;
}
bool is_mba_sc(struct rdt_resource *r)
{
if (!r)
return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
return r->membw.mba_sc;
}
/*
* rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
* exposed to user interface and the h/w understandable delay values.
@@ -341,7 +349,7 @@ static int get_cache_id(int cpu, int level)
* that can be written to QOS_MSRs.
* There are currently no SKUs which support non linear delay values.
*/
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
if (r->membw.delay_linear)
return MAX_MBA_BW - bw;
@@ -431,25 +439,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
{
int i;
/*
* Initialize the Control MSRs to having no control.
* For Cache Allocation: Set all bits in cbm
* For Memory Allocation: Set b/w requested to 100%
* and the bandwidth in MBps to U32_MAX
*/
for (i = 0; i < r->num_closid; i++, dc++, dm++) {
*dc = r->default_ctrl;
*dm = MBA_MAX_MBPS;
}
}
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
struct msr_param m;
u32 *dc;
int i;
u32 *dc, *dm;
dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
if (!dc)
return -ENOMEM;
d->ctrl_val = dc;
dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
if (!dm) {
kfree(dc);
return -ENOMEM;
}
/*
* Initialize the Control MSRs to having no control.
* For Cache Allocation: Set all bits in cbm
* For Memory Allocation: Set b/w requested to 100
*/
for (i = 0; i < r->num_closid; i++, dc++)
*dc = r->default_ctrl;
d->ctrl_val = dc;
d->mbps_val = dm;
setup_default_ctrlval(r, dc, dm);
m.low = 0;
m.high = r->num_closid;
@@ -588,6 +611,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
}
kfree(d->ctrl_val);
kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);

View File

@@ -28,6 +28,7 @@
#define MBM_CNTR_WIDTH 24
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
@@ -180,10 +181,20 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
* @prev_msr Value of IA32_QM_CTR for this RMID last time we read it
* @chunks_bw Total local data moved. Used for bandwidth calculation
* @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
* @prev_bw The most recent bandwidth in MBps
* @delta_bw Difference between the current and previous bandwidth
* @delta_comp Indicates whether to compute the delta_bw
*/
struct mbm_state {
u64 chunks;
u64 prev_msr;
u64 chunks_bw;
u64 prev_bw_msr;
u32 prev_bw;
u32 delta_bw;
bool delta_comp;
};
/**
@@ -202,6 +213,7 @@ struct mbm_state {
* @cqm_work_cpu:
* worker cpu for CQM h/w counters
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
* @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
* @new_ctrl: new ctrl value to be loaded
* @have_new_ctrl: did user provide new_ctrl for this domain
*/
@@ -217,6 +229,7 @@ struct rdt_domain {
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
u32 *mbps_val;
u32 new_ctrl;
bool have_new_ctrl;
};
@@ -259,6 +272,7 @@ struct rdt_cache {
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
* @mba_sc: True if MBA software controller(mba_sc) is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct rdt_membw {
@@ -266,6 +280,7 @@ struct rdt_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
bool mba_sc;
u32 *mb_map;
};
@@ -445,6 +460,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
bool is_mba_sc(struct rdt_resource *r);
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);

View File

@@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false;
}
if (bw < r->membw.min_bw || bw > r->default_ctrl) {
if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
!is_mba_sc(r)) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
r->membw.min_bw, r->default_ctrl);
return false;
@@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid)
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
bool mba_sc;
u32 *dc;
int cpu;
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.high = msr_param.low + 1;
msr_param.res = r;
mba_sc = is_mba_sc(r);
list_for_each_entry(d, &r->domains, list) {
if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
dc = !mba_sc ? d->ctrl_val : d->mbps_val;
if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
d->ctrl_val[closid] = d->new_ctrl;
dc[closid] = d->new_ctrl;
}
}
if (cpumask_empty(cpu_mask))
/*
* Avoid writing the control msr with control values when
* MBA software controller is enabled
*/
if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
{
struct rdt_domain *dom;
bool sep = false;
u32 ctrl_val;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
dom->mbps_val[closid]);
seq_printf(s, r->format_str, dom->id, max_data_width,
dom->ctrl_val[closid]);
ctrl_val);
sep = true;
}
seq_puts(s, "\n");

View File

@@ -225,10 +225,18 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
{
u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
chunks = (cur_msr << shift) - (prev_msr << shift);
return chunks >>= shift;
}
static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
u64 chunks, shift, tval;
struct mbm_state *m;
u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
}
if (rr->first) {
m->prev_msr = tval;
m->chunks = 0;
memset(m, 0, sizeof(struct mbm_state));
m->prev_bw_msr = m->prev_msr = tval;
return 0;
}
shift = 64 - MBM_CNTR_WIDTH;
chunks = (tval << shift) - (m->prev_msr << shift);
chunks >>= shift;
chunks = mbm_overflow_count(m->prev_msr, tval);
m->chunks += chunks;
m->prev_msr = tval;
@@ -269,6 +275,32 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
return 0;
}
/*
* Supporting function to calculate the memory bandwidth
* and delta bandwidth in MBps.
*/
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 tval, cur_bw, chunks;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
return;
chunks = mbm_overflow_count(m->prev_bw_msr, tval);
m->chunks_bw += chunks;
m->chunks = m->chunks_bw;
cur_bw = (chunks * r->mon_scale) >> 20;
if (m->delta_comp)
m->delta_bw = abs(cur_bw - m->prev_bw);
m->delta_comp = false;
m->prev_bw = cur_bw;
m->prev_bw_msr = tval;
}
/*
* This is called via IPI to read the CQM/MBM counters
* on a domain.
@@ -297,6 +329,118 @@ void mon_event_count(void *info)
}
}
/*
* Feedback loop for MBA software controller (mba_sc)
*
* mba_sc is a feedback loop where we periodically read MBM counters and
* adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
* that:
*
* current bandwdith(cur_bw) < user specified bandwidth(user_bw)
*
* This uses the MBM counters to measure the bandwidth and MBA throttle
* MSRs to control the bandwidth for a particular rdtgrp. It builds on the
* fact that resctrl rdtgroups have both monitoring and control.
*
* The frequency of the checks is 1s and we just tag along the MBM overflow
* timer. Having 1s interval makes the calculation of bandwidth simpler.
*
* Although MBA's goal is to restrict the bandwidth to a maximum, there may
* be a need to increase the bandwidth to avoid uncecessarily restricting
* the L2 <-> L3 traffic.
*
* Since MBA controls the L2 external bandwidth where as MBM measures the
* L3 external bandwidth the following sequence could lead to such a
* situation.
*
* Consider an rdtgroup which had high L3 <-> memory traffic in initial
* phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
* after some time rdtgroup has mostly L2 <-> L3 traffic.
*
* In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
* throttle MSRs already have low percentage values. To avoid
* unnecessarily restricting such rdtgroups, we also increase the bandwidth.
*/
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
u32 cur_bw, delta_bw, user_bw;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
struct rdtgroup *entry;
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
pmbm_data = &dom_mbm->mbm_local[rmid];
dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
pr_warn_once("Failure to get domain for MBA update\n");
return;
}
cur_bw = pmbm_data->prev_bw;
user_bw = dom_mba->mbps_val[closid];
delta_bw = pmbm_data->delta_bw;
cur_msr_val = dom_mba->ctrl_val[closid];
/*
* For Ctrl groups read data from child monitor groups.
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
cur_bw += cmbm_data->prev_bw;
delta_bw += cmbm_data->delta_bw;
}
/*
* Scale up/down the bandwidth linearly for the ctrl group. The
* bandwidth step is the bandwidth granularity specified by the
* hardware.
*
* The delta_bw is used when increasing the bandwidth so that we
* dont alternately increase and decrease the control values
* continuously.
*
* For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if
* bandwidth step is 20MBps(> user_bw - cur_bw), we would keep
* switching between 90 and 110 continuously if we only check
* cur_bw < user_bw.
*/
if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
} else if (cur_msr_val < MAX_MBA_BW &&
(user_bw > (cur_bw + delta_bw))) {
new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
} else {
return;
}
cur_msr = r_mba->msr_base + closid;
wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
dom_mba->ctrl_val[closid] = new_msr_val;
/*
* Delta values are updated dynamically package wise for each
* rdtgrp everytime the throttle MSR changes value.
*
* This is because (1)the increase in bandwidth is not perfectly
* linear and only "approximately" linear even when the hardware
* says it is linear.(2)Also since MBA is a core specific
* mechanism, the delta values vary based on number of cores used
* by the rdtgrp.
*/
pmbm_data->delta_comp = true;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
cmbm_data->delta_comp = true;
}
}
static void mbm_update(struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
@@ -314,7 +458,16 @@ static void mbm_update(struct rdt_domain *d, int rmid)
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
__mon_event_count(rmid, &rr);
/*
* Call the MBA software controller only for the
* control groups and when user has enabled
* the software controller explicitly.
*/
if (!is_mba_sc(NULL))
__mon_event_count(rmid, &rr);
else
mbm_bw_count(rmid, &rr);
}
}
@@ -385,6 +538,9 @@ void mbm_handle_overflow(struct work_struct *work)
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
mbm_update(d, crgrp->mon.rmid);
if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
}
schedule_delayed_work_on(cpu, &d->mbm_over, delay);

View File

@@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg)
wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
}
static inline bool is_mba_linear(void)
{
return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
}
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
@@ -1041,6 +1046,28 @@ static int set_cache_qos_cfg(int level, bool enable)
return 0;
}
/*
* Enable or disable the MBA software controller
* which helps user specify bandwidth in MBps.
* MBA software controller is supported only if
* MBM is supported and MBA is in linear scale.
*/
static int set_mba_sc(bool mba_sc)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
struct rdt_domain *d;
if (!is_mbm_enabled() || !is_mba_linear() ||
mba_sc == is_mba_sc(r))
return -EINVAL;
r->membw.mba_sc = mba_sc;
list_for_each_entry(d, &r->domains, list)
setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
return 0;
}
static int cdp_enable(int level, int data_type, int code_type)
{
struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1150,10 @@ static int parse_rdtgroupfs_options(char *data)
ret = cdpl2_enable();
if (ret)
goto out;
} else if (!strcmp(token, "mba_MBps")) {
ret = set_mba_sc(true);
if (ret)
goto out;
} else {
ret = -EINVAL;
goto out;
@@ -1445,6 +1476,8 @@ static void rdt_kill_sb(struct super_block *sb)
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
set_mba_sc(false);
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);

View File

@@ -48,7 +48,7 @@ static struct dentry *dfs_inj;
static u8 n_banks;
#define MAX_FLAG_OPT_SIZE 3
#define MAX_FLAG_OPT_SIZE 4
#define NBCFG 0x44
enum injection_type {

View File

@@ -1457,7 +1457,7 @@ static int __mcheck_cpu_mce_banks_init(void)
int i;
u8 num_banks = mca_cfg.banks;
mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
@@ -1727,6 +1727,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
}
}
static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
/*
* All newer Centaur CPUs support MCE broadcasting. Enable
* synchronization with a one second timeout.
*/
if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
c->x86 > 6) {
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = USEC_PER_SEC;
}
}
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
@@ -1739,6 +1754,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
break;
}
case X86_VENDOR_CENTAUR:
mce_centaur_feature_init(c);
break;
default:
break;

View File

@@ -436,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
unsigned int block)
static u32 smca_get_block_address(unsigned int bank, unsigned int block)
{
u32 low, high;
u32 addr = 0;
@@ -456,13 +455,13 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
goto out;
if (!(low & MCI_CONFIG_MCAX))
goto out;
if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
@@ -471,7 +470,7 @@ out:
return addr;
}
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -480,7 +479,7 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
return addr;
if (mce_flags.smca)
return smca_get_block_address(cpu, bank, block);
return smca_get_block_address(bank, block);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -558,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
smca_configure(bank, cpu);
for (block = 0; block < NR_BLOCKS; ++block) {
address = get_block_address(cpu, address, low, high, bank, block);
address = get_block_address(address, low, high, bank, block);
if (!address)
break;
@@ -1175,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
address = get_block_address(cpu, address, low, high, bank, ++block);
address = get_block_address(address, low, high, bank, ++block);
if (!address)
return 0;
@@ -1385,7 +1384,7 @@ int mce_threshold_create_device(unsigned int cpu)
if (bp)
return 0;
bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
GFP_KERNEL);
if (!bp)
return -ENOMEM;

View File

@@ -1,3 +1,3 @@
obj-y := main.o if.o generic.o cleanup.o
obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o

View File

@@ -43,7 +43,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
max = num_var_ranges;
if (fcount == NULL) {
fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
fcount = kcalloc(max, sizeof(*fcount), GFP_KERNEL);
if (!fcount)
return -ENOMEM;
FILE_FCOUNT(file) = fcount;

View File

@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
@@ -100,7 +101,7 @@ static int have_wrcomb(void)
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
dev->revision <= 5) {
pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -110,7 +111,7 @@ static int have_wrcomb(void)
*/
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -312,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
pr_warn("mtrr: type: %u invalid\n", type);
pr_warn("type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
pr_warn("mtrr: your processor doesn't support write-combining\n");
pr_warn("your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
pr_warn("mtrr: zero sized request\n");
pr_warn("zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warn("mtrr: base or size exceeds the MTRR width\n");
pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -360,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
" 0x%lx000,0x%lx000\n", base, size, lbase,
pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
}
@@ -369,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
}
}
} else {
pr_info("mtrr: no more MTRRs available\n");
pr_info("no more MTRRs available\n");
}
error = i;
out:
@@ -407,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
pr_warn("size and base must be multiples of 4 kiB\n");
pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -499,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
pr_debug("no MTRR for %lx000,%lx000 found\n",
base, size);
goto out;
}
}
if (reg >= max) {
pr_warn("mtrr: register: %d too big\n", reg);
pr_warn("register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
pr_warn("mtrr: MTRR %d not used\n", reg);
pr_warn("MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
pr_warn("mtrr: reg: %d has count=0\n", reg);
pr_warn("reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -775,7 +775,7 @@ void __init mtrr_bp_init(void)
}
if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
pr_info("Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
@@ -793,6 +793,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
rcu_cpu_starting(smp_processor_id());
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,

View File

@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
void detect_extended_topology(struct cpuinfo_x86 *c)
int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
return;
return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return;
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
return;
#endif
return 0;
}