Merge branch 'perf/uprobes' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/uprobes
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
 
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
-	/* mark not used */
-	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
 
 	return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		 * event overflow
 		 */
 		handled++;
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
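
The pattern in the hunks above: the sampling period moves into the init helper instead of being assigned afterwards, so a caller can no longer initialize the sample data and forget the period. A minimal user-space sketch of that API shape (struct and names are illustrative, not the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-ins for perf_sample_data and its init helper. */
    struct sample_data { uint64_t addr; uint64_t period; };

    /* Folding the period into init keeps callers from leaving it stale. */
    static void sample_data_init(struct sample_data *d, uint64_t addr, uint64_t period)
    {
            d->addr = addr;
            d->period = period;
    }

    int main(void)
    {
            struct sample_data d;
            sample_data_init(&d, 0, 1 << 16);
            printf("period = %llu\n", (unsigned long long)d.period);
            return 0;
    }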
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static int amd_pmu_hw_config(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
+	int ret;
+
+	/* pass precise event sampling to ibs: */
+	if (event->attr.precise_ip && get_ibs_caps())
+		return -ENOENT;
 
+	ret = x86_pmu_hw_config(event);
 	if (ret)
 		return ret;
 
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * when we come here
 	 */
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (nb->owners[i] == event) {
-			cmpxchg(nb->owners+i, event, NULL);
+		if (cmpxchg(nb->owners + i, event, NULL) == event)
 			break;
-		}
 	}
 }
 
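
The constraint fix above is about atomicity: the old code checked owners[i] and then swapped in two separate steps, so it could break out of the loop even when the cmpxchg() lost a race; keying the break off the cmpxchg() return value stops the loop only when the swap demonstrably happened. A user-space sketch of the same pattern, using a GCC builtin in place of the kernel's cmpxchg() (all names are illustrative):

    #include <stdio.h>

    #define NUM_COUNTERS 4
    static void *owners[NUM_COUNTERS];

    /* Release the slot we own; stop early only when the swap really hit. */
    static void put_constraints(void *event)
    {
            for (int i = 0; i < NUM_COUNTERS; i++) {
                    if (__sync_val_compare_and_swap(&owners[i], event, (void *)0) == event)
                            break;
            }
    }

    int main(void)
    {
            int ev;
            owners[2] = &ev;
            put_constraints(&ev);
            printf("slot 2 now %p\n", owners[2]);  /* prints (nil) */
            return 0;
    }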
@@ -9,6 +9,7 @@
 #include <linux/perf_event.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/ptrace.h>
 
 #include <asm/apic.h>
@@ -16,36 +17,591 @@ static u32 ibs_caps;
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
+
+enum ibs_states {
+	IBS_ENABLED	= 0,
+	IBS_STARTED	= 1,
+	IBS_STOPPING	= 2,
+
+	IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+	struct perf_event	*event;
+	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+	u64				(*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+	u32	size;
+	union {
+		u32	data[0];	/* data buffer starts here */
+		u32	caps;
+	};
+	u64	regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+
+	/*
+	 * If we are way outside a reasonable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left < (s64)min)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	/*
+	 * If the hw period that triggers the sw overflow is too short
+	 * we might hit the irq handler. This biases the results.
+	 * Thus we shorten the next-to-last period and set the last
+	 * period to the max period.
+	 */
+	if (left > max) {
+		left -= max;
+		if (left > max)
+			left = max;
+		else if (left < min)
+			left = min;
+	}
+
+	*hw_period = (u64)left;
+
+	return overflow;
+}
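
The helper above clamps the next hardware period three ways: if sampling fell far behind it skips forward, if the remainder is below the hardware minimum it adds a full period, and if it exceeds the hardware maximum it splits so the final period is the max. A stand-alone sketch of the same arithmetic, with plain integers instead of local64_t and made-up bounds:

    #include <stdint.h>
    #include <stdio.h>

    /* User-space re-implementation of the clamp rules, for illustration only. */
    static int set_period(int64_t left, int64_t period, int64_t min, int64_t max,
                          uint64_t *hw_period)
    {
            int overflow = 0;

            if (left <= -period) { left = period;  overflow = 1; } /* skip forward */
            if (left < min)      { left += period; overflow = 1; } /* extend       */
            if (left > max) {                                      /* split        */
                    left -= max;
                    if (left > max)
                            left = max;
                    else if (left < min)
                            left = min;
            }
            *hw_period = (uint64_t)left;
            return overflow;
    }

    int main(void)
    {
            uint64_t hw;
            int ovf = set_period(5, 100000, 16, 1 << 20, &hw); /* 5 < min of 16 */
            printf("hw_period=%llu overflow=%d\n", (unsigned long long)hw, ovf);
            return 0;
    }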
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - width;
+	u64 prev_raw_count;
+	u64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		return 0;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return 1;
+}
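
The shift pair above is what makes a wrap-around of a counter narrower than 64 bits come out as a small positive delta instead of a huge bogus one: shifting both values up to the top of the register makes the subtraction happen modulo the counter's real width. A runnable illustration for a hypothetical 48-bit counter:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int width = 48;                          /* counter width in bits */
            int shift = 64 - width;
            uint64_t prev = 0xFFFFFFFFFFF0ULL;       /* near the 48-bit limit */
            uint64_t now  = 0x000000000010ULL;       /* counter wrapped */

            /* Subtract at the top of the register, then shift back down. */
            uint64_t delta = (now << shift) - (prev << shift);
            delta >>= shift;

            printf("delta = %llu\n", (unsigned long long)delta); /* 32, not ~2^48 */
            return 0;
    }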
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+	if (perf_ibs_fetch.pmu.type == type)
+		return &perf_ibs_fetch;
+	if (perf_ibs_op.pmu.type == type)
+		return &perf_ibs_op;
+	return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
+ *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+	switch (event->attr.precise_ip) {
+	case 0:
+		return -ENOENT;
+	case 1:
+	case 2:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		switch (event->attr.config) {
+		case PERF_COUNT_HW_CPU_CYCLES:
+			*config = 0;
+			return 0;
+		}
+		break;
+	case PERF_TYPE_RAW:
+		switch (event->attr.config) {
+		case 0x0076:
+			*config = 0;
+			return 0;
+		case 0x00C1:
+			*config = IBS_OP_CNT_CTL;
+			return 0;
+		}
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return -EOPNOTSUPP;
+}
 
 static int perf_ibs_init(struct perf_event *event)
 {
-	if (perf_ibs.type != event->attr.type)
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs;
+	u64 max_cnt, config;
+	int ret;
+
+	perf_ibs = get_ibs_pmu(event->attr.type);
+	if (perf_ibs) {
+		config = event->attr.config;
+	} else {
+		perf_ibs = &perf_ibs_op;
+		ret = perf_ibs_precise_event(event, &config);
+		if (ret)
+			return ret;
+	}
+
+	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
+
+	if (config & ~perf_ibs->config_mask)
+		return -EINVAL;
+
+	if (hwc->sample_period) {
+		if (config & perf_ibs->cnt_mask)
+			/* raw max_cnt may not be set */
+			return -EINVAL;
+		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+			/*
+			 * The lower 4 bits cannot be set in the ibs
+			 * max cnt, but allow it in case we adjust the
+			 * sample period to set a frequency.
+			 */
+			return -EINVAL;
+		hwc->sample_period &= ~0x0FULL;
+		if (!hwc->sample_period)
+			hwc->sample_period = 0x10;
+	} else {
+		max_cnt = config & perf_ibs->cnt_mask;
+		config &= ~perf_ibs->cnt_mask;
+		event->attr.sample_period = max_cnt << 4;
+		hwc->sample_period = event->attr.sample_period;
+	}
+
+	if (!hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * If we modify hwc->sample_period, we also need to update
+	 * hwc->last_period and hwc->period_left.
+	 */
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+
+	hwc->config_base = perf_ibs->msr;
+	hwc->config = config;
+
 	return 0;
 }
+
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+			       struct hw_perf_event *hwc, u64 *period)
+{
+	int overflow;
+
+	/* ignore lower 4 bits in min count: */
+	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+	local64_set(&hwc->prev_count, 0);
+
+	return overflow;
+}
+
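
IBS hardware counts in units of 16, so the low 4 bits of a requested period are not programmable; perf_ibs_init() above rounds them away and enforces a floor of 0x10. A small sketch of that rounding (values are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t period = 1000;            /* requested sample period */

            period &= ~0x0FULL;                /* drop the 4 bits the hw ignores */
            if (!period)
                    period = 0x10;             /* enforce the minimum of 16 */

            printf("effective period  = %llu\n", (unsigned long long)period);      /* 992 */
            printf("programmed maxcnt = %llu\n", (unsigned long long)(period >> 4));
            return 0;
    }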
+static u64 get_ibs_fetch_count(u64 config)
+{
+	return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+	u64 count = 0;
+
+	if (config & IBS_OP_VAL)
+		count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+	if (ibs_caps & IBS_CAPS_RDWROPCNT)
+		count += (config & IBS_OP_CUR_CNT) >> 32;
+
+	return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+		      u64 *config)
+{
+	u64 count = perf_ibs->get_count(*config);
+
+	/*
+	 * Set width to 64 since we do not overflow on max width but
+	 * instead on max count. In perf_ibs_set_period() we clear
+	 * prev count manually on overflow.
+	 */
+	while (!perf_event_try_update(event, count, 64)) {
+		rdmsrl(event->hw.config_base, *config);
+		count = perf_ibs->get_count(*config);
+	}
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+					 struct hw_perf_event *hwc, u64 config)
+{
+	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+					  struct hw_perf_event *hwc, u64 config)
+{
+	config &= ~perf_ibs->cnt_mask;
+	wrmsrl(hwc->config_base, config);
+	config &= ~perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, config);
+}
+
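
The erratum makes the teardown a strict two-write sequence: zero the count field first, then drop the enable bit in a second write. A toy sketch of that ordering with invented masks (the real field layout lives in the IBS MSR definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define CNT_MASK   0x000000000000FFFFULL  /* illustrative counter field */
    #define ENABLE_BIT 0x0000000000020000ULL  /* illustrative enable bit */

    int main(void)
    {
            uint64_t config = ENABLE_BIT | (CNT_MASK & 0x1234);

            config &= ~CNT_MASK;   /* write 1: zero the counter field first */
            config &= ~ENABLE_BIT; /* write 2: only then drop the enable bit */

            printf("final config = %#llx\n", (unsigned long long)config);
            return 0;
    }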
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
+ * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 period;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	perf_ibs_set_period(perf_ibs, hwc, &period);
+	set_bit(IBS_STARTED, pcpu->state);
+	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 config;
+	int stopping;
+
+	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+		return;
+
+	rdmsrl(hwc->config_base, config);
+
+	if (stopping) {
+		set_bit(IBS_STOPPING, pcpu->state);
+		perf_ibs_disable_event(perf_ibs, hwc, config);
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/*
+	 * Clear the valid bit so rollovers are not counted on update;
+	 * rollovers are only accounted in the irq handler.
+	 */
+	config &= ~perf_ibs->valid_mask;
+
+	perf_ibs_event_update(perf_ibs, event, &config);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
 static int perf_ibs_add(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+		return -ENOSPC;
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	pcpu->event = event;
+
+	if (flags & PERF_EF_START)
+		perf_ibs_start(event, PERF_EF_RELOAD);
+
 	return 0;
 }
 
 static void perf_ibs_del(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+		return;
+
+	perf_ibs_stop(event, PERF_EF_UPDATE);
+
+	pcpu->event = NULL;
+
+	perf_event_update_userpage(event);
+}
+
-static struct pmu perf_ibs = {
-	.event_init	= perf_ibs_init,
-	.add		= perf_ibs_add,
-	.del		= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSFETCHCTL,
+	.config_mask		= IBS_FETCH_CONFIG_MASK,
+	.cnt_mask		= IBS_FETCH_MAX_CNT,
+	.enable_mask		= IBS_FETCH_ENABLE,
+	.valid_mask		= IBS_FETCH_VAL,
+	.max_period		= IBS_FETCH_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+
+	.get_count		= get_ibs_fetch_count,
+};
+
+static struct perf_ibs perf_ibs_op = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSOPCTL,
+	.config_mask		= IBS_OP_CONFIG_MASK,
+	.cnt_mask		= IBS_OP_MAX_CNT,
+	.enable_mask		= IBS_OP_ENABLE,
+	.valid_mask		= IBS_OP_VAL,
+	.max_period		= IBS_OP_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+
+	.get_count		= get_ibs_op_count,
+};
+
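
The start/stop/add/del callbacks above recover their perf_ibs from the generic pmu pointer because the pmu is embedded in the larger struct; that is the container_of() idiom. A self-contained user-space version (structs here are mock-ups, not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    /* Minimal stand-alone container_of(): back up from a member to its owner. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct pmu { int type; };
    struct perf_ibs_like { struct pmu pmu; unsigned int msr; };

    int main(void)
    {
            struct perf_ibs_like ibs = { .pmu = { .type = 7 }, .msr = 0x1234 };
            struct pmu *p = &ibs.pmu;              /* what the core hands back */

            struct perf_ibs_like *back = container_of(p, struct perf_ibs_like, pmu);
            printf("msr = %#x\n", back->msr);      /* 0x1234 again */
            return 0;
    }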
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	struct perf_event *event = pcpu->event;
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	struct perf_ibs_data ibs_data;
+	int offset, size, check_rip, offset_max, throttle = 0;
+	unsigned int msr;
+	u64 *buf, *config, period;
+
+	if (!test_bit(IBS_STARTED, pcpu->state)) {
+		/*
+		 * Catch spurious interrupts after stopping IBS: After
+		 * disabling IBS there could still be incoming NMIs
+		 * with samples that even have the valid bit cleared.
+		 * Mark all these NMIs as handled.
+		 */
+		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+	}
+
+	msr = hwc->config_base;
+	buf = ibs_data.regs;
+	rdmsrl(msr, *buf);
+	if (!(*buf++ & perf_ibs->valid_mask))
+		return 0;
+
+	config = &ibs_data.regs[0];
+	perf_ibs_event_update(perf_ibs, event, config);
+	perf_sample_data_init(&data, 0, hwc->last_period);
+	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+		goto out;	/* no sw counter overflow */
+
+	ibs_data.caps = ibs_caps;
+	size = 1;
+	offset = 1;
+	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+	if (event->attr.sample_type & PERF_SAMPLE_RAW)
+		offset_max = perf_ibs->offset_max;
+	else if (check_rip)
+		offset_max = 2;
+	else
+		offset_max = 1;
+	do {
+		rdmsrl(msr + offset, *buf++);
+		size++;
+		offset = find_next_bit(perf_ibs->offset_mask,
+				       perf_ibs->offset_max,
+				       offset + 1);
+	} while (offset < offset_max);
+	ibs_data.size = sizeof(u64) * size;
+
+	regs = *iregs;
+	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+	} else {
+		instruction_pointer_set(&regs, ibs_data.regs[1]);
+		regs.flags |= PERF_EFLAGS_EXACT;
+	}
+
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.size = sizeof(u32) + ibs_data.size;
+		raw.data = ibs_data.data;
+		data.raw = &raw;
+	}
+
+	throttle = perf_event_overflow(event, &data, &regs);
+out:
+	if (throttle)
+		perf_ibs_disable_event(perf_ibs, hwc, *config);
+	else
+		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+
+	return 1;
+}
+
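
The handler reads only the IBS registers that actually exist by walking a bitmask of valid MSR offsets with find_next_bit(). A user-space sketch of that walk, with find_next_bit() approximated by a plain loop and an invented mask:

    #include <stdio.h>

    static int find_next_bit64(unsigned long long mask, int size, int from)
    {
            for (int i = from; i < size; i++)
                    if (mask & (1ULL << i))
                            return i;
            return size;  /* like the kernel helper: "size" means not found */
    }

    int main(void)
    {
            unsigned long long offset_mask = 0x27; /* illustrative: offsets 0,1,2,5 */
            int offset_max = 6;
            int offset = 1;                        /* offset 0 was read up front */

            do {
                    printf("read msr base+%d\n", offset);
                    offset = find_next_bit64(offset_mask, offset_max, offset + 1);
            } while (offset < offset_max);         /* prints offsets 1, 2, 5 */
            return 0;
    }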
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+	int handled = 0;
+
+	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+	struct cpu_perf_ibs __percpu *pcpu;
+	int ret;
+
+	pcpu = alloc_percpu(struct cpu_perf_ibs);
+	if (!pcpu)
+		return -ENOMEM;
+
+	perf_ibs->pcpu = pcpu;
+
+	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+	if (ret) {
+		perf_ibs->pcpu = NULL;
+		free_percpu(pcpu);
+	}
+
+	return ret;
+}
+
 static __init int perf_event_ibs_init(void)
 {
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	perf_pmu_register(&perf_ibs, "ibs", -1);
+	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
 	return 0;
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	u64 status;
 	int handled;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1082,7 +1080,7 @@ again:
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
 			data.br_stack = &cpuc->lbr_stack;
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
 
 	ds->bts_index = ds->bts_buffer_base;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 	regs.ip = 0;
 
 	/*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		handled += overflow;
 
 		/* event overflow for sure */
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
@@ -24,40 +24,21 @@
 #include <trace/syscall.h>
 
 #include <asm/cacheflush.h>
+#include <asm/kprobes.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
-#include <asm/nmi.h>
-
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-/*
- * modifying_code is set to notify NMIs that they need to use
- * memory barriers when entering or exiting. But we don't want
- * to burden NMIs with unnecessary memory barriers when code
- * modification is not being done (which is most of the time).
- *
- * A mutex is already held when ftrace_arch_code_modify_prepare
- * and post_process are called. No locks need to be taken here.
- *
- * Stop machine will make sure currently running NMIs are done
- * and new NMIs will see the updated variable before we need
- * to worry about NMIs doing memory barriers.
- */
-static int modifying_code __read_mostly;
-static DEFINE_PER_CPU(int, save_modifying_code);
-
 int ftrace_arch_code_modify_prepare(void)
 {
 	set_kernel_text_rw();
 	set_all_modules_text_rw();
-	modifying_code = 1;
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	modifying_code = 0;
 	set_all_modules_text_ro();
 	set_kernel_text_ro();
 	return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return calc.code;
 }
 
-/*
- * Modifying code must take extra care. On an SMP machine, if
- * the code being modified is also being executed on another CPU
- * that CPU will have undefined results and possibly take a GPF.
- * We use kstop_machine to stop other CPUs from executing code.
- * But this does not stop NMIs from happening. We still need
- * to protect against that. We separate out the modification of
- * the code to take care of this.
- *
- * Two buffers are added: An IP buffer and a "code" buffer.
- *
- * 1) Put the instruction pointer into the IP buffer
- *    and the new code into the "code" buffer.
- * 2) Wait for any running NMIs to finish and set a flag that says
- *    we are modifying code, it is done in an atomic operation.
- * 3) Write the code
- * 4) clear the flag.
- * 5) Wait for any running NMIs to finish.
- *
- * If an NMI is executed, the first thing it does is to call
- * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
- *
- * The trick is, it does not matter if everyone is writing the same
- * content to the code location. Also, if a CPU is executing code
- * it is OK to write to that code location if the contents being written
- * are the same as what exists.
- */
-
-#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
-static atomic_t nmi_running = ATOMIC_INIT(0);
-static int mod_code_status;		/* holds return value of text write */
-static void *mod_code_ip;		/* holds the IP to write to */
-static const void *mod_code_newcode;	/* holds the text to write to the IP */
-
-static unsigned nmi_wait_count;
-static atomic_t nmi_update_count = ATOMIC_INIT(0);
-
-int ftrace_arch_read_dyn_info(char *buf, int size)
-{
-	int r;
-
-	r = snprintf(buf, size, "%u %u",
-		     nmi_wait_count,
-		     atomic_read(&nmi_update_count));
-	return r;
-}
-
-static void clear_mod_flag(void)
-{
-	int old = atomic_read(&nmi_running);
-
-	for (;;) {
-		int new = old & ~MOD_CODE_WRITE_FLAG;
-
-		if (old == new)
-			break;
-
-		old = atomic_cmpxchg(&nmi_running, old, new);
-	}
-}
-
-static void ftrace_mod_code(void)
-{
-	/*
-	 * Yes, more than one CPU process can be writing to mod_code_status.
-	 * (and the code itself)
-	 * But if one were to fail, then they all should, and if one were
-	 * to succeed, then they all should.
-	 */
-	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
-					     MCOUNT_INSN_SIZE);
-
-	/* if we fail, then kill any new writers */
-	if (mod_code_status)
-		clear_mod_flag();
-}
-
-void ftrace_nmi_enter(void)
-{
-	__this_cpu_write(save_modifying_code, modifying_code);
-
-	if (!__this_cpu_read(save_modifying_code))
-		return;
-
-	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
-		smp_rmb();
-		ftrace_mod_code();
-		atomic_inc(&nmi_update_count);
-	}
-	/* Must have previous changes seen before executions */
-	smp_mb();
-}
-
-void ftrace_nmi_exit(void)
-{
-	if (!__this_cpu_read(save_modifying_code))
-		return;
-
-	/* Finish all executions before clearing nmi_running */
-	smp_mb();
-	atomic_dec(&nmi_running);
-}
-
-static void wait_for_nmi_and_set_mod_flag(void)
-{
-	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
-		return;
-
-	do {
-		cpu_relax();
-	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
-
-	nmi_wait_count++;
-}
-
-static void wait_for_nmi(void)
-{
-	if (!atomic_read(&nmi_running))
-		return;
-
-	do {
-		cpu_relax();
-	} while (atomic_read(&nmi_running));
-
-	nmi_wait_count++;
-}
-
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
 		ip = (unsigned long)__va(__pa(ip));
 
-	mod_code_ip = (void *)ip;
-	mod_code_newcode = new_code;
-
-	/* The buffers need to be visible before we let NMIs write them */
-	smp_mb();
-
-	wait_for_nmi_and_set_mod_flag();
-
-	/* Make sure all running NMIs have finished before we write the code */
-	smp_mb();
-
-	ftrace_mod_code();
-
-	/* Make sure the write happens before clearing the bit */
-	smp_mb();
-
-	clear_mod_flag();
-	wait_for_nmi();
-
-	return mod_code_status;
+	return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
 }
 
 static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,347 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
+int modifying_ftrace_code __read_mostly;
+
+/*
+ * A breakpoint was added to the code address we are about to
+ * modify, and this is the handler that will just skip over it.
+ * We are either changing a nop into a trace call, or a trace
+ * call to a nop. While the change is taking place, we treat
+ * it just like it was a nop.
+ */
+int ftrace_int3_handler(struct pt_regs *regs)
+{
+	if (WARN_ON_ONCE(!regs))
+		return 0;
+
+	if (!ftrace_location(regs->ip - 1))
+		return 0;
+
+	regs->ip += MCOUNT_INSN_SIZE - 1;
+
+	return 1;
+}
+
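
The idea behind ftrace_int3_handler(): a CPU that hits the breakpoint byte mid-patch is simply steered past the call site as if it were a nop. A user-space analogue on x86 Linux (assumes x86; the kernel additionally needs the MCOUNT_INSN_SIZE - 1 fixup because it patches a 5-byte call, while int3 itself is one byte):

    #include <signal.h>
    #include <unistd.h>
    #include <stdio.h>

    /* On x86 the saved IP after int3 already points past the one-byte
     * instruction, so returning from the handler resumes execution --
     * the "treat it like a nop" behavior. */
    static void on_trap(int sig)
    {
            (void)sig;
            write(STDOUT_FILENO, "int3 handled\n", 13);
    }

    int main(void)
    {
            signal(SIGTRAP, on_trap);
            __asm__ volatile("int3");   /* like running into the patched byte */
            printf("execution continued\n");
            return 0;
    }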
+static int ftrace_write(unsigned long ip, const char *val, int size)
+{
+	/*
+	 * On x86_64, kernel text mappings are mapped read-only with
+	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+	 * of the kernel text mapping to modify the kernel text.
+	 *
+	 * For 32bit kernels, these mappings are same and we can use
+	 * kernel identity mapping to modify code.
+	 */
+	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+		ip = (unsigned long)__va(__pa(ip));
+
+	return probe_kernel_write((void *)ip, val, size);
+}
+
+static int add_break(unsigned long ip, const char *old)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+	unsigned char brk = BREAKPOINT_INSTRUCTION;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+	if (ftrace_write(ip, &brk, 1))
+		return -EPERM;
+
+	return 0;
+}
+
+static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned const char *old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, addr);
+
+	return add_break(rec->ip, old);
+}
+
+
+static int add_brk_on_nop(struct dyn_ftrace *rec)
+{
+	unsigned const char *old;
+
+	old = ftrace_nop_replace();
+
+	return add_break(rec->ip, old);
+}
+
+static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_test_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return add_brk_on_nop(rec);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return add_brk_on_call(rec, ftrace_addr);
+	}
+	return 0;
+}
+
+/*
+ * On error, we need to remove breakpoints. This needs to
+ * be done carefully. If the address does not currently have a
+ * breakpoint, we know we are done. Otherwise, we look at the
+ * remaining 4 bytes of the instruction. If it matches a nop
+ * we replace the breakpoint with the nop. Otherwise we replace
+ * it with the call instruction.
+ */
+static int remove_breakpoint(struct dyn_ftrace *rec)
+{
+	unsigned char ins[MCOUNT_INSN_SIZE];
+	unsigned char brk = BREAKPOINT_INSTRUCTION;
+	const unsigned char *nop;
+	unsigned long ftrace_addr;
+	unsigned long ip = rec->ip;
+
+	/* If we fail the read, just give up */
+	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* If this does not have a breakpoint, we are done */
+	if (ins[0] != brk)
+		return -1;
+
+	nop = ftrace_nop_replace();
+
+	/*
+	 * If the last 4 bytes of the instruction do not match
+	 * a nop, then we assume that this is a call to ftrace_addr.
+	 */
+	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
+		/*
+		 * For extra paranoia, we check if the breakpoint is on
+		 * a call that would actually jump to the ftrace_addr.
+		 * If not, don't touch the breakpoint, we may just create
+		 * a disaster.
+		 */
+		ftrace_addr = (unsigned long)FTRACE_ADDR;
+		nop = ftrace_call_replace(ip, ftrace_addr);
+
+		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
+			return -EINVAL;
+	}
+
+	return probe_kernel_write((void *)ip, &nop[0], 1);
+}
+
+static int add_update_code(unsigned long ip, unsigned const char *new)
+{
+	/* skip breakpoint */
+	ip++;
+	new++;
+	if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
+		return -EPERM;
+	return 0;
+}
+
+static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_call_replace(ip, addr);
+	return add_update_code(ip, new);
+}
+
+static int add_update_nop(struct dyn_ftrace *rec)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_nop_replace();
+	return add_update_code(ip, new);
+}
+
+static int add_update(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_test_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return add_update_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return add_update_nop(rec);
+	}
+
+	return 0;
+}
+
+static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_call_replace(ip, addr);
+
+	if (ftrace_write(ip, new, 1))
+		return -EPERM;
+
+	return 0;
+}
+
+static int finish_update_nop(struct dyn_ftrace *rec)
+{
+	unsigned long ip = rec->ip;
+	unsigned const char *new;
+
+	new = ftrace_nop_replace();
+
+	if (ftrace_write(ip, new, 1))
+		return -EPERM;
+	return 0;
+}
+
+static int finish_update(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ret = ftrace_update_record(rec, enable);
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
+		/* converting nop to call */
+		return finish_update_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		/* converting a call to a nop */
+		return finish_update_nop(rec);
+	}
+
+	return 0;
+}
+
+static void do_sync_core(void *data)
+{
+	sync_core();
+}
+
+static void run_sync(void)
+{
+	int enable_irqs = irqs_disabled();
+
+	/* We may be called with interrupts disabled (on bootup). */
+	if (enable_irqs)
+		local_irq_enable();
+	on_each_cpu(do_sync_core, NULL, 1);
+	if (enable_irqs)
+		local_irq_disable();
+}
+
+static void ftrace_replace_code(int enable)
+{
+	struct ftrace_rec_iter *iter;
+	struct dyn_ftrace *rec;
+	const char *report = "adding breakpoints";
+	int count = 0;
+	int ret;
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = add_breakpoints(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+		count++;
+	}
+
+	run_sync();
+
+	report = "updating code";
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = add_update(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+	}
+
+	run_sync();
+
+	report = "removing breakpoints";
+
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+
+		ret = finish_update(rec, enable);
+		if (ret)
+			goto remove_breakpoints;
+	}
+
+	run_sync();
+
+	return;
+
+ remove_breakpoints:
+	ftrace_bug(ret, rec ? rec->ip : 0);
+	printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
+	for_ftrace_rec_iter(iter) {
+		rec = ftrace_rec_iter_record(iter);
+		remove_breakpoint(rec);
+	}
+}
+
+void arch_ftrace_update_code(int command)
+{
+	modifying_ftrace_code++;
+
+	if (command & FTRACE_UPDATE_CALLS)
+		ftrace_replace_code(1);
+	else if (command & FTRACE_DISABLE_CALLS)
+		ftrace_replace_code(0);
+
+	if (command & FTRACE_UPDATE_TRACE_FUNC)
+		ftrace_update_ftrace_func(ftrace_trace_function);
+
+	if (command & FTRACE_START_FUNC_RET)
+		ftrace_enable_ftrace_graph_caller();
+	else if (command & FTRACE_STOP_FUNC_RET)
+		ftrace_disable_ftrace_graph_caller();
+
+	modifying_ftrace_code--;
+}
+
 int __init ftrace_dyn_arch_init(void *data)
 {
 	/* The return code is returned via data */
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *a;
@@ -209,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
 
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static notrace __kprobes void
+static __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
 	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
@@ -236,7 +236,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
@@ -263,7 +263,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
 	int handled;
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
@@ -50,6 +50,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
 }
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* ftrace must be first, everything else may cause a recursive crash */
+	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+		return;
+#endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			 SIGTRAP) == NOTIFY_STOP)
@@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	current_thread_info()->sig_on_uaccess_error = 1;
 
 	/*
-	 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
 	 * 64-bit, so we don't need to special-case it here. For all the
-	 * vsyscalls, 0 means "don't write anything" not "write it at
+	 * vsyscalls, NULL means "don't write anything" not "write it at
 	 * address 0".
 	 */
 	ret = -EFAULT;
@@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
-				 0);
+				 NULL);
 		break;
 	}