Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - a few misc things

 - kexec updates

 - DMA-mapping updates to better support networking DMA operations

 - IPC updates

 - various MM changes to improve DAX fault handling

 - lots of radix-tree changes, mainly to the test suite. All leading up
   to reimplementing the IDA/IDR code to be a wrapper layer over the
   radix-tree. However the final trigger-pulling patch is held off for
   4.11.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits)
  radix tree test suite: delete unused rcupdate.c
  radix tree test suite: add new tag check
  radix-tree: ensure counts are initialised
  radix tree test suite: cache recently freed objects
  radix tree test suite: add some more functionality
  idr: reduce the number of bits per level from 8 to 6
  rxrpc: abstract away knowledge of IDR internals
  tpm: use idr_find(), not idr_find_slowpath()
  idr: add ida_is_empty
  radix tree test suite: check multiorder iteration
  radix-tree: fix replacement for multiorder entries
  radix-tree: add radix_tree_split_preload()
  radix-tree: add radix_tree_split
  radix-tree: add radix_tree_join
  radix-tree: delete radix_tree_range_tag_if_tagged()
  radix-tree: delete radix_tree_locate_item()
  radix-tree: improve multiorder iterators
  btrfs: fix race in btrfs_free_dummy_fs_info()
  radix-tree: improve dump output
  radix-tree: make radix_tree_find_next_bit more useful
  ...
This commit is contained in:
Linus Torvalds
2016-12-14 17:25:18 -08:00
140 changed files with 3435 additions and 2225 deletions

View File

@@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o

View File

@@ -598,11 +598,11 @@ return_normal:
/*
* Wait for the other CPUs to be notified and be waiting for us:
*/
time_left = loops_per_jiffy * HZ;
time_left = MSEC_PER_SEC;
while (kgdb_do_roundup && --time_left &&
(atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
online_cpus)
cpu_relax();
udelay(1000);
if (!time_left)
pr_crit("Timed out waiting for secondary CPUs.\n");

View File

@@ -30,6 +30,7 @@
char kdb_prompt_str[CMD_BUFLEN];
int kdb_trap_printk;
int kdb_printf_cpu = -1;
static int kgdb_transition_check(char *buffer)
{
@@ -554,31 +555,26 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
int linecount;
int colcount;
int logging, saved_loglevel = 0;
int saved_trap_printk;
int got_printf_lock = 0;
int retlen = 0;
int fnd, len;
int this_cpu, old_cpu;
char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
char *moreprompt = "more> ";
struct console *c = console_drivers;
static DEFINE_SPINLOCK(kdb_printf_lock);
unsigned long uninitialized_var(flags);
preempt_disable();
saved_trap_printk = kdb_trap_printk;
kdb_trap_printk = 0;
/* Serialize kdb_printf if multiple cpus try to write at once.
* But if any cpu goes recursive in kdb, just print the output,
* even if it is interleaved with any other text.
*/
if (!KDB_STATE(PRINTF_LOCK)) {
KDB_STATE_SET(PRINTF_LOCK);
spin_lock_irqsave(&kdb_printf_lock, flags);
got_printf_lock = 1;
atomic_inc(&kdb_event);
} else {
__acquire(kdb_printf_lock);
local_irq_save(flags);
this_cpu = smp_processor_id();
for (;;) {
old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu);
if (old_cpu == -1 || old_cpu == this_cpu)
break;
cpu_relax();
}
diag = kdbgetintenv("LINES", &linecount);
@@ -847,16 +843,9 @@ kdb_print_out:
suspend_grep = 0; /* end of what may have been a recursive call */
if (logging)
console_loglevel = saved_loglevel;
if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
got_printf_lock = 0;
spin_unlock_irqrestore(&kdb_printf_lock, flags);
KDB_STATE_CLEAR(PRINTF_LOCK);
atomic_dec(&kdb_event);
} else {
__release(kdb_printf_lock);
}
kdb_trap_printk = saved_trap_printk;
preempt_enable();
/* kdb_printf_cpu locked the code above. */
smp_store_release(&kdb_printf_cpu, old_cpu);
local_irq_restore(flags);
return retlen;
}

View File

@@ -60,7 +60,6 @@ int kdb_grep_trailing;
* Kernel debugger state flags
*/
int kdb_flags;
atomic_t kdb_event;
/*
* kdb_lock protects updates to kdb_initial_cpu. Used to

View File

@@ -132,7 +132,6 @@ extern int kdb_state;
#define KDB_STATE_PAGER 0x00000400 /* pager is available */
#define KDB_STATE_GO_SWITCH 0x00000800 /* go is switching
* back to initial cpu */
#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */
#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */
#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */
#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been

View File

@@ -301,7 +301,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
retry:
/* Read the page with vaddr into memory */
ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
&vma);
&vma, NULL);
if (ret <= 0)
return ret;
@@ -1712,7 +1712,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
* essentially a kernel access to the memory.
*/
result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
NULL);
NULL, NULL);
if (result < 0)
return result;

View File

@@ -1,11 +1,16 @@
#define pr_fmt(fmt) "kcov: " fmt
#define DISABLE_BRANCH_PROFILING
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/slab.h>

View File

@@ -441,6 +441,8 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
while (hole_end <= crashk_res.end) {
unsigned long i;
cond_resched();
if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
break;
/* See if I overlap any of the segments */
@@ -1467,9 +1469,6 @@ static int __init crash_save_vmcoreinfo_init(void)
#endif
VMCOREINFO_NUMBER(PG_head_mask);
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLB_PAGE
VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif

View File

@@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args)
int r;
#ifdef CONFIG_KGDB_KDB
if (unlikely(kdb_trap_printk)) {
/* Allow to pass printk() to kdb but avoid a recursion. */
if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
return r;
}

View File

@@ -809,11 +809,11 @@ void relay_subbufs_consumed(struct rchan *chan,
{
struct rchan_buf *buf;
if (!chan)
if (!chan || cpu >= NR_CPUS)
return;
buf = *per_cpu_ptr(chan->buf, cpu);
if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs)
if (!buf || subbufs_consumed > chan->n_subbufs)
return;
if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)

View File

@@ -2491,6 +2491,13 @@ void __set_current_blocked(const sigset_t *newset)
{
struct task_struct *tsk = current;
/*
* In case the signal mask hasn't changed, there is nothing we need
* to do. The current->blocked shouldn't be modified by other task.
*/
if (sigequalsets(&tsk->blocked, newset))
return;
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, newset);
spin_unlock_irq(&tsk->sighand->siglock);

View File

@@ -2389,9 +2389,11 @@ static void validate_coredump_safety(void)
#ifdef CONFIG_COREDUMP
if (suid_dumpable == SUID_DUMP_ROOT &&
core_pattern[0] != '/' && core_pattern[0] != '|') {
printk(KERN_WARNING "Unsafe core_pattern used with "\
"suid_dumpable=2. Pipe handler or fully qualified "\
"core dump path required.\n");
printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
);
}
#endif
}

View File

@@ -1354,8 +1354,8 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
"warning: process `%s' used the deprecated sysctl "
"system call with ", current->comm);
for (i = 0; i < nlen; i++)
printk("%d.", name[i]);
printk("\n");
printk(KERN_CONT "%d.", name[i]);
printk(KERN_CONT "\n");
}
return;
}

View File

@@ -516,7 +516,8 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
spin_lock_irqsave(&ptr->it_lock, flags);
if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
if (posix_timer_event(ptr, 0) != 0)
if (IS_ENABLED(CONFIG_POSIX_TIMERS) &&
posix_timer_event(ptr, 0) != 0)
ptr->it_overrun++;
}

View File

@@ -24,32 +24,14 @@
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/perf_event.h>
#include <linux/kthread.h>
/*
* The run state of the lockup detectors is controlled by the content of the
* 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
* bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
*
* 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
* are variables that are only used as an 'interface' between the parameters
* in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
* 'watchdog_thresh' variable is handled differently because its value is not
* boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
* is equal zero.
*/
#define NMI_WATCHDOG_ENABLED_BIT 0
#define SOFT_WATCHDOG_ENABLED_BIT 1
#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
static DEFINE_MUTEX(watchdog_proc_mutex);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
@@ -59,9 +41,6 @@ int __read_mostly watchdog_thresh = 10;
#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#define sysctl_hardlockup_all_cpu_backtrace 0
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -100,50 +79,9 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
static unsigned long soft_lockup_nmi_warn;
/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
#ifdef CONFIG_HARDLOCKUP_DETECTOR
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long hardlockup_allcpu_dumped;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
* cases this function can be called to disable hard lockup detection. This
* function should only be executed once by the boot processor before the
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
void hardlockup_detector_disable(void)
{
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
else if (!strncmp(str, "1", 1))
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -264,32 +202,14 @@ void touch_all_softlockup_watchdogs(void)
wq_watchdog_touch(-1);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
void touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
* preemption enabled. If preemption is enabled
* then interrupts should be enabled too, in which
* case we shouldn't have to worry about the watchdog
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
#endif
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, 0);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
static bool is_hardlockup(void)
bool is_hardlockup(void)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
@@ -299,7 +219,6 @@ static bool is_hardlockup(void)
__this_cpu_write(hrtimer_interrupts_saved, hrint);
return false;
}
#endif
static int is_softlockup(unsigned long touch_ts)
{
@@ -313,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
};
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
}
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
* fired multiple times before we overflow'd. If it hasn't
* then this is a good indication the cpu is stuck
*/
if (is_hardlockup()) {
int this_cpu = smp_processor_id();
struct pt_regs *regs = get_irq_regs();
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
/*
* Perform all-CPU dump only once to avoid multiple hardlockups
* generating interleaving traces
*/
if (sysctl_hardlockup_all_cpu_backtrace &&
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
}
__this_cpu_write(hard_watchdog_warn, false);
return;
}
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
static int watchdog_nmi_enable(unsigned int cpu);
static void watchdog_nmi_disable(unsigned int cpu);
/*
* These two functions are mostly architecture specific
* defining them as weak here.
*/
int __weak watchdog_nmi_enable(unsigned int cpu)
{
return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
}
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);
@@ -577,109 +440,6 @@ static void watchdog(unsigned int cpu)
watchdog_nmi_disable(cpu);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
* that are different from what cpu0 displayed.
*/
static unsigned long cpu0_err;
static int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
/* nothing to do if the hard lockup detector is disabled */
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
goto out;
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
/* it is setup but not enabled */
if (event != NULL)
goto out_enable;
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
/* save cpu0 error for future comparision */
if (cpu == 0 && IS_ERR(event))
cpu0_err = PTR_ERR(event);
if (!IS_ERR(event)) {
/* only print for cpu0 or different than cpu0 */
if (cpu == 0 || cpu0_err)
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
/*
* Disable the hard lockup detector if _any_ CPU fails to set up
* set up the hardware perf event. The watchdog() function checks
* the NMI_WATCHDOG_ENABLED bit periodically.
*
* The barriers are for syncing up watchdog_enabled across all the
* cpus, as clear_bit() does not use barriers.
*/
smp_mb__before_atomic();
clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
smp_mb__after_atomic();
/* skip displaying the same error again */
if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
cpu, PTR_ERR(event));
pr_info("Shutting down hard lockup detector on all cpus\n");
return PTR_ERR(event);
/* success path */
out_save:
per_cpu(watchdog_ev, cpu) = event;
out_enable:
perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
return 0;
}
static void watchdog_nmi_disable(unsigned int cpu)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event) {
perf_event_disable(event);
per_cpu(watchdog_ev, cpu) = NULL;
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
}
if (cpu == 0) {
/* watchdog_nmi_enable() expects this to be zero initially. */
cpu0_err = 0;
}
}
#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
static struct smp_hotplug_thread watchdog_threads = {
.store = &softlockup_watchdog,
.thread_should_run = watchdog_should_run,

227
kernel/watchdog_hld.c Normal file
View File

@@ -0,0 +1,227 @@
/*
* Detect hard lockups on a system
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
* Note: Most of this code is borrowed heavily from the original softlockup
* detector, so thanks to Ingo for the initial implementation.
* Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
#define pr_fmt(fmt) "NMI watchdog: " fmt
#include <linux/nmi.h>
#include <linux/module.h>
#include <asm/irq_regs.h>
#include <linux/perf_event.h>
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
unsigned int __read_mostly hardlockup_panic =
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long hardlockup_allcpu_dumped;
/*
* We may not want to enable hard lockup detection by default in all cases,
* for example when running the kernel as a guest on a hypervisor. In these
* cases this function can be called to disable hard lockup detection. This
* function should only be executed once by the boot processor before the
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
void hardlockup_detector_disable(void)
{
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}
static int __init hardlockup_panic_setup(char *str)
{
if (!strncmp(str, "panic", 5))
hardlockup_panic = 1;
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
else if (!strncmp(str, "1", 1))
watchdog_enabled |= NMI_WATCHDOG_ENABLED;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
void touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
* preemption enabled. If preemption is enabled
* then interrupts should be enabled too, in which
* case we shouldn't have to worry about the watchdog
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
};
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
}
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have
* fired multiple times before we overflow'd. If it hasn't
* then this is a good indication the cpu is stuck
*/
if (is_hardlockup()) {
int this_cpu = smp_processor_id();
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
/*
* Perform all-CPU dump only once to avoid multiple hardlockups
* generating interleaving traces
*/
if (sysctl_hardlockup_all_cpu_backtrace &&
!test_and_set_bit(0, &hardlockup_allcpu_dumped))
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
}
__this_cpu_write(hard_watchdog_warn, false);
return;
}
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
* that are different from what cpu0 displayed.
*/
static unsigned long cpu0_err;
int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
struct perf_event *event = per_cpu(watchdog_ev, cpu);
/* nothing to do if the hard lockup detector is disabled */
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
goto out;
/* is it already setup and enabled? */
if (event && event->state > PERF_EVENT_STATE_OFF)
goto out;
/* it is setup but not enabled */
if (event != NULL)
goto out_enable;
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
/* save cpu0 error for future comparision */
if (cpu == 0 && IS_ERR(event))
cpu0_err = PTR_ERR(event);
if (!IS_ERR(event)) {
/* only print for cpu0 or different than cpu0 */
if (cpu == 0 || cpu0_err)
pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
goto out_save;
}
/*
* Disable the hard lockup detector if _any_ CPU fails to set up
* set up the hardware perf event. The watchdog() function checks
* the NMI_WATCHDOG_ENABLED bit periodically.
*
* The barriers are for syncing up watchdog_enabled across all the
* cpus, as clear_bit() does not use barriers.
*/
smp_mb__before_atomic();
clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
smp_mb__after_atomic();
/* skip displaying the same error again */
if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
return PTR_ERR(event);
/* vary the KERN level based on the returned errno */
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
cpu, PTR_ERR(event));
pr_info("Shutting down hard lockup detector on all cpus\n");
return PTR_ERR(event);
/* success path */
out_save:
per_cpu(watchdog_ev, cpu) = event;
out_enable:
perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
return 0;
}
void watchdog_nmi_disable(unsigned int cpu)
{
struct perf_event *event = per_cpu(watchdog_ev, cpu);
if (event) {
perf_event_disable(event);
per_cpu(watchdog_ev, cpu) = NULL;
/* should be in cleanup, but blocks oprofile */
perf_event_release_kernel(event);
}
if (cpu == 0) {
/* watchdog_nmi_enable() expects this to be zero initially. */
cpu0_err = 0;
}
}