Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
@@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group;
 
 /* fsnotify events we care about. */
 #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
-                        FS_MOVE_SELF | FS_EVENT_ON_CHILD)
+                        FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT)
 
 static void audit_free_parent(struct audit_parent *parent)
 {
@@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule)
        list_del(&krule->rlist);
 
        if (list_empty(&watch->rules)) {
+               /*
+                * audit_remove_watch() drops our reference to 'parent' which
+                * can get freed. Grab our own reference to be safe.
+                */
+               audit_get_parent(parent);
                audit_remove_watch(watch);
 
-               if (list_empty(&parent->watches)) {
-                       audit_get_parent(parent);
+               if (list_empty(&parent->watches))
                        fsnotify_destroy_mark(&parent->mark, audit_watch_group);
-                       audit_put_parent(parent);
-               }
+               audit_put_parent(parent);
        }
 }
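The hunk above fixes a use-after-free: audit_remove_watch() can drop the last reference the caller was relying on, so the caller now pins 'parent' first and releases its own reference once it is done. A minimal userspace sketch of that take-a-reference-before-the-call pattern (illustrative only, not the kernel's audit code):

#include <stdio.h>
#include <stdlib.h>

struct parent { int refcount; };

static void get(struct parent *p) { p->refcount++; }

static void put(struct parent *p)
{
        if (--p->refcount == 0)
                free(p);                /* last reference gone */
}

/* Stands in for audit_remove_watch(): drops a reference on behalf of the watch. */
static void remove_watch(struct parent *p) { put(p); }

int main(void)
{
        struct parent *p = calloc(1, sizeof(*p));

        p->refcount = 1;        /* only the watch holds a reference          */
        get(p);                 /* the fix: grab our own reference first     */
        remove_watch(p);        /* may drop the last other reference         */
        printf("still safe to touch p: refcount=%d\n", p->refcount);
        put(p);                 /* balance our reference; 'p' is freed here  */
        return 0;
}

Without the extra get(), the printf above would read freed memory, which is exactly the window the diff closes.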
@@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event,
        return can_add_hw;
 }
 
+/*
+ * Complement to update_event_times(). This computes the tstamp_* values to
+ * continue 'enabled' state from @now, and effectively discards the time
+ * between the prior tstamp_stopped and now (as we were in the OFF state, or
+ * just switched (context) time base).
+ *
+ * This further assumes '@event->state == INACTIVE' (we just came from OFF) and
+ * cannot have been scheduled in yet. And going into INACTIVE state means
+ * '@event->tstamp_stopped = @now'.
+ *
+ * Thus given the rules of update_event_times():
+ *
+ *   total_time_enabled = tstamp_stopped - tstamp_enabled
+ *   total_time_running = tstamp_stopped - tstamp_running
+ *
+ * We can insert 'tstamp_stopped == now' and reverse them to compute new
+ * tstamp_* values.
+ */
+static void __perf_event_enable_time(struct perf_event *event, u64 now)
+{
+       WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);
+
+       event->tstamp_stopped = now;
+       event->tstamp_enabled = now - event->total_time_enabled;
+       event->tstamp_running = now - event->total_time_running;
+}
+
 static void add_event_to_ctx(struct perf_event *event,
                             struct perf_event_context *ctx)
 {
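The arithmetic in __perf_event_enable_time() can be checked in isolation: writing tstamp_enabled = now - total_time_enabled (and likewise for running) makes the update_event_times() rules quoted in the comment reproduce the old totals exactly, so none of the time spent OFF is accounted. A standalone sketch under that assumption (a plain struct standing in for the kernel's perf_event):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Only the fields used by the tstamp bookkeeping are modelled here. */
struct evt {
        uint64_t tstamp_enabled;
        uint64_t tstamp_running;
        uint64_t tstamp_stopped;
        uint64_t total_time_enabled;
        uint64_t total_time_running;
};

/* Mirrors the helper added above: continue the clocks from @now. */
static void enable_time(struct evt *e, uint64_t now)
{
        e->tstamp_stopped = now;
        e->tstamp_enabled = now - e->total_time_enabled;
        e->tstamp_running = now - e->total_time_running;
}

int main(void)
{
        /* Enabled for 100 ns, running for 60 ns before it was turned OFF. */
        struct evt e = { .total_time_enabled = 100, .total_time_running = 60 };

        enable_time(&e, 1000);  /* re-enable at an arbitrary later time */

        /* The update_event_times() rules recover the same totals. */
        assert(e.tstamp_stopped - e.tstamp_enabled == e.total_time_enabled);
        assert(e.tstamp_stopped - e.tstamp_running == e.total_time_running);
        printf("enabled=%llu running=%llu\n",
               (unsigned long long)e.total_time_enabled,
               (unsigned long long)e.total_time_running);
        return 0;
}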
@@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event,
 
        list_add_event(event, ctx);
        perf_group_attach(event);
-       event->tstamp_enabled = tstamp;
-       event->tstamp_running = tstamp;
-       event->tstamp_stopped = tstamp;
+       /*
+        * We can be called with event->state == STATE_OFF when we create with
+        * .disabled = 1. In that case the IOC_ENABLE will call this function.
+        */
+       if (event->state == PERF_EVENT_STATE_INACTIVE)
+               __perf_event_enable_time(event, tstamp);
 }
 
 static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event)
        u64 tstamp = perf_event_time(event);
 
        event->state = PERF_EVENT_STATE_INACTIVE;
-       event->tstamp_enabled = tstamp - event->total_time_enabled;
+       __perf_event_enable_time(event, tstamp);
        list_for_each_entry(sub, &event->sibling_list, group_entry) {
                /* XXX should not be > INACTIVE if event isn't */
                if (sub->state >= PERF_EVENT_STATE_INACTIVE)
-                       sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+                       __perf_event_enable_time(sub, tstamp);
        }
 }
@@ -5090,7 +5121,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
                atomic_inc(&event->rb->aux_mmap_count);
 
        if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5144,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        unsigned long size = perf_data_size(rb);
 
        if (event->pmu->event_unmapped)
-               event->pmu->event_unmapped(event);
+               event->pmu->event_unmapped(event, vma->vm_mm);
 
        /*
         * rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5442,7 @@ aux_unlock:
        vma->vm_ops = &perf_mmap_vmops;
 
        if (event->pmu->event_mapped)
-               event->pmu->event_mapped(event);
+               event->pmu->event_mapped(event, vma->vm_mm);
 
        return ret;
 }
@@ -1000,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name);
 
 void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
 {
-       unsigned long flags;
+       unsigned long flags, trigger, tmp;
        struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
 
        if (!desc)
@@ -1014,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
 
        irq_settings_clr_and_set(desc, clr, set);
 
+       trigger = irqd_get_trigger_type(&desc->irq_data);
+
        irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
                   IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
        if (irq_settings_has_no_balance_set(desc))
@@ -1025,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
        if (irq_settings_is_level(desc))
                irqd_set(&desc->irq_data, IRQD_LEVEL);
 
-       irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
+       tmp = irq_settings_get_trigger_mask(desc);
+       if (tmp != IRQ_TYPE_NONE)
+               trigger = tmp;
+
+       irqd_set(&desc->irq_data, trigger);
 
        irq_put_desc_unlock(desc, flags);
 }
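Taken together, the three chip.c hunks preserve the programmed trigger type across irq_modify_status(): the current type is saved before irqd_clear() wipes IRQD_TRIGGER_MASK, and it is only overridden when the new settings actually carry a trigger (IRQ_TYPE_NONE means "nothing requested"). A small standalone sketch of that save-unless-overridden logic (illustrative values, not the genirq API):

#include <stdio.h>

#define IRQ_TYPE_NONE           0u
#define IRQ_TYPE_EDGE_RISING    1u
#define IRQ_TYPE_LEVEL_HIGH     4u

static unsigned int pick_trigger(unsigned int programmed, unsigned int requested)
{
        unsigned int trigger = programmed;      /* saved before the clear     */

        if (requested != IRQ_TYPE_NONE)         /* explicit new setting wins  */
                trigger = requested;
        return trigger;                         /* otherwise keep the old one */
}

int main(void)
{
        /* No trigger in the new settings: the old edge configuration survives. */
        printf("%u\n", pick_trigger(IRQ_TYPE_EDGE_RISING, IRQ_TYPE_NONE));
        /* A real request replaces it. */
        printf("%u\n", pick_trigger(IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_HIGH));
        return 0;
}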
@@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
        struct irq_data *data = irq_get_irq_data(irq);
        struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
 
-       if (!data || !ipimask || cpu > nr_cpu_ids)
+       if (!data || !ipimask || cpu >= nr_cpu_ids)
                return INVALID_HWIRQ;
 
        if (!cpumask_test_cpu(cpu, ipimask))
@@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
        if (!chip->ipi_send_single && !chip->ipi_send_mask)
                return -EINVAL;
 
-       if (cpu > nr_cpu_ids)
+       if (cpu >= nr_cpu_ids)
                return -EINVAL;
 
        if (dest) {
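Both ipi.c hunks are the same off-by-one fix: with nr_cpu_ids possible CPUs the valid ids are 0 through nr_cpu_ids - 1, so 'cpu > nr_cpu_ids' still lets the out-of-range id cpu == nr_cpu_ids through. A tiny standalone illustration (not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define NR_CPU_IDS 4u   /* valid ids are 0..3 */

static bool in_range_old(unsigned int cpu) { return !(cpu > NR_CPU_IDS); }   /* buggy */
static bool in_range_new(unsigned int cpu) { return !(cpu >= NR_CPU_IDS); }  /* fixed */

int main(void)
{
        unsigned int cpu = NR_CPU_IDS;  /* one past the last valid id */

        printf("old check accepts cpu=%u: %d\n", cpu, in_range_old(cpu));  /* 1 */
        printf("new check accepts cpu=%u: %d\n", cpu, in_range_new(cpu));  /* 0 */
        return 0;
}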
@@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem);
 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
 static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
 
+/*
+ * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
+ * running at the same time without returning. When this happens we
+ * believe you've somehow ended up with a recursive module dependency
+ * creating a loop.
+ *
+ * We have no option but to fail.
+ *
+ * Userspace should proactively try to detect and prevent these.
+ */
+#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
+
 /*
        modprobe_path is set via /proc/sys.
 */
@@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
                pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
                                    atomic_read(&kmod_concurrent_max),
                                    MAX_KMOD_CONCURRENT, module_name);
-               wait_event_interruptible(kmod_wq,
-                                        atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
+               ret = wait_event_killable_timeout(kmod_wq,
+                                                 atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
+                                                 MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
+               if (!ret) {
+                       pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
+                                           module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
+                       return -ETIME;
+               } else if (ret == -ERESTARTSYS) {
+                       pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
+                       return ret;
+               }
        }
 
        trace_module_request(module_name, wait, _RET_IP_);
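The error handling above follows the usual wait_event_*_timeout() contract: a return of 0 means the timeout expired with the condition still false (hence -ETIME), a positive value means a concurrency slot was obtained, and -ERESTARTSYS means a fatal signal arrived first. The slot itself is claimed by atomic_dec_if_positive(), which only decrements when the counter would stay non-negative. A userspace model of that primitive using C11 atomics (an illustration, not the kernel implementation):

#include <stdatomic.h>
#include <stdio.h>

/* Decrement only if the result stays >= 0; return the would-be new value. */
static int dec_if_positive(atomic_int *v)
{
        int old = atomic_load(v);

        while (old > 0) {
                if (atomic_compare_exchange_weak(v, &old, old - 1))
                        return old - 1;         /* slot taken, new value >= 0 */
        }
        return old - 1;                         /* no slot free: negative     */
}

int main(void)
{
        atomic_int kmod_concurrent_max = 2;     /* pretend MAX_KMOD_CONCURRENT is 2 */

        for (int i = 0; i < 4; i++)
                printf("request %d -> %d\n", i, dec_if_positive(&kmod_concurrent_max));
        return 0;
}

The first two requests get slots (results 1 and 0); the rest see a negative value and, in the kernel code above, end up waiting on kmod_wq.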
kernel/pid.c
@@ -527,8 +527,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
        if (!ns)
                ns = task_active_pid_ns(current);
        if (likely(pid_alive(task))) {
-               if (type != PIDTYPE_PID)
+               if (type != PIDTYPE_PID) {
+                       if (type == __PIDTYPE_TGID)
+                               type = PIDTYPE_PID;
                        task = task->group_leader;
+               }
                nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
        }
        rcu_read_unlock();
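This hunk routes the TGID lookup through the same table as a plain PID lookup: a thread's TGID is, by definition, the PID of its thread-group leader, so __PIDTYPE_TGID can be answered by switching to task->group_leader and reading its PIDTYPE_PID entry. A minimal userspace model of that identity (illustrative structs, not the kernel's task_struct):

#include <assert.h>
#include <stdio.h>

struct task {
        int pid;
        struct task *group_leader;
};

/* What the hunk effectively computes for __PIDTYPE_TGID. */
static int task_tgid(const struct task *t)
{
        return t->group_leader->pid;
}

int main(void)
{
        struct task leader = { .pid = 100 };
        struct task thread = { .pid = 105, .group_leader = &leader };

        leader.group_leader = &leader;  /* a leader leads its own group */

        assert(task_tgid(&leader) == 100);
        assert(task_tgid(&thread) == 100);
        printf("tgid(%d) = %d\n", thread.pid, task_tgid(&thread));
        return 0;
}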
@@ -537,12 +540,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 }
 EXPORT_SYMBOL(__task_pid_nr_ns);
 
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-       return pid_nr_ns(task_tgid(tsk), ns);
-}
-EXPORT_SYMBOL(task_tgid_nr_ns);
-
 struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 {
        return ns_of_pid(task_pid(tsk));
@@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
                        recalc_sigpending_and_wake(t);
                }
        }
-       if (action->sa.sa_handler == SIG_DFL)
+       /*
+        * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
+        * debugging to leave init killable.
+        */
+       if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
                t->signal->flags &= ~SIGNAL_UNKILLABLE;
        ret = specific_send_sig_info(sig, info, t);
        spin_unlock_irqrestore(&t->sighand->siglock, flags);
@@ -240,6 +240,7 @@ static void set_sample_period(void)
         * hardlockup detector generates a warning
         */
        sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+       watchdog_update_hrtimer_threshold(sample_period);
 }
 
 /* Commands for resetting the watchdog */
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+       /*
+        * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+        *
+        * So it runs effectively with 2.5 times the rate of the NMI
+        * watchdog. That means the hrtimer should fire 2-3 times before
+        * the NMI watchdog expires. The NMI watchdog on x86 is based on
+        * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+        * might run way faster than expected and the NMI fires in a
+        * smaller period than the one deduced from the nominal CPU
+        * frequency. Depending on the Turbo-Mode factor this might be fast
+        * enough to get the NMI period smaller than the hrtimer watchdog
+        * period and trigger false positives.
+        *
+        * The sample threshold is used to check in the NMI handler whether
+        * the minimum time between two NMI samples has elapsed. That
+        * prevents false positives.
+        *
+        * Set this to 4/5 of the actual watchdog threshold period so the
+        * hrtimer is guaranteed to fire at least once within the real
+        * watchdog threshold.
+        */
+       watchdog_hrtimer_sample_threshold = period * 2;
+}
+
+static bool watchdog_check_timestamp(void)
+{
+       ktime_t delta, now = ktime_get_mono_fast_ns();
+
+       delta = now - __this_cpu_read(last_timestamp);
+       if (delta < watchdog_hrtimer_sample_threshold) {
+               /*
+                * If ktime is jiffies based, a stalled timer would prevent
+                * jiffies from being incremented and the filter would look
+                * at a stale timestamp and never trigger.
+                */
+               if (__this_cpu_inc_return(nmi_rearmed) < 10)
+                       return false;
+       }
+       __this_cpu_write(nmi_rearmed, 0);
+       __this_cpu_write(last_timestamp, now);
+       return true;
+}
+#else
+static inline bool watchdog_check_timestamp(void)
+{
+       return true;
+}
+#endif
+
 static struct perf_event_attr wd_hw_attr = {
        .type = PERF_TYPE_HARDWARE,
        .config = PERF_COUNT_HW_CPU_CYCLES,
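The numbers behind the "4/5" figure in the comment can be worked through directly. Using the relations visible in the diff (the softlockup threshold is twice watchdog_thresh, the hrtimer period is one fifth of that, and the NMI sample threshold is twice the hrtimer period) and assuming the common 10-second watchdog_thresh, the threshold comes out at 8 seconds, i.e. 4/5 of the 10-second NMI period. A short standalone check (not kernel code):

#include <stdio.h>

int main(void)
{
        unsigned long long nsec_per_sec = 1000000000ULL;
        unsigned long long watchdog_thresh = 10;                     /* seconds, assumed default */
        unsigned long long softlockup_thresh = 2 * watchdog_thresh;  /* 20 s                     */
        unsigned long long sample_period = softlockup_thresh * (nsec_per_sec / 5);
        unsigned long long sample_threshold = sample_period * 2;

        printf("hrtimer period      : %llu ns (4 s)\n", sample_period);
        printf("NMI sample threshold: %llu ns (8 s = 4/5 of 10 s)\n", sample_threshold);
        return 0;
}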
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
                return;
        }
 
+       if (!watchdog_check_timestamp())
+               return;
+
        /* check for a hardlockup
         * This is done by making sure our timer interrupt
         * is incrementing. The timer interrupt should have