Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

This commit is contained in:
David S. Miller
2017-08-21 17:06:42 -07:00
199 changed files with 1218 additions and 655 deletions

View File

@@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group;
/* fsnotify events we care about. */
#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
FS_MOVE_SELF | FS_EVENT_ON_CHILD)
FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT)
static void audit_free_parent(struct audit_parent *parent)
{
@@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule)
list_del(&krule->rlist);
if (list_empty(&watch->rules)) {
/*
* audit_remove_watch() drops our reference to 'parent' which
* can get freed. Grab our own reference to be safe.
*/
audit_get_parent(parent);
audit_remove_watch(watch);
if (list_empty(&parent->watches)) {
audit_get_parent(parent);
if (list_empty(&parent->watches))
fsnotify_destroy_mark(&parent->mark, audit_watch_group);
audit_put_parent(parent);
}
audit_put_parent(parent);
}
}

View File

@@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event,
return can_add_hw;
}
/*
* Complement to update_event_times(). This computes the tstamp_* values to
* continue 'enabled' state from @now, and effectively discards the time
* between the prior tstamp_stopped and now (as we were in the OFF state, or
* just switched (context) time base).
*
* This further assumes '@event->state == INACTIVE' (we just came from OFF) and
* cannot have been scheduled in yet. And going into INACTIVE state means
* '@event->tstamp_stopped = @now'.
*
* Thus given the rules of update_event_times():
*
* total_time_enabled = tstamp_stopped - tstamp_enabled
* total_time_running = tstamp_stopped - tstamp_running
*
* We can insert 'tstamp_stopped == now' and reverse them to compute new
* tstamp_* values.
*/
static void __perf_event_enable_time(struct perf_event *event, u64 now)
{
	/* Callers must hand in an INACTIVE event; warn once if they do not. */
	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE);

	/*
	 * Back-date both start stamps by the totals accumulated so far, so
	 * that 'tstamp_stopped - tstamp_*' yields those same totals at @now.
	 */
	event->tstamp_enabled = now - event->total_time_enabled;
	event->tstamp_running = now - event->total_time_running;

	/* An INACTIVE event is, by definition here, stopped as of @now. */
	event->tstamp_stopped = now;
}
static void add_event_to_ctx(struct perf_event *event,
struct perf_event_context *ctx)
{
@@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event,
list_add_event(event, ctx);
perf_group_attach(event);
event->tstamp_enabled = tstamp;
event->tstamp_running = tstamp;
event->tstamp_stopped = tstamp;
/*
* We can be called with event->state == STATE_OFF when we create with
* .disabled = 1. In that case the IOC_ENABLE will call this function.
*/
if (event->state == PERF_EVENT_STATE_INACTIVE)
__perf_event_enable_time(event, tstamp);
}
static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event)
u64 tstamp = perf_event_time(event);
event->state = PERF_EVENT_STATE_INACTIVE;
event->tstamp_enabled = tstamp - event->total_time_enabled;
__perf_event_enable_time(event, tstamp);
list_for_each_entry(sub, &event->sibling_list, group_entry) {
/* XXX should not be > INACTIVE if event isn't */
if (sub->state >= PERF_EVENT_STATE_INACTIVE)
sub->tstamp_enabled = tstamp - sub->total_time_enabled;
__perf_event_enable_time(sub, tstamp);
}
}
@@ -5090,7 +5121,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
atomic_inc(&event->rb->aux_mmap_count);
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);
event->pmu->event_mapped(event, vma->vm_mm);
}
static void perf_pmu_output_stop(struct perf_event *event);
@@ -5113,7 +5144,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
unsigned long size = perf_data_size(rb);
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event);
event->pmu->event_unmapped(event, vma->vm_mm);
/*
* rb->aux_mmap_count will always drop before rb->mmap_count and
@@ -5411,7 +5442,7 @@ aux_unlock:
vma->vm_ops = &perf_mmap_vmops;
if (event->pmu->event_mapped)
event->pmu->event_mapped(event);
event->pmu->event_mapped(event, vma->vm_mm);
return ret;
}

View File

@@ -1000,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name);
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
{
unsigned long flags;
unsigned long flags, trigger, tmp;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
if (!desc)
@@ -1014,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
irq_settings_clr_and_set(desc, clr, set);
trigger = irqd_get_trigger_type(&desc->irq_data);
irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
if (irq_settings_has_no_balance_set(desc))
@@ -1025,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
if (irq_settings_is_level(desc))
irqd_set(&desc->irq_data, IRQD_LEVEL);
irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
tmp = irq_settings_get_trigger_mask(desc);
if (tmp != IRQ_TYPE_NONE)
trigger = tmp;
irqd_set(&desc->irq_data, trigger);
irq_put_desc_unlock(desc, flags);
}

View File

@@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
struct irq_data *data = irq_get_irq_data(irq);
struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
if (!data || !ipimask || cpu > nr_cpu_ids)
if (!data || !ipimask || cpu >= nr_cpu_ids)
return INVALID_HWIRQ;
if (!cpumask_test_cpu(cpu, ipimask))
@@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
if (!chip->ipi_send_single && !chip->ipi_send_mask)
return -EINVAL;
if (cpu > nr_cpu_ids)
if (cpu >= nr_cpu_ids)
return -EINVAL;
if (dest) {

View File

@@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem);
static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
/*
* This is a restriction on having *all* MAX_KMOD_CONCURRENT threads
* running at the same time without returning. When this happens we
* believe you've somehow ended up with a recursive module dependency
* creating a loop.
*
* We have no option but to fail.
*
* Userspace should proactively try to detect and prevent these.
*/
#define MAX_KMOD_ALL_BUSY_TIMEOUT 5
/*
modprobe_path is set via /proc/sys.
*/
@@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...)
pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
atomic_read(&kmod_concurrent_max),
MAX_KMOD_CONCURRENT, module_name);
wait_event_interruptible(kmod_wq,
atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
ret = wait_event_killable_timeout(kmod_wq,
atomic_dec_if_positive(&kmod_concurrent_max) >= 0,
MAX_KMOD_ALL_BUSY_TIMEOUT * HZ);
if (!ret) {
pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now",
module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT);
return -ETIME;
} else if (ret == -ERESTARTSYS) {
pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name);
return ret;
}
}
trace_module_request(module_name, wait, _RET_IP_);

View File

@@ -527,8 +527,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
if (!ns)
ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
if (type != PIDTYPE_PID) {
if (type == __PIDTYPE_TGID)
type = PIDTYPE_PID;
task = task->group_leader;
}
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
@@ -537,12 +540,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
}
EXPORT_SYMBOL(__task_pid_nr_ns);
/*
 * Look up the pid returned by task_tgid() for @tsk and hand it, together
 * with @ns, to pid_nr_ns(); the resulting pid_t is returned unchanged.
 *
 * NOTE(review): presumably this is the thread-group id of @tsk as seen from
 * the pid namespace @ns -- confirm against task_tgid() and pid_nr_ns().
 */
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
return pid_nr_ns(task_tgid(tsk), ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));

View File

@@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
recalc_sigpending_and_wake(t);
}
}
if (action->sa.sa_handler == SIG_DFL)
/*
* Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
* debugging to leave init killable.
*/
if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
t->signal->flags &= ~SIGNAL_UNKILLABLE;
ret = specific_send_sig_info(sig, info, t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);

View File

@@ -240,6 +240,7 @@ static void set_sample_period(void)
* hardlockup detector generates a warning
*/
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
watchdog_update_hrtimer_threshold(sample_period);
}
/* Commands for resetting the watchdog */

View File

@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
static DEFINE_PER_CPU(ktime_t, last_timestamp);
static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
/*
 * Recompute the minimum spacing the NMI handler requires between two
 * accepted samples, based on the hrtimer period passed in as @period.
 *
 * Background: the hrtimer period is (watchdog_threshold * 2) / 5, i.e. the
 * hrtimer effectively runs at 2.5 times the rate of the NMI watchdog and
 * should fire 2-3 times before the NMI watchdog expires.
 *
 * The NMI watchdog on x86 counts unhalted CPU cycles. With Turbo-Mode
 * enabled the cycles may tick way faster than expected, so the NMI fires in
 * a smaller period than the one deduced from the nominal CPU frequency.
 * Depending on the Turbo-Mode factor, the NMI period can become smaller
 * than the hrtimer watchdog period and trigger false positives.
 *
 * The sample threshold lets the NMI handler check whether the minimum time
 * between two NMI samples has elapsed, which prevents those false positives.
 *
 * It is set to twice @period, which is 4/5 of the actual watchdog threshold
 * period, so the hrtimer is guaranteed to fire at least once within the
 * real watchdog threshold.
 */
void watchdog_update_hrtimer_threshold(u64 period)
{
	watchdog_hrtimer_sample_threshold = 2 * period;
}
/*
 * Decide whether the current NMI sample should be processed.
 *
 * Returns false when less than watchdog_hrtimer_sample_threshold has passed
 * since the last accepted sample on this CPU and fewer than 10 samples in a
 * row have been rejected; returns true otherwise, after resetting the
 * per-CPU rejection counter and recording the current time.
 */
static bool watchdog_check_timestamp(void)
{
	ktime_t now = ktime_get_mono_fast_ns();
	ktime_t elapsed = now - __this_cpu_read(last_timestamp);

	/*
	 * The rejection counter is only bumped for samples that arrive too
	 * early. It caps the filter: if ktime is jiffies based, a stalled
	 * timer would prevent jiffies from being incremented, the filter
	 * would look at a stale timestamp and would never trigger.
	 */
	if (elapsed < watchdog_hrtimer_sample_threshold &&
	    __this_cpu_inc_return(nmi_rearmed) < 10)
		return false;

	__this_cpu_write(nmi_rearmed, 0);
	__this_cpu_write(last_timestamp, now);
	return true;
}
#else
/*
 * Fallback used when CONFIG_HARDLOCKUP_CHECK_TIMESTAMP is not defined: no
 * timestamp filtering is compiled in, so every sample is accepted.
 */
static inline bool watchdog_check_timestamp(void)
{
return true;
}
#endif
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
}
if (!watchdog_check_timestamp())
return;
/* check for a hardlockup
* This is done by making sure our timer interrupt
* is incrementing. The timer interrupt should have