Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
	net/netfilter/nfnetlink_log.c
	net/netfilter/xt_LOG.c

Rather easy conflict resolution: the 'net' tree had bug fixes to make sure
we check whether a socket is a time-wait one and, if so, elide the logging
code, whereas on the 'net-next' side we calculate the UID and GID from the
creds using different interfaces due to the user namespace changes from
Eric Biederman.

Signed-off-by: David S. Miller <davem@davemloft.net>
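For context, the conflicting hunks combine two changes to the same credential-logging path. As a rough sketch of the merged shape, factored into a hypothetical nfulnl_put_sk_uidgid() helper (the real logic sits inline in nfnetlink_log.c's packet builder, and the xt_LOG.c side prints rather than emits netlink attributes), assuming the from_kuid_munged()/from_kgid_munged() helpers from the user-namespace series:

/*
 * Illustrative sketch only -- not the literal resolution.  The helper
 * name nfulnl_put_sk_uidgid() is hypothetical.
 */
#include <linux/cred.h>
#include <linux/skbuff.h>
#include <linux/uidgid.h>
#include <linux/netfilter/nfnetlink_log.h>
#include <net/netlink.h>
#include <net/sock.h>
#include <net/tcp_states.h>

static int nfulnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk,
				struct user_namespace *user_ns)
{
	const struct cred *cred;
	__be32 uid, gid;

	/* 'net' fix: time-wait minisockets carry no socket/file/creds */
	if (!sk || sk->sk_state == TCP_TIME_WAIT)
		return 0;

	read_lock_bh(&sk->sk_callback_lock);
	if (!sk->sk_socket || !sk->sk_socket->file) {
		read_unlock_bh(&sk->sk_callback_lock);
		return 0;
	}

	/* 'net-next' change: creds now hold kuid_t/kgid_t, so translate
	 * them into the target user namespace before putting them on
	 * the wire. */
	cred = sk->sk_socket->file->f_cred;
	uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
	gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
	read_unlock_bh(&sk->sk_callback_lock);

	if (nla_put_be32(skb, NFULA_UID, uid) ||
	    nla_put_be32(skb, NFULA_GID, gid))
		return -1;

	return 0;
}

In other words, the resolution amounts to taking the time-wait check from 'net' and the kuid_t/kgid_t conversion from 'net-next'.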
@@ -1253,7 +1253,7 @@ retry:
 /*
  * Cross CPU call to disable a performance event
  */
-static int __perf_event_disable(void *info)
+int __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 /*
  * Called when the last reference to the file is gone.
  */
-static int perf_release(struct inode *inode, struct file *file)
+static void put_event(struct perf_event *event)
 {
-	struct perf_event *event = file->private_data;
 	struct task_struct *owner;
 
-	file->private_data = NULL;
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
 
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
@@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file)
 		put_task_struct(owner);
 	}
 
-	return perf_event_release_kernel(event);
+	perf_event_release_kernel(event);
+}
+
+static int perf_release(struct inode *inode, struct file *file)
+{
+	put_event(file->private_data);
+	return 0;
 }
 
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
@@ -3227,7 +3233,7 @@ unlock:
 
 static const struct file_operations perf_fops;
 
-static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+static struct file *perf_fget_light(int fd, int *fput_needed)
 {
 	struct file *file;
 
@@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed)
 		return ERR_PTR(-EBADF);
 	}
 
-	return file->private_data;
+	return file;
 }
 
 static int perf_event_set_output(struct perf_event *event,
@@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	case PERF_EVENT_IOC_SET_OUTPUT:
 	{
+		struct file *output_file = NULL;
 		struct perf_event *output_event = NULL;
 		int fput_needed = 0;
 		int ret;
 
 		if (arg != -1) {
-			output_event = perf_fget_light(arg, &fput_needed);
-			if (IS_ERR(output_event))
-				return PTR_ERR(output_event);
+			output_file = perf_fget_light(arg, &fput_needed);
+			if (IS_ERR(output_file))
+				return PTR_ERR(output_file);
+			output_event = output_file->private_data;
 		}
 
 		ret = perf_event_set_output(event, output_event);
-		if (output_event)
-			fput_light(output_event->filp, fput_needed);
+		fput_light(output_file, fput_needed);
 
 		return ret;
 	}
@@ -5950,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	mutex_init(&event->mmap_mutex);
 
+	atomic_long_set(&event->refcount, 1);
 	event->cpu = cpu;
 	event->attr = *attr;
 	event->group_leader = group_leader;
@@ -6260,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open,
 		return event_fd;
 
 	if (group_fd != -1) {
-		group_leader = perf_fget_light(group_fd, &fput_needed);
-		if (IS_ERR(group_leader)) {
-			err = PTR_ERR(group_leader);
+		group_file = perf_fget_light(group_fd, &fput_needed);
+		if (IS_ERR(group_file)) {
+			err = PTR_ERR(group_file);
 			goto err_fd;
 		}
-		group_file = group_leader->filp;
+		group_leader = group_file->private_data;
 		if (flags & PERF_FLAG_FD_OUTPUT)
 			output_event = group_leader;
 		if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -6402,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_ctx(gctx);
 	}
 
-	event->filp = event_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 
@@ -6496,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_free;
 	}
 
-	event->filp = NULL;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
@@ -6578,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event,
 	 * Release the parent event, if this was the last
 	 * reference to it.
 	 */
-	fput(parent_event->filp);
+	put_event(parent_event);
 }
 
 static void
@@ -6654,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 *
 	 *   __perf_event_exit_task()
 	 *     sync_child_event()
-	 *       fput(parent_event->filp)
-	 *         perf_release()
-	 *           mutex_lock(&ctx->mutex)
+	 *       put_event()
+	 *         mutex_lock(&ctx->mutex)
 	 *
 	 * But since its the parent context it won't be the same instance.
 	 */
@@ -6724,7 +6730,7 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	fput(parent->filp);
+	put_event(parent);
 
 	perf_group_detach(event);
 	list_del_event(event, ctx);
@@ -6804,6 +6810,12 @@ inherit_event(struct perf_event *parent_event,
 					   NULL, NULL);
 	if (IS_ERR(child_event))
 		return child_event;
+
+	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+		free_event(child_event);
+		return NULL;
+	}
+
 	get_ctx(child_ctx);
 
 	/*
@@ -6844,14 +6856,6 @@ inherit_event(struct perf_event *parent_event,
 	add_event_to_ctx(child_event, child_ctx);
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
-	/*
-	 * Get a reference to the parent filp - we will fput it
-	 * when the child event exits. This is safe to do because
-	 * we are in the parent and we know that the filp still
-	 * exists and has a nonzero count:
-	 */
-	atomic_long_inc(&parent_event->filp->f_count);
-
 	/*
 	 * Link this into the parent event's child list
 	 */
@@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	int old_type = bp->attr.bp_type;
 	int err = 0;
 
-	perf_event_disable(bp);
+	/*
+	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
+	 * will not be possible to raise IPIs that invoke __perf_event_disable.
+	 * So call the function directly after making sure we are targeting the
+	 * current task.
+	 */
+	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
+		__perf_event_disable(bp);
+	else
+		perf_event_disable(bp);
 
 	bp->attr.bp_addr = attr->bp_addr;
 	bp->attr.bp_type = attr->bp_type;
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 
-		if (file && uprobe_mmap(tmp))
-			goto out;
+		if (file)
+			uprobe_mmap(tmp);
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);
@@ -5304,27 +5304,17 @@ void idle_task_exit(void)
 }
 
 /*
- * While a dead CPU has no uninterruptible tasks queued at this point,
- * it might still have a nonzero ->nr_uninterruptible counter, because
- * for performance reasons the counter is not stricly tracking tasks to
- * their home CPUs. So we just add the counter to another CPU's counter,
- * to keep the global sum constant after CPU-down:
+ * Since this CPU is going 'away' for a while, fold any nr_active delta
+ * we might have. Assumes we're called after migrate_tasks() so that the
+ * nr_active count is stable.
+ *
+ * Also see the comment "Global load-average calculations".
  */
-static void migrate_nr_uninterruptible(struct rq *rq_src)
+static void calc_load_migrate(struct rq *rq)
 {
-	struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-
-	rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
-	rq_src->nr_uninterruptible = 0;
-}
-
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
- */
-static void calc_global_load_remove(struct rq *rq)
-{
-	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-	rq->calc_load_active = 0;
+	long delta = calc_load_fold_active(rq);
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
 }
 
 /*
@@ -5352,9 +5342,6 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 */
 	rq->stop = NULL;
 
-	/* Ensure any throttled groups are reachable by pick_next_task */
-	unthrottle_offline_cfs_rqs(rq);
-
 	for ( ; ; ) {
 		/*
 		 * There's this thread running, bail when that's the only
@@ -5618,8 +5605,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 
-		migrate_nr_uninterruptible(rq);
-		calc_global_load_remove(rq);
+		calc_load_migrate(rq);
 		break;
 #endif
 	}
@@ -2052,7 +2052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	hrtimer_cancel(&cfs_b->slack_timer);
 }
 
-void unthrottle_offline_cfs_rqs(struct rq *rq)
+static void unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct cfs_rq *cfs_rq;
 
@@ -2106,7 +2106,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 	return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-void unthrottle_offline_cfs_rqs(struct rq *rq) {}
+static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 
 #endif /* CONFIG_CFS_BANDWIDTH */
 
@@ -3658,7 +3658,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @group: sched_group whose statistics are to be updated.
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
  * @local_group: Does group contain this_cpu.
- * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sgs: variable to hold the statistics for this group.
 */
@@ -3805,7 +3804,6 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 /**
 * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
 * @env: The load balancing environment.
- * @cpus: Set of cpus considered for load balancing.
 * @balance: Should we balance.
 * @sds: variable to hold the statistics for this sched_domain.
 */
@@ -4956,6 +4954,9 @@ static void rq_online_fair(struct rq *rq)
 static void rq_offline_fair(struct rq *rq)
 {
 	update_sysctl();
+
+	/* Ensure any throttled groups are reachable by pick_next_task */
+	unthrottle_offline_cfs_rqs(rq);
 }
 
 #endif /* CONFIG_SMP */
@@ -691,6 +691,7 @@ balanced:
 		 * runtime - in which case borrowing doesn't make sense.
 		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
+		rt_rq->rt_throttled = 0;
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 		raw_spin_unlock(&rt_b->rt_runtime_lock);
 	}
@@ -1144,7 +1144,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
 extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void unthrottle_offline_cfs_rqs(struct rq *rq);
 
 extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
 
@@ -573,6 +573,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	tick_do_update_jiffies64(now);
 	update_cpu_load_nohz();
 
+	calc_load_exit_idle();
 	touch_softlockup_watchdog();
 	/*
 	 * Cancel the scheduled timer and restore the tick
@@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
 {
 	tk->xtime_sec += ts->tv_sec;
 	tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+	tk_normalize_xtime(tk);
 }
 
 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
@@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	tk->xtime_nsec += cycle_delta * tk->mult;
 
 	/* If arch requires, add in gettimeoffset() */
-	tk->xtime_nsec += arch_gettimeoffset() << tk->shift;
+	tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
 
 	tk_normalize_xtime(tk);
 
@@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv)
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
+	struct timespec tmp;
+	int ret = 0;
 
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	timekeeping_forward_now(tk);
 
+	/* Make sure the proposed value is valid */
+	tmp = timespec_add(tk_xtime(tk), *ts);
+	if (!timespec_valid_strict(&tmp)) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	tk_xtime_add(tk, ts);
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
+error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 
 	write_sequnlock_irqrestore(&tk->lock, flags);
@@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 	/* signal hrtimers about time change */
 	clock_was_set();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
@@ -649,7 +659,20 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+	if (!timespec_valid_strict(&now)) {
+		pr_warn("WARNING: Persistent clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		now.tv_sec = 0;
+		now.tv_nsec = 0;
+	}
+
 	read_boot_clock(&boot);
+	if (!timespec_valid_strict(&boot)) {
+		pr_warn("WARNING: Boot clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		boot.tv_sec = 0;
+		boot.tv_nsec = 0;
+	}
 
 	seqlock_init(&tk->lock);
 
@@ -690,7 +713,7 @@ static struct timespec timekeeping_suspend_time;
 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
 							struct timespec *delta)
 {
-	if (!timespec_valid(delta)) {
+	if (!timespec_valid_strict(delta)) {
 		printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
 					"sleep delta value!\n");
 		return;
@@ -1129,6 +1152,10 @@ static void update_wall_time(void)
 	offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
 
+	/* Check if there's really nothing to do */
+	if (offset < tk->cycle_interval)
+		goto out;
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1161,9 +1188,9 @@ static void update_wall_time(void)
 	 * the vsyscall implementations are converted to use xtime_nsec
 	 * (shifted nanoseconds), this can be killed.
 	 */
-	remainder = tk->xtime_nsec & ((1 << tk->shift) - 1);
+	remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
 	tk->xtime_nsec -= remainder;
-	tk->xtime_nsec += 1 << tk->shift;
+	tk->xtime_nsec += 1ULL << tk->shift;
 	tk->ntp_error += remainder << tk->ntp_error_shift;
 
 	/*
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 		return;
 
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 		return;
 
@@ -66,6 +66,7 @@ enum {
 
 	/* pool flags */
 	POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
+	POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
 
 	/* worker flags */
 	WORKER_STARTED = 1 << 0, /* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_mutex);
+	bool managing = pool->flags & POOL_MANAGING_WORKERS;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
 
 	/* we did our part, wait for rebind_workers() to finish up */
 	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+
+	/*
+	 * rebind_workers() shouldn't finish until all workers passed the
+	 * above WORKER_REBIND wait.  Tell it when done.
+	 */
+	spin_lock_irq(&worker->pool->gcwq->lock);
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 
 /*
@@ -1396,12 +1406,15 @@ retry:
 	/* set REBIND and kick idle ones, we'll wait for these later */
 	for_each_worker_pool(pool, gcwq) {
 		list_for_each_entry(worker, &pool->idle_list, entry) {
+			unsigned long worker_flags = worker->flags;
+
 			if (worker->flags & WORKER_REBIND)
 				continue;
 
-			/* morph UNBOUND to REBIND */
-			worker->flags &= ~WORKER_UNBOUND;
-			worker->flags |= WORKER_REBIND;
+			/* morph UNBOUND to REBIND atomically */
+			worker_flags &= ~WORKER_UNBOUND;
+			worker_flags |= WORKER_REBIND;
+			ACCESS_ONCE(worker->flags) = worker_flags;
 
 			idle_rebind.cnt++;
 			worker->idle_rebind = &idle_rebind;
@@ -1419,25 +1432,15 @@ retry:
 		goto retry;
 	}
 
-	/*
-	 * All idle workers are rebound and waiting for %WORKER_REBIND to
-	 * be cleared inside idle_worker_rebind().  Clear and release.
-	 * Clearing %WORKER_REBIND from this foreign context is safe
-	 * because these workers are still guaranteed to be idle.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		list_for_each_entry(worker, &pool->idle_list, entry)
-			worker->flags &= ~WORKER_REBIND;
-
-	wake_up_all(&gcwq->rebind_hold);
-
-	/* rebind busy workers */
+	/* all idle workers are rebound, rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
 		struct work_struct *rebind_work = &worker->rebind_work;
+		unsigned long worker_flags = worker->flags;
 
-		/* morph UNBOUND to REBIND */
-		worker->flags &= ~WORKER_UNBOUND;
-		worker->flags |= WORKER_REBIND;
+		/* morph UNBOUND to REBIND atomically */
+		worker_flags &= ~WORKER_UNBOUND;
+		worker_flags |= WORKER_REBIND;
+		ACCESS_ONCE(worker->flags) = worker_flags;
 
 		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
 				     work_data_bits(rebind_work)))
@@ -1449,6 +1452,34 @@ retry:
 					    worker->scheduled.next,
 					    work_color_to_flags(WORK_NO_COLOR));
 	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind().  Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 *
+	 * We need to make sure all idle workers passed WORKER_REBIND wait
+	 * in idle_worker_rebind() before returning; otherwise, workers can
+	 * get stuck at the wait if hotplug cycle repeats.
+	 */
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			worker->flags &= ~WORKER_REBIND;
+			idle_rebind.cnt++;
+		}
+	}
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+	}
 }
 
 static struct worker *alloc_worker(void)
@@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker)
 	struct worker_pool *pool = worker->pool;
 	bool ret = false;
 
-	if (!mutex_trylock(&pool->manager_mutex))
+	if (pool->flags & POOL_MANAGING_WORKERS)
 		return ret;
 
+	pool->flags |= POOL_MANAGING_WORKERS;
+
+	/*
+	 * To simplify both worker management and CPU hotplug, hold off
+	 * management while hotplug is in progress.  CPU hotplug path can't
+	 * grab %POOL_MANAGING_WORKERS to achieve this because that can
+	 * lead to idle worker depletion (all become busy thinking someone
+	 * else is managing) which in turn can result in deadlock under
+	 * extreme circumstances.  Use @pool->manager_mutex to synchronize
+	 * manager against CPU hotplug.
+	 *
+	 * manager_mutex would always be free unless CPU hotplug is in
+	 * progress.  trylock first without dropping @gcwq->lock.
+	 */
+	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+		spin_unlock_irq(&pool->gcwq->lock);
+		mutex_lock(&pool->manager_mutex);
+		/*
+		 * CPU hotplug could have happened while we were waiting
+		 * for manager_mutex.  Hotplug itself can't handle us
+		 * because manager isn't either on idle or busy list, and
+		 * @gcwq's state and ours could have deviated.
+		 *
+		 * As hotplug is now excluded via manager_mutex, we can
+		 * simply try to bind.  It will succeed or fail depending
+		 * on @gcwq's current state.  Try it and adjust
+		 * %WORKER_UNBOUND accordingly.
+		 */
+		if (worker_maybe_bind_and_lock(worker))
+			worker->flags &= ~WORKER_UNBOUND;
+		else
+			worker->flags |= WORKER_UNBOUND;
+
+		ret = true;
+	}
+
 	pool->flags &= ~POOL_MANAGE_WORKERS;
 
 	/*
@@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker)
 	ret |= maybe_destroy_workers(pool);
 	ret |= maybe_create_worker(pool);
 
+	pool->flags &= ~POOL_MANAGING_WORKERS;
 	mutex_unlock(&pool->manager_mutex);
 	return ret;
 }