// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <trace/hooks/sched.h>

#include "walt.h"
#include "trace.h"

static inline unsigned long walt_lb_cpu_util(int cpu)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, cpu);

	return wrq->walt_stats.cumulative_runnable_avg_scaled;
}

static void walt_detach_task(struct task_struct *p, struct rq *src_rq,
			     struct rq *dst_rq)
{
	/* TODO: can we just replace this with detach_task() in fair.c? */
	deactivate_task(src_rq, p, 0);
	set_task_cpu(p, dst_rq->cpu);
}

static void walt_attach_task(struct task_struct *p, struct rq *rq)
{
	activate_task(rq, p, 0);
	check_preempt_curr(rq, p, 0);
}

static int stop_walt_lb_active_migration(void *data)
{
	struct rq *busiest_rq = data;
	int busiest_cpu = cpu_of(busiest_rq);
	int target_cpu = busiest_rq->push_cpu;
	struct rq *target_rq = cpu_rq(target_cpu);
	struct walt_rq *wrq = &per_cpu(walt_rq, busiest_cpu);
	struct task_struct *push_task;
	int push_task_detached = 0;

	raw_spin_lock_irq(&busiest_rq->__lock);
	push_task = wrq->push_task;

	/* sanity checks before initiating the pull */
	if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu) || !push_task)
		goto out_unlock;

	if (unlikely(busiest_cpu != raw_smp_processor_id() ||
		     !busiest_rq->active_balance))
		goto out_unlock;

	if (busiest_rq->nr_running <= 1)
		goto out_unlock;

	BUG_ON(busiest_rq == target_rq);

	if (task_on_rq_queued(push_task) &&
	    READ_ONCE(push_task->__state) == TASK_RUNNING &&
	    task_cpu(push_task) == busiest_cpu &&
	    cpu_active(target_cpu) &&
	    cpumask_test_cpu(target_cpu, push_task->cpus_ptr)) {
		walt_detach_task(push_task, busiest_rq, target_rq);
		push_task_detached = 1;
	}

out_unlock:
	/* called with busiest_rq lock held */
	busiest_rq->active_balance = 0;
	target_cpu = busiest_rq->push_cpu;
	clear_reserved(target_cpu);
	wrq->push_task = NULL;
	raw_spin_unlock(&busiest_rq->__lock);

	if (push_task_detached) {
		raw_spin_lock(&target_rq->__lock);
		walt_attach_task(push_task, target_rq);
		raw_spin_unlock(&target_rq->__lock);
	}

	if (push_task)
		put_task_struct(push_task);

	local_irq_enable();

	return 0;
}

struct walt_lb_rotate_work {
	struct work_struct	w;
	struct task_struct	*src_task;
	struct task_struct	*dst_task;
	int			src_cpu;
	int			dst_cpu;
};

DEFINE_PER_CPU(struct walt_lb_rotate_work, walt_lb_rotate_works);

static void walt_lb_rotate_work_func(struct work_struct *work)
{
	struct walt_lb_rotate_work *wr = container_of(work,
				struct walt_lb_rotate_work, w);
	struct rq *src_rq = cpu_rq(wr->src_cpu), *dst_rq = cpu_rq(wr->dst_cpu);
	unsigned long flags;

	migrate_swap(wr->src_task, wr->dst_task, wr->dst_cpu, wr->src_cpu);

	put_task_struct(wr->src_task);
	put_task_struct(wr->dst_task);

	local_irq_save(flags);
	double_rq_lock(src_rq, dst_rq);
	dst_rq->active_balance = 0;
	src_rq->active_balance = 0;
	double_rq_unlock(src_rq, dst_rq);
	local_irq_restore(flags);

	clear_reserved(wr->src_cpu);
	clear_reserved(wr->dst_cpu);
}

static void walt_lb_rotate_work_init(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct walt_lb_rotate_work *wr =
			&per_cpu(walt_lb_rotate_works, i);

		INIT_WORK(&wr->w, walt_lb_rotate_work_func);
	}
}

#define WALT_ROTATION_THRESHOLD_NS	16000000
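/*
 * walt_lb_check_for_rotation() - rotate big tasks between clusters.
 *
 * Called from the tick path on a min-capacity CPU that is running a
 * misfit task. If this CPU's current task has been enqueued the longest
 * among all misfit tasks on the min cluster, look for a higher-capacity
 * CPU whose current fair task has run for at least
 * WALT_ROTATION_THRESHOLD_NS (16 ms) and queue a work item that
 * migrate_swap()s the two tasks, so big tasks take turns on the
 * higher-capacity CPUs.
 */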
static void walt_lb_check_for_rotation(struct rq *src_rq)
{
	u64 wc, wait, max_wait = 0, run, max_run = 0;
	int deserved_cpu = nr_cpu_ids, dst_cpu = nr_cpu_ids;
	int i, src_cpu = cpu_of(src_rq);
	struct rq *dst_rq;
	struct walt_lb_rotate_work *wr = NULL;
	struct walt_task_struct *wts;

	if (!is_min_possible_cluster_cpu(src_cpu))
		return;

	/*
	 * Use src_rq->clock directly instead of rq_clock() since
	 * we do not have the rq lock and src_rq->clock was updated
	 * in the tick callpath.
	 */
	wc = src_rq->clock;
	for_each_possible_cpu(i) {
		struct rq *rq = cpu_rq(i);

		if (!is_min_possible_cluster_cpu(i))
			break;

		if (is_reserved(i))
			continue;

		if (!rq->misfit_task_load || !walt_fair_task(rq->curr))
			continue;

		wts = (struct walt_task_struct *) rq->curr->android_vendor_data1;
		wait = wc - wts->last_enqueued_ts;
		if (wait > max_wait) {
			max_wait = wait;
			deserved_cpu = i;
		}
	}

	if (deserved_cpu != src_cpu)
		return;

	for_each_possible_cpu(i) {
		struct rq *rq = cpu_rq(i);

		if (is_min_possible_cluster_cpu(i))
			continue;

		if (is_reserved(i))
			continue;

		if (!walt_fair_task(rq->curr))
			continue;

		if (rq->nr_running > 1)
			continue;

		wts = (struct walt_task_struct *) rq->curr->android_vendor_data1;
		run = wc - wts->last_enqueued_ts;
		if (run < WALT_ROTATION_THRESHOLD_NS)
			continue;

		if (run > max_run) {
			max_run = run;
			dst_cpu = i;
		}
	}

	if (dst_cpu == nr_cpu_ids)
		return;

	dst_rq = cpu_rq(dst_cpu);

	double_rq_lock(src_rq, dst_rq);
	if (walt_fair_task(dst_rq->curr) &&
	    !src_rq->active_balance && !dst_rq->active_balance &&
	    cpumask_test_cpu(dst_cpu, src_rq->curr->cpus_ptr) &&
	    cpumask_test_cpu(src_cpu, dst_rq->curr->cpus_ptr)) {
		get_task_struct(src_rq->curr);
		get_task_struct(dst_rq->curr);

		mark_reserved(src_cpu);
		mark_reserved(dst_cpu);

		wr = &per_cpu(walt_lb_rotate_works, src_cpu);
		wr->src_task = src_rq->curr;
		wr->dst_task = dst_rq->curr;
		wr->src_cpu = src_cpu;
		wr->dst_cpu = dst_cpu;

		dst_rq->active_balance = 1;
		src_rq->active_balance = 1;
	}
	double_rq_unlock(src_rq, dst_rq);

	if (wr)
		queue_work_on(src_cpu, system_highpri_wq, &wr->w);
}

static inline bool _walt_can_migrate_task(struct task_struct *p, int dst_cpu,
					  bool to_lower, bool to_higher,
					  bool force)
{
	struct walt_rq *wrq = &per_cpu(walt_rq, task_cpu(p));
	struct walt_task_struct *wts =
		(struct walt_task_struct *) p->android_vendor_data1;

	/* Don't detach task if it is under active migration */
	if (wrq->push_task == p)
		return false;

	if (to_lower) {
		if (wts->iowaited)
			return false;
		if (per_task_boost(p) == TASK_BOOST_STRICT_MAX &&
		    task_in_related_thread_group(p))
			return false;
		if (walt_pipeline_low_latency_task(p))
			return false;
		if (!force && walt_get_rtg_status(p))
			return false;
		if (!force && !task_fits_max(p, dst_cpu))
			return false;
	} else if (!to_higher) {
		if (!task_fits_max(p, dst_cpu) &&
		    wrq->walt_stats.nr_big_tasks < 2)
			return false;
	}

	/* Don't detach task if dest cpu is halted */
	if (cpu_halted(dst_cpu))
		return false;

	return true;
}

static inline bool need_active_lb(struct task_struct *p, int dst_cpu,
				  int src_cpu)
{
	struct walt_task_struct *wts =
		(struct walt_task_struct *) p->android_vendor_data1;

	if (cpu_rq(src_cpu)->active_balance)
		return false;

	if (!check_for_higher_capacity(dst_cpu, src_cpu))
		return false;

	if (!wts->misfit)
		return false;

	if (!is_min_possible_cluster_cpu(src_cpu) && !task_fits_max(p, dst_cpu))
		return false;

	if (task_reject_partialhalt_cpu(p, dst_cpu))
		return false;

	/*
	 * If the sleeping task on the dst_cpu and the task for which we are
	 * doing active load balance are both pipeline tasks, don't do active
	 * load balance. If we allow this, the sleeping task might wake up
	 * again on dst_cpu before the migration of the actively pulled task
	 * completes, which would result in two pipeline tasks on the same cpu.
	 */
	if (walt_pipeline_low_latency_task(p) &&
	    walt_pipeline_low_latency_task(cpu_rq(dst_cpu)->curr))
		return false;

	return true;
}
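/*
 * walt_lb_pull_tasks() - pull one fair task from src_cpu to dst_cpu.
 *
 * Three passes over src_rq->cfs_tasks:
 *  1. look for a candidate that passes the non-forced migration checks;
 *  2. retry with force, which relaxes the RTG and fits-max restrictions
 *     for downmigration;
 *  3. if only the running task is eligible, fall back to active load
 *     balance and push it to dst_cpu via the stopper thread.
 * When pulling to a lower-capacity CPU, prefer the candidate with the
 * smallest task_util(); otherwise prefer the largest. Each pass examines
 * at most six eligible candidates.
 *
 * Returns 1 if a task was pulled (and sets *pulled_task_struct), 0
 * otherwise, including the active balance case where the stopper does
 * the migration asynchronously.
 */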
static int walt_lb_pull_tasks(int dst_cpu, int src_cpu,
			      struct task_struct **pulled_task_struct)
{
	struct rq *dst_rq = cpu_rq(dst_cpu);
	struct rq *src_rq = cpu_rq(src_cpu);
	unsigned long flags;
	struct task_struct *p;
	bool active_balance = false, to_lower, to_higher;
	struct walt_rq *src_wrq = &per_cpu(walt_rq, src_cpu);
	struct walt_task_struct *wts;
	struct task_struct *pull_me;
	int task_visited;

	BUG_ON(src_cpu == dst_cpu);

	to_lower = check_for_higher_capacity(src_cpu, dst_cpu);
	to_higher = check_for_higher_capacity(dst_cpu, src_cpu);

	raw_spin_lock_irqsave(&src_rq->__lock, flags);

	/* first pass: honour all migration restrictions */
	pull_me = NULL;
	task_visited = 0;
	list_for_each_entry_reverse(p, &src_rq->cfs_tasks, se.group_node) {
		if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr))
			continue;

		if (task_on_cpu(src_rq, p))
			continue;

		if (!_walt_can_migrate_task(p, dst_cpu, to_lower, to_higher, false))
			continue;

		if (pull_me == NULL) {
			pull_me = p;
		} else {
			if (to_lower) {
				if (task_util(p) < task_util(pull_me))
					pull_me = p;
			} else if (task_util(p) > task_util(pull_me)) {
				pull_me = p;
			}
		}

		task_visited++;
		if (task_visited > 5)
			break;
	}
	if (pull_me) {
		walt_detach_task(pull_me, src_rq, dst_rq);
		goto unlock;
	}

	/* second pass: retry with force */
	pull_me = NULL;
	task_visited = 0;
	list_for_each_entry_reverse(p, &src_rq->cfs_tasks, se.group_node) {
		if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr))
			continue;

		if (task_on_cpu(src_rq, p))
			continue;

		if (!_walt_can_migrate_task(p, dst_cpu, to_lower, to_higher, true))
			continue;

		if (pull_me == NULL) {
			pull_me = p;
		} else {
			if (to_lower) {
				if (task_util(p) < task_util(pull_me))
					pull_me = p;
			} else if (task_util(p) > task_util(pull_me)) {
				pull_me = p;
			}
		}

		task_visited++;
		if (task_visited > 5)
			break;
	}
	if (pull_me) {
		walt_detach_task(pull_me, src_rq, dst_rq);
		goto unlock;
	}

	/* third pass: fall back to active balance of the running task */
	list_for_each_entry_reverse(p, &src_rq->cfs_tasks, se.group_node) {
		if (task_on_cpu(src_rq, p)) {
			if (cpumask_test_cpu(dst_cpu, p->cpus_ptr) &&
			    need_active_lb(p, dst_cpu, src_cpu)) {
				bool success;

				active_balance = true;
				src_rq->active_balance = 1;
				src_rq->push_cpu = dst_cpu;
				get_task_struct(p);
				src_wrq->push_task = p;
				mark_reserved(dst_cpu);

				/* lock must be dropped before waking the stopper */
				raw_spin_unlock_irqrestore(&src_rq->__lock, flags);

				/*
				 * Using our custom active load balance callback so
				 * that the push_task is really pulled onto this CPU.
				 */
				wts = (struct walt_task_struct *) p->android_vendor_data1;
				trace_walt_active_load_balance(p, src_cpu, dst_cpu, wts);
				success = stop_one_cpu_nowait(src_cpu,
						stop_walt_lb_active_migration,
						src_rq, &src_rq->active_balance_work);
				if (!success)
					clear_reserved(dst_cpu);

				return 0; /* we did not pull any task here */
			}
			goto unlock;
		}
	}

unlock:
	/* lock must be dropped before waking the stopper */
	raw_spin_unlock_irqrestore(&src_rq->__lock, flags);

	if (!pull_me)
		return 0;

	raw_spin_lock_irqsave(&dst_rq->__lock, flags);
	walt_attach_task(pull_me, dst_rq);
	raw_spin_unlock_irqrestore(&dst_rq->__lock, flags);
	*pulled_task_struct = pull_me;

	return 1; /* we pulled 1 task */
}
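/* util below which a task (or cpu) is treated as small: 102/1024, ~10% */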
#define SMALL_TASK_THRESHOLD	102
/*
 * find_first_idle_if_others_are_busy
 *
 * Get an idle cpu in the middle clusters.
 *
 * Returns an idle cpu from the middle clusters when no CPU there is
 * about to become newly idle. Returns -1 if there are no idle cpus in
 * the mask, or if a CPU in the mask is about to become newly idle and
 * will do its own newly idle load balancing.
 * Also returns -1 if called on a single or dual cluster system.
 */
static int find_first_idle_if_others_are_busy(void)
{
	int i, first_idle = -1;
	struct cpumask src_mask;

	cpumask_clear(&src_mask);

	/* collect the middle clusters, skipping the first and the last */
	for (i = 0; i < num_sched_clusters; i++) {
		if (i == 0 || i == num_sched_clusters - 1)
			continue;
		cpumask_or(&src_mask, &src_mask, &cpu_array[0][i]);
	}

	for_each_cpu(i, &src_mask) {
		if (!cpu_active(i))
			continue;
		if (cpu_halted(i))
			continue;

		if (available_idle_cpu(i))
			first_idle = i;

		if (cpu_util(i) < SMALL_TASK_THRESHOLD &&
		    cpu_rq(i)->nr_running == 1) {
			/*
			 * There was one CPU in the mask that was almost idle,
			 * but not quite. When it becomes idle, it will do
			 * newidle load balance, starting with its own cluster.
			 * So there is no reason to kick anything in this
			 * cluster, i.e. don't perform a wasted kick.
			 *
			 * Return -1: a cpu in this cluster is about to do
			 * newly idle load balancing.
			 */
			first_idle = -1;
			break;
		}
	}

	return first_idle;
}

static int walt_lb_find_busiest_similar_cap_cpu(int dst_cpu,
				const cpumask_t *src_mask,
				int *has_misfit, bool is_newidle)
{
	int i;
	int busiest_cpu = -1;
	unsigned long util, busiest_util = 0;
	struct walt_rq *wrq;

	for_each_cpu(i, src_mask) {
		wrq = &per_cpu(walt_rq, i);
		trace_walt_lb_cpu_util(i, wrq);

		if (cpu_rq(i)->nr_running < 2 || !cpu_rq(i)->cfs.h_nr_running)
			continue;

		util = walt_lb_cpu_util(i);
		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = i;
	}

	return busiest_cpu;
}

static int walt_lb_find_busiest_from_higher_cap_cpu(int dst_cpu,
				const cpumask_t *src_mask,
				int *has_misfit, bool is_newidle)
{
	int i;
	int busiest_cpu = -1;
	unsigned long util, busiest_util = 0;
	unsigned long total_capacity = 0, total_util = 0, total_nr = 0;
	int total_cpus = 0;
	struct walt_rq *wrq;
	bool asymcap_boost = ASYMCAP_BOOST(dst_cpu);

	if (cpu_partial_halted(dst_cpu))
		return -1;

	for_each_cpu(i, src_mask) {
		if (!cpu_active(i))
			continue;

		wrq = &per_cpu(walt_rq, i);
		trace_walt_lb_cpu_util(i, wrq);

		util = walt_lb_cpu_util(i);
		total_cpus += 1;
		total_util += util;
		total_capacity += capacity_orig_of(i);
		total_nr += cpu_rq(i)->cfs.h_nr_running;

		if (cpu_rq(i)->cfs.h_nr_running < 2)
			continue;

		if (cpu_rq(i)->cfs.h_nr_running == 2 &&
		    task_util(cpu_rq(i)->curr) < SMALL_TASK_THRESHOLD)
			continue;

		/*
		 * During rotation, two silver fmax tasks get placed on
		 * gold/prime and the CPU may not be overutilized, but
		 * for rotation we have to spread out.
		 */
		if (!walt_rotation_enabled && !cpu_overutilized(i) &&
		    !asymcap_boost)
			continue;

		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = i;
	}

	/*
	 * Don't allow migrating to a lower cluster unless this high
	 * capacity cluster is sufficiently loaded: on average more
	 * than one runnable task per CPU, and total_util above 80%
	 * of total_capacity (1024/1280).
	 */
	if (!walt_rotation_enabled && !asymcap_boost) {
		if (total_nr <= total_cpus ||
		    total_util * 1280 < total_capacity * 1024)
			busiest_cpu = -1;
	}

	return busiest_cpu;
}
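/*
 * walt_lb_find_busiest_from_lower_cap_cpu() - a higher capacity CPU
 * scans a lower capacity cluster for the busiest CPU. Unlike the
 * higher-cap variant above, a CPU running a single task can be selected
 * here when it carries big (misfit) tasks and the destination is idle,
 * so that active balance can upmigrate the running task.
 */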
static int walt_lb_find_busiest_from_lower_cap_cpu(int dst_cpu,
				const cpumask_t *src_mask,
				int *has_misfit, bool is_newidle)
{
	int i;
	int busiest_cpu = -1;
	unsigned long util, busiest_util = 0;
	unsigned long total_capacity = 0, total_util = 0, total_nr = 0;
	int total_cpus = 0;
	int busy_nr_big_tasks = 0;
	struct walt_rq *wrq;
	bool treat_dst_idle = is_newidle || available_idle_cpu(dst_cpu);

	/*
	 * A higher capacity CPU is looking at a lower capacity cluster.
	 * Active balance and big tasks are in play; other than that, it
	 * is very much the same as above. We really don't need this as a
	 * separate block and will refactor it after final testing is done.
	 */
	for_each_cpu(i, src_mask) {
		wrq = &per_cpu(walt_rq, i);
		if (!cpu_active(i))
			continue;

		trace_walt_lb_cpu_util(i, wrq);

		util = walt_lb_cpu_util(i);
		total_cpus += 1;
		total_util += util;
		total_capacity += capacity_orig_of(i);
		total_nr += cpu_rq(i)->cfs.h_nr_running;

		/*
		 * no point in selecting this CPU as busy, as
		 * active balance is in progress.
		 */
		if (cpu_rq(i)->active_balance)
			continue;

		/* active migration is allowed only to an idle cpu */
		if (cpu_rq(i)->cfs.h_nr_running < 2 &&
		    (!wrq->walt_stats.nr_big_tasks || !treat_dst_idle))
			continue;

		if (!walt_rotation_enabled && !cpu_overutilized(i) &&
		    !ASYMCAP_BOOST(i))
			continue;

		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = i;
		busy_nr_big_tasks = wrq->walt_stats.nr_big_tasks;
	}

	if (!walt_rotation_enabled && !busy_nr_big_tasks &&
	    !(busiest_cpu != -1 && ASYMCAP_BOOST(busiest_cpu))) {
		if (total_nr <= total_cpus ||
		    total_util * 1280 < total_capacity * 1024)
			busiest_cpu = -1;
	}

	if (busy_nr_big_tasks && busiest_cpu != -1)
		*has_misfit = true;

	return busiest_cpu;
}

static int walt_lb_find_busiest_cpu(int dst_cpu, const cpumask_t *src_mask,
				    int *has_misfit, bool is_newidle)
{
	int fsrc_cpu = cpumask_first(src_mask);
	int busiest_cpu;

	if (check_for_higher_capacity(dst_cpu, fsrc_cpu))
		busiest_cpu = walt_lb_find_busiest_from_lower_cap_cpu(dst_cpu,
					src_mask, has_misfit, is_newidle);
	else if (check_for_higher_capacity(fsrc_cpu, dst_cpu))
		busiest_cpu = walt_lb_find_busiest_from_higher_cap_cpu(dst_cpu,
					src_mask, has_misfit, is_newidle);
	else
		busiest_cpu = walt_lb_find_busiest_similar_cap_cpu(dst_cpu,
					src_mask, has_misfit, is_newidle);

	return busiest_cpu;
}

static DEFINE_RAW_SPINLOCK(walt_lb_migration_lock);

/*
 * walt_lb_tick() - tick-path misfit handling: if the currently running
 * fair task is misfit for this CPU, either rotate big tasks (when WALT
 * rotation is enabled) or actively migrate the task to an idle, higher
 * capacity CPU chosen by walt_find_energy_efficient_cpu().
 */
void walt_lb_tick(struct rq *rq)
{
	int prev_cpu = rq->cpu, new_cpu, ret;
	struct task_struct *p = rq->curr;
	unsigned long flags;
	struct walt_rq *prev_wrq = &per_cpu(walt_rq, cpu_of(rq));
	struct walt_task_struct *wts =
		(struct walt_task_struct *) p->android_vendor_data1;

	raw_spin_lock(&rq->__lock);
	if (available_idle_cpu(prev_cpu) && is_reserved(prev_cpu) &&
	    !rq->active_balance)
		clear_reserved(prev_cpu);
	raw_spin_unlock(&rq->__lock);

	if (!walt_fair_task(p))
		return;

	walt_cfs_tick(rq);

	if (!rq->misfit_task_load)
		return;

	if (READ_ONCE(p->__state) != TASK_RUNNING || p->nr_cpus_allowed == 1)
		return;

	raw_spin_lock_irqsave(&walt_lb_migration_lock, flags);

	if (walt_rotation_enabled) {
		walt_lb_check_for_rotation(rq);
		goto out_unlock;
	}

	rcu_read_lock();
	new_cpu = walt_find_energy_efficient_cpu(p, prev_cpu, 0, 1);
	rcu_read_unlock();
	if (new_cpu < 0)
		goto out_unlock;

	/* prevent active task migration to a busy or same/lower capacity CPU */
	if (!available_idle_cpu(new_cpu) ||
	    !check_for_higher_capacity(new_cpu, prev_cpu))
		goto out_unlock;

	if (!is_min_possible_cluster_cpu(prev_cpu) && !task_fits_max(p, new_cpu))
		goto out_unlock;

	raw_spin_lock(&rq->__lock);
	if (rq->active_balance) {
		raw_spin_unlock(&rq->__lock);
		goto out_unlock;
	}

	rq->active_balance = 1;
	rq->push_cpu = new_cpu;
	get_task_struct(p);
	prev_wrq->push_task = p;
	raw_spin_unlock(&rq->__lock);

	mark_reserved(new_cpu);
	raw_spin_unlock_irqrestore(&walt_lb_migration_lock, flags);

	trace_walt_active_load_balance(p, prev_cpu, new_cpu, wts);
	ret = stop_one_cpu_nowait(prev_cpu, stop_walt_lb_active_migration,
				  rq, &rq->active_balance_work);
	if (!ret)
		clear_reserved(new_cpu);
	else
		wake_up_if_idle(new_cpu);

	return;

out_unlock:
	raw_spin_unlock_irqrestore(&walt_lb_migration_lock, flags);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}
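/*
 * walt_balance_rt() - pull one pushable RT task onto a newly idle CPU.
 * A task that woke up less than WALT_RT_PULL_THRESHOLD_NS (250 us) ago
 * is left alone on its current CPU.
 */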
#define WALT_RT_PULL_THRESHOLD_NS	250000
static bool walt_balance_rt(struct rq *this_rq)
{
	int i, this_cpu = this_rq->cpu, src_cpu = this_cpu;
	struct rq *src_rq;
	struct task_struct *p;
	struct walt_task_struct *wts;
	bool pulled = false;
	u64 wallclock;

	/* can't help if this CPU already has a runnable RT task */
	if (sched_rt_runnable(this_rq))
		return false;

	/* check if any CPU has a pushable RT task */
	for_each_possible_cpu(i) {
		struct rq *rq = cpu_rq(i);

		if (!has_pushable_tasks(rq))
			continue;

		src_cpu = i;
		break;
	}

	if (src_cpu == this_cpu)
		return false;

	src_rq = cpu_rq(src_cpu);
	double_lock_balance(this_rq, src_rq);

	/* lock is dropped, so check again */
	if (sched_rt_runnable(this_rq))
		goto unlock;

	p = pick_highest_pushable_task(src_rq, this_cpu);
	if (!p)
		goto unlock;

	if (!cpumask_test_cpu(this_cpu, p->cpus_ptr))
		goto unlock;

	wts = (struct walt_task_struct *) p->android_vendor_data1;

	/*
	 * Use rq->clock directly instead of rq_clock() since rq->clock
	 * was updated recently in the __schedule() -> pick_next_task()
	 * callpath. Time lost in grabbing rq locks will likely be
	 * corrected via max.
	 */
	wallclock = max(this_rq->clock, src_rq->clock);
	if (wallclock > wts->last_wake_ts &&
	    wallclock - wts->last_wake_ts < WALT_RT_PULL_THRESHOLD_NS)
		goto unlock;

	pulled = true;
	deactivate_task(src_rq, p, 0);
	set_task_cpu(p, this_cpu);
	activate_task(this_rq, p, 0);

unlock:
	double_unlock_balance(this_rq, src_rq);

	return pulled;
}

__read_mostly unsigned int sysctl_sched_force_lb_enable = 1;

/*
 * should_help_min_cap() - a non-min-capacity CPU should pull from the
 * min cluster when any min-cluster CPU is carrying big tasks.
 */
static bool should_help_min_cap(int this_cpu)
{
	int cpu;

	if (!sysctl_sched_force_lb_enable ||
	    is_min_possible_cluster_cpu(this_cpu))
		return false;

	for_each_cpu(cpu, &cpu_array[0][0]) {
		struct walt_rq *wrq = &per_cpu(walt_rq, cpu);

		if (wrq->walt_stats.nr_big_tasks)
			return true;
	}

	return false;
}

/* similar to sysctl_sched_migration_cost */
#define NEWIDLE_BALANCE_THRESHOLD	500000
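/*
 * walt_newidle_balance() - WALT's replacement for the fair-class newly
 * idle balance. Tries an RT pull first, then walks the clusters in this
 * CPU's preference order looking for the busiest CPU to pull one fair
 * task from. When little idle time has accumulated (avg_idle below
 * NEWIDLE_BALANCE_THRESHOLD), smaller CPUs don't help bigger ones, and
 * bigger CPUs only help smaller ones that carry misfit tasks. *done is
 * set so the caller skips the generic load balance.
 */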
static void walt_newidle_balance(struct rq *this_rq, struct rq_flags *rf,
				 int *pulled_task, int *done,
				 int force_overload)
{
	int this_cpu = this_rq->cpu;
	struct walt_rq *wrq = &per_cpu(walt_rq, this_cpu);
	int order_index;
	int busy_cpu = -1;
	bool enough_idle = (this_rq->avg_idle > NEWIDLE_BALANCE_THRESHOLD);
	bool help_min_cap = false;
	int first_idle;
	int has_misfit = 0;
	int i;
	struct task_struct *pulled_task_struct = NULL;
	struct walt_sched_cluster *cluster;

	if (unlikely(walt_disabled))
		return;

	for_each_sched_cluster(cluster) {
		if (cluster != cpu_cluster(this_cpu))
			update_freq_relation(cluster);
	}

	/*
	 * newly idle load balance is completely handled here, so
	 * set done to skip the load balance by the caller.
	 */
	*done = 1;
	*pulled_task = 0;

	/*
	 * This CPU is about to enter idle, so clear the
	 * misfit_task_load and mark the idle stamp.
	 */
	this_rq->misfit_task_load = 0;
	this_rq->idle_stamp = rq_clock(this_rq);

	if (!cpu_active(this_cpu))
		return;

	if (cpu_halted(this_cpu))
		return;

	if (is_reserved(this_cpu))
		return;

	/* the cluster isn't initialized until after WALT is enabled */
	order_index = wrq->cluster->id;

	rq_unpin_lock(this_rq, rf);

	/*
	 * Since we drop the rq lock while doing RT balance, check if
	 * any tasks are queued on this rq and bail out early.
	 */
	if (walt_balance_rt(this_rq) || this_rq->nr_running)
		goto rt_pulled;

	if (!force_overload && !READ_ONCE(this_rq->rd->overload))
		goto repin;

	if (atomic_read(&this_rq->nr_iowait) && !enough_idle)
		goto repin;

	help_min_cap = should_help_min_cap(this_cpu);

	raw_spin_unlock(&this_rq->__lock);

	/*
	 * Careful: we dropped the lock, and it has to be re-acquired
	 * before returning. Since the rq lock is dropped, tasks can be
	 * queued remotely, so keep a check on nr_running and bail out.
	 */
	order_index = wrq->cluster->id;
	for (i = 0; i < num_sched_clusters; i++) {
		int first_cpu = cpumask_first(&cpu_array[order_index][i]);
		struct walt_rq *src_wrq = &per_cpu(walt_rq, first_cpu);
		int src_cluster_id = src_wrq->cluster->id;

		busy_cpu = walt_lb_find_busiest_cpu(this_cpu,
				&cpu_array[order_index][i], &has_misfit, true);
		if (busy_cpu == -1)
			continue;

		/*
		 * When there is not enough idle time:
		 * small should not help big;
		 * big should help small ONLY if a misfit is present;
		 * same capacity cpus should help each other.
		 */
		if (!enough_idle &&
		    (capacity_orig_of(this_cpu) < capacity_orig_of(busy_cpu) ||
		     (capacity_orig_of(this_cpu) > capacity_orig_of(busy_cpu) &&
		      !has_misfit)))
			continue;

		/* if helping the farthest cluster, kick a middle one instead */
		if (num_sched_clusters > 2 &&
		    ((wrq->cluster->id == 0 &&
		      src_cluster_id == num_sched_clusters - 1) ||
		     (wrq->cluster->id == num_sched_clusters - 1 &&
		      src_cluster_id == 0))) {
			first_idle = find_first_idle_if_others_are_busy();
			if (first_idle != -1) {
				walt_kick_cpu(first_idle);
			} else {
				if (walt_rotation_enabled &&
				    capacity_orig_of(this_cpu) >
				    capacity_orig_of(busy_cpu)) {
					/*
					 * When big task rotation is active,
					 * help the smallest immediately.
					 */
					goto found_busy_cpu;
				}
			}
		} else {
			goto found_busy_cpu;
		}
	}

	goto unlock;

found_busy_cpu:
	/* sanity checks before attempting the pull */
	if (this_rq->nr_running > 0 || (busy_cpu == this_cpu))
		goto unlock;

	*pulled_task = walt_lb_pull_tasks(this_cpu, busy_cpu,
					  &pulled_task_struct);

unlock:
	raw_spin_lock(&this_rq->__lock);

rt_pulled:
	if (this_rq->cfs.h_nr_running && !*pulled_task)
		*pulled_task = 1;

	/* Is there a task of a higher priority class? */
	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
		*pulled_task = -1;

	/* reset the idle time stamp if we pulled any task */
	if (*pulled_task)
		this_rq->idle_stamp = 0;

repin:
	rq_repin_lock(this_rq, rf);
	trace_walt_newidle_balance(this_cpu, busy_cpu, *pulled_task,
				   help_min_cap, enough_idle,
				   pulled_task_struct);
}

/* run newidle balance as a result of an unhalt operation */
void walt_smp_newidle_balance(void *ignored)
{
	int cpu = raw_smp_processor_id();
	struct rq *rq = cpu_rq(cpu);
	struct rq_flags rf;
	int pulled_task;
	int done = 0;

	rq_lock(rq, &rf);
	update_rq_clock(rq);
	walt_newidle_balance(rq, &rf, &pulled_task, &done, true);
	resched_curr(rq);
	rq_unlock(rq, &rf);
}

static DEFINE_PER_CPU(call_single_data_t, nib_csd);

void walt_smp_call_newidle_balance(int cpu)
{
	call_single_data_t *csd = &per_cpu(nib_csd, cpu);

	if (unlikely(walt_disabled))
		return;

	smp_call_function_single_async(cpu, csd);
}
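/*
 * walt_find_busiest_queue() - android_rvh_find_busiest_queue hook.
 * Overrides the upstream busiest-queue selection with the same
 * cluster-aware policy used for newly idle balance above.
 */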
static void walt_find_busiest_queue(void *unused, int dst_cpu,
				    struct sched_group *group,
				    struct cpumask *env_cpus,
				    struct rq **busiest, int *done)
{
	int fsrc_cpu = cpumask_first(sched_group_span(group));
	int busiest_cpu = -1;
	struct cpumask src_mask;
	int has_misfit;

	if (unlikely(walt_disabled))
		return;

	*done = 1;
	*busiest = NULL;

	/*
	 * same cluster means there will only be 1 CPU in the
	 * busy group, so just select it.
	 */
	if (same_cluster(dst_cpu, fsrc_cpu)) {
		busiest_cpu = fsrc_cpu;
		goto done;
	}

	/*
	 * We allow inter cluster migrations only if the source
	 * group is sufficiently loaded. The upstream load
	 * balancer is a bit more generous.
	 *
	 * Re-use the same code that we use for newly idle load
	 * balance; the policies remain the same.
	 */
	cpumask_and(&src_mask, sched_group_span(group), env_cpus);
	busiest_cpu = walt_lb_find_busiest_cpu(dst_cpu, &src_mask,
					       &has_misfit, false);

done:
	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);

	trace_walt_find_busiest_queue(dst_cpu, busiest_cpu, src_mask.bits[0]);
}

/*
 * We only decide if a nohz balance kick is needed or not. The first
 * CPU in nohz.idle will come out of idle and do load balance on behalf
 * of every CPU. Adding another hook to decide which cpu to kick is
 * useless; most of the time it is impossible to decide which CPU has
 * to come out, because we get to kick only once.
 */
static void walt_nohz_balancer_kick(void *unused, struct rq *rq,
				    unsigned int *flags, int *done)
{
	if (unlikely(walt_disabled))
		return;

	*done = 1;

	/*
	 * tick path migration takes care of the misfit task,
	 * so we have to check for nr_running >= 2 here.
	 */
	if (rq->nr_running >= 2 && cpu_overutilized(rq->cpu)) {
		*flags = NOHZ_KICK_MASK;
		trace_walt_nohz_balance_kick(rq);
	}
}

static void walt_can_migrate_task(void *unused, struct task_struct *p,
				  int dst_cpu, int *can_migrate)
{
	bool to_lower, to_higher;

	if (unlikely(walt_disabled))
		return;

	to_lower = check_for_higher_capacity(task_cpu(p), dst_cpu);
	to_higher = check_for_higher_capacity(dst_cpu, task_cpu(p));

	if (_walt_can_migrate_task(p, dst_cpu, to_lower, to_higher, true))
		return;

	*can_migrate = 0;
}

static void walt_sched_newidle_balance(void *unused, struct rq *this_rq,
				       struct rq_flags *rf, int *pulled_task,
				       int *done)
{
	walt_newidle_balance(this_rq, rf, pulled_task, done, false);
}

void walt_lb_init(void)
{
	int cpu;

	walt_lb_rotate_work_init();

	register_trace_android_rvh_sched_nohz_balancer_kick(walt_nohz_balancer_kick, NULL);
	register_trace_android_rvh_can_migrate_task(walt_can_migrate_task, NULL);
	register_trace_android_rvh_find_busiest_queue(walt_find_busiest_queue, NULL);
	register_trace_android_rvh_sched_newidle_balance(walt_sched_newidle_balance, NULL);

	for_each_cpu(cpu, cpu_possible_mask) {
		call_single_data_t *csd;

		csd = &per_cpu(nib_csd, cpu);
		INIT_CSD(csd, walt_smp_newidle_balance,
			 (void *)(unsigned long)cpu);
	}
}