- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
- * Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
- */
- #include <linux/seq_file.h>
- #include <trace/hooks/sched.h>
- #include <trace/hooks/binder.h>
- #include "walt.h"
- #include "trace.h"
- #include <../../../drivers/android/binder_internal.h>
- #include "../../../drivers/android/binder_trace.h"
- static void create_util_to_cost_pd(struct em_perf_domain *pd)
- {
- int util, cpu = cpumask_first(to_cpumask(pd->cpus));
- unsigned long fmax;
- unsigned long scale_cpu;
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
- struct walt_sched_cluster *cluster = wrq->cluster;
- fmax = (u64)pd->table[pd->nr_perf_states - 1].frequency;
- scale_cpu = arch_scale_cpu_capacity(cpu);
- for (util = 0; util < 1024; util++) {
- int j;
- int f = (fmax * util) / scale_cpu;
- struct em_perf_state *ps = &pd->table[0];
- for (j = 0; j < pd->nr_perf_states; j++) {
- ps = &pd->table[j];
- if (ps->frequency >= f)
- break;
- }
- cluster->util_to_cost[util] = ps->cost;
- }
- }
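- /* Populate util_to_cost[] for every performance domain in the root domain. */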
- void create_util_to_cost(void)
- {
- struct perf_domain *pd;
- struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
- rcu_read_lock();
- pd = rcu_dereference(rd->pd);
- for (; pd; pd = pd->next)
- create_util_to_cost_pd(pd->em_pd);
- rcu_read_unlock();
- }
- DECLARE_PER_CPU(unsigned long, gov_last_util);
- /* Migration margins */
- unsigned int sched_capacity_margin_up[WALT_NR_CPUS] = {
- [0 ... WALT_NR_CPUS-1] = 1078 /* ~5% margin */
- };
- unsigned int sched_capacity_margin_down[WALT_NR_CPUS] = {
- [0 ... WALT_NR_CPUS-1] = 1205 /* ~15% margin */
- };
- /* Migration margins for topapp */
- unsigned int sched_capacity_margin_early_up[WALT_NR_CPUS] = {
- [0 ... WALT_NR_CPUS-1] = 1078 /* ~5% margin */
- };
- unsigned int sched_capacity_margin_early_down[WALT_NR_CPUS] = {
- [0 ... WALT_NR_CPUS-1] = 1205 /* ~15% margin */
- };
- static inline bool
- bias_to_this_cpu(struct task_struct *p, int cpu, int start_cpu)
- {
- bool base_test = cpumask_test_cpu(cpu, p->cpus_ptr) &&
- cpu_active(cpu);
- bool start_cap_test = !check_for_higher_capacity(start_cpu, cpu);
- return base_test && start_cap_test;
- }
- static inline bool task_demand_fits(struct task_struct *p, int dst_cpu)
- {
- if (is_max_possible_cluster_cpu(dst_cpu))
- return true;
- if (!task_in_related_thread_group(p) && p->prio >= 124 &&
- !is_min_possible_cluster_cpu(dst_cpu) &&
- !is_max_possible_cluster_cpu(dst_cpu)) {
- /* a non topapp low prio task fits on gold */
- return true;
- }
- return task_fits_capacity(p, dst_cpu);
- }
- struct find_best_target_env {
- bool is_rtg;
- int need_idle;
- int fastpath;
- int start_cpu;
- int order_index;
- int end_index;
- bool strict_max;
- int skip_cpu;
- u64 prs[8];
- };
- /*
- * cpu_util_without: compute cpu utilization without any contributions from *p
- * @cpu: the CPU whose utilization is requested
- * @p: the task whose utilization should be discounted
- *
- * The utilization of a CPU is defined by the utilization of tasks currently
- * enqueued on that CPU as well as tasks which are currently sleeping after an
- * execution on that CPU.
- *
- * This method returns the utilization of the specified CPU by discounting the
- * utilization of the specified task, whenever the task is currently
- * contributing to the CPU utilization.
- */
- static unsigned long cpu_util_without(int cpu, struct task_struct *p)
- {
- unsigned int util;
- /*
- * WALT does not decay idle tasks in the same manner
- * as PELT, so it makes little sense to subtract task
- * utilization from cpu utilization. Instead just use
- * cpu_util for this case.
- */
- if (likely(READ_ONCE(p->__state) == TASK_WAKING))
- return cpu_util(cpu);
- /* Task has no contribution or is new */
- if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
- return cpu_util(cpu);
- util = max_t(long, cpu_util(cpu) - task_util(p), 0);
- /*
- * Utilization (estimated) can exceed the CPU capacity, thus let's
- * clamp to the maximum CPU capacity to ensure consistency with
- * the cpu_util call.
- */
- return min_t(unsigned long, util, capacity_orig_of(cpu));
- }
- static inline bool walt_task_skip_min_cpu(struct task_struct *p)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- return (sched_boost_type != CONSERVATIVE_BOOST) &&
- walt_get_rtg_status(p) && (wts->unfilter ||
- walt_pipeline_low_latency_task(p));
- }
- static inline bool walt_is_many_wakeup(int sibling_count_hint)
- {
- return sibling_count_hint >= sysctl_sched_many_wakeup_threshold;
- }
- static inline bool walt_target_ok(int target_cpu, int order_index)
- {
- return !((order_index != num_sched_clusters - 1) &&
- (cpumask_weight(&cpu_array[order_index][0]) == 1) &&
- (target_cpu == cpumask_first(&cpu_array[order_index][0])));
- }
- #define MIN_UTIL_FOR_ENERGY_EVAL 52
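- /*
-  * Choose the cluster search range for @p: @order_index selects which
-  * row of cpu_array (i.e. which cluster ordering) the search starts
-  * from, and @end_index is the earliest cluster in that ordering at
-  * which walt_find_best_target() may stop once a candidate is found.
-  * Boosts, RTG membership and task demand adjust both and may make the
-  * energy evaluation unnecessary.
-  */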
- static void walt_get_indicies(struct task_struct *p, int *order_index,
- int *end_index, int per_task_boost, bool is_uclamp_boosted,
- bool *energy_eval_needed)
- {
- *order_index = 0;
- *end_index = 0;
- if (num_sched_clusters <= 1)
- return;
- if (per_task_boost > TASK_BOOST_ON_MID) {
- *order_index = num_sched_clusters - 1;
- *energy_eval_needed = false;
- return;
- }
- if (is_full_throttle_boost()) {
- *energy_eval_needed = false;
- *order_index = num_sched_clusters - 1;
- *end_index = num_sched_clusters - 2;
- for (; *end_index >= 0; (*end_index)--)
- if (task_demand_fits(p,
- cpumask_first(&cpu_array[*order_index][*end_index])))
- break;
- return;
- }
- if (is_uclamp_boosted || per_task_boost ||
- task_boost_policy(p) == SCHED_BOOST_ON_BIG ||
- walt_task_skip_min_cpu(p)) {
- *energy_eval_needed = false;
- *order_index = 1;
- *end_index = max(0, num_sched_clusters - 3);
- if (sysctl_sched_asymcap_boost) {
- (*end_index)++;
- return;
- }
- }
- for (; *order_index < num_sched_clusters - 1; (*order_index)++) {
- if (task_demand_fits(p, cpumask_first(&cpu_array[*order_index][0])))
- break;
- }
- if (*order_index == 0 &&
- (task_util(p) >= MIN_UTIL_FOR_ENERGY_EVAL) &&
- !(p->in_iowait && task_in_related_thread_group(p)) &&
- !walt_get_rtg_status(p) &&
- !(sched_boost_type == CONSERVATIVE_BOOST && task_sched_boost(p)) &&
- !sysctl_sched_suppress_region2
- )
- *end_index = 1;
- if (p->in_iowait && task_in_related_thread_group(p))
- *energy_eval_needed = false;
- }
- enum fastpaths {
- NONE = 0,
- SYNC_WAKEUP,
- PREV_CPU_FASTPATH,
- CLUSTER_PACKING_FASTPATH,
- PIPELINE_FASTPATH,
- };
- static inline bool is_complex_sibling_idle(int cpu)
- {
- if (cpu_l2_sibling[cpu] != -1)
- return available_idle_cpu(cpu_l2_sibling[cpu]);
- return false;
- }
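- /*
-  * Filter out CPUs that should not be considered by
-  * walt_find_best_target(): inactive, halted (or partially halted when
-  * not searching from the smallest cluster), reserved for a pending
-  * migration, under high IRQ load, explicitly skipped by the caller, or
-  * already running MVP tasks while @p is not strict-max boosted.
-  */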
- static inline bool walt_should_reject_fbt_cpu(struct walt_rq *wrq, struct task_struct *p,
- int cpu, int order_index,
- struct find_best_target_env *fbt_env)
- {
- if (!cpu_active(cpu))
- return true;
- if (cpu_halted(cpu))
- return true;
- if (order_index != 0 && cpu_partial_halted(cpu))
- return true;
- /*
- * This CPU is the target of an active migration that's
- * yet to complete. Avoid placing another task on it.
- */
- if (is_reserved(cpu))
- return true;
- if (sched_cpu_high_irqload(cpu))
- return true;
- if (fbt_env->skip_cpu == cpu)
- return true;
- if (wrq->num_mvp_tasks > 0 && per_task_boost(p) != TASK_BOOST_STRICT_MAX)
- return true;
- return false;
- }
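- /*
-  * prev_cpu fastpath: place the task back on its previous CPU when it
-  * is active, idle, allowed by the task's affinity, neither halted nor
-  * reserved, and either belongs to the same cluster as start_cpu or is
-  * a partially-haltable CPU whose partial-halt state matches the
-  * requested order_index.
-  */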
- bool select_prev_cpu_fastpath(int prev_cpu, int start_cpu, int order_index,
- struct task_struct *p)
- {
- struct walt_rq *prev_wrq = &per_cpu(walt_rq, prev_cpu);
- struct walt_rq *start_wrq = &per_cpu(walt_rq, start_cpu);
- bool valid_part_haltable_prev_cpu = false, valid_prev_cpu = false;
- if (!cpu_active(prev_cpu))
- return false;
- if (!available_idle_cpu(prev_cpu))
- return false;
- if (!cpumask_test_cpu(prev_cpu, p->cpus_ptr))
- return false;
- if (cpu_halted(prev_cpu))
- return false;
- if (is_reserved(prev_cpu))
- return false;
- valid_part_haltable_prev_cpu = cpumask_test_cpu(prev_cpu, &part_haltable_cpus) &&
- ((order_index == 0 && cpu_partial_halted(prev_cpu)) ||
- (order_index == 1 && !cpu_partial_halted(prev_cpu)));
- valid_prev_cpu = (prev_wrq->cluster->id == start_wrq->cluster->id);
- if (!(valid_part_haltable_prev_cpu || valid_prev_cpu))
- return false;
- return true;
- }
- #define DIRE_STRAITS_PREV_NR_LIMIT 10
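- /*
-  * Scan the clusters in cpu_array[order_index] order and nominate, per
-  * cluster, either the best idle CPU (shallowest idle state, lowest
-  * cumulative util) or the busy CPU with the maximum spare capacity.
-  * Packing and prev_cpu fastpaths short-circuit the scan; the fallback
-  * choices are handled at the end of the function.
-  */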
- static void walt_find_best_target(struct sched_domain *sd,
- cpumask_t *candidates,
- struct task_struct *p,
- struct find_best_target_env *fbt_env)
- {
- unsigned long min_task_util = uclamp_task_util(p);
- long target_max_spare_cap = 0;
- unsigned long best_idle_cuml_util = ULONG_MAX;
- unsigned int min_exit_latency = UINT_MAX;
- int i, start_cpu;
- long spare_wake_cap, most_spare_wake_cap = 0;
- int most_spare_cap_cpu = -1;
- int least_nr_cpu = -1;
- unsigned int cpu_rq_runnable_cnt = UINT_MAX;
- int prev_cpu = task_cpu(p);
- int order_index = fbt_env->order_index, end_index = fbt_env->end_index;
- int stop_index = INT_MAX;
- int cluster;
- unsigned int target_nr_rtg_high_prio = UINT_MAX;
- bool rtg_high_prio_task = task_rtg_high_prio(p);
- cpumask_t visit_cpus;
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- int packing_cpu;
- long most_spare_wake_cap_target_clusters = LONG_MIN;
- int most_spare_cap_target_cluster_cpu = -1;
- /* Find start CPU based on boost value */
- start_cpu = fbt_env->start_cpu;
- /*
- * For I/O tasks that are worth running on higher capacity CPUs, stop
- * the search at the end of the higher capacity cluster(s).
- */
- if (order_index > 0 && wts->iowaited) {
- stop_index = num_sched_clusters - 2;
- most_spare_wake_cap = LONG_MIN;
- }
- if (fbt_env->strict_max) {
- stop_index = 0;
- most_spare_wake_cap = LONG_MIN;
- }
- /* fast path for packing_cpu */
- packing_cpu = walt_find_and_choose_cluster_packing_cpu(start_cpu, p);
- if (packing_cpu >= 0) {
- fbt_env->fastpath = CLUSTER_PACKING_FASTPATH;
- cpumask_set_cpu(packing_cpu, candidates);
- goto out;
- }
- /* fast path for prev_cpu */
- if (select_prev_cpu_fastpath(prev_cpu, start_cpu, order_index, p)) {
- fbt_env->fastpath = PREV_CPU_FASTPATH;
- cpumask_set_cpu(prev_cpu, candidates);
- goto out;
- }
- for (cluster = 0; cluster < num_sched_clusters; cluster++) {
- int best_idle_cpu_cluster = -1;
- int target_cpu_cluster = -1;
- int this_complex_idle = 0;
- int best_complex_idle = 0;
- target_max_spare_cap = 0;
- min_exit_latency = UINT_MAX;
- best_idle_cuml_util = ULONG_MAX;
- cpumask_and(&visit_cpus, p->cpus_ptr,
- &cpu_array[order_index][cluster]);
- for_each_cpu(i, &visit_cpus) {
- unsigned long capacity_orig = capacity_orig_of(i);
- unsigned long wake_cpu_util, new_cpu_util, new_util_cuml;
- long spare_cap;
- unsigned int idle_exit_latency = UINT_MAX;
- struct walt_rq *wrq = &per_cpu(walt_rq, i);
- trace_sched_cpu_util(i, NULL);
- /* record the prev_runnable_sums (prs) as we visit cpus in a cluster */
- fbt_env->prs[i] = wrq->prev_runnable_sum + wrq->grp_time.prev_runnable_sum;
- if (walt_should_reject_fbt_cpu(wrq, p, i, order_index, fbt_env))
- continue;
- /*
- * p's blocked utilization is still accounted for on prev_cpu
- * so prev_cpu will receive a negative bias due to the double
- * accounting. However, the blocked utilization may be zero.
- */
- wake_cpu_util = cpu_util_without(i, p);
- spare_wake_cap = capacity_orig - wake_cpu_util;
- if (spare_wake_cap > most_spare_wake_cap) {
- most_spare_wake_cap = spare_wake_cap;
- most_spare_cap_cpu = i;
- }
- /*
- * Keep track of the least loaded CPU in the target clusters; it can
- * be used as a fallback placement core for a BIG rtg task in case
- * all the cores are busy, so that we avoid the prev_cpu fallback
- * mechanism.
- */
- if ((cluster <= end_index) &&
- (spare_wake_cap > most_spare_wake_cap_target_clusters)) {
- most_spare_wake_cap_target_clusters = spare_wake_cap;
- most_spare_cap_target_cluster_cpu = i;
- }
- /*
- * Keep track of runnables for each CPU; if none of the
- * CPUs has spare capacity then use the CPU with the least
- * number of runnables.
- */
- if (cpu_rq(i)->nr_running < cpu_rq_runnable_cnt) {
- cpu_rq_runnable_cnt = cpu_rq(i)->nr_running;
- least_nr_cpu = i;
- }
- /*
- * Ensure minimum capacity to grant the required boost.
- * The target CPU can be already at a capacity level higher
- * than the one required to boost the task.
- */
- new_cpu_util = wake_cpu_util + min_task_util;
- if (new_cpu_util > capacity_orig)
- continue;
- /*
- * Find an optimal backup IDLE CPU for non latency
- * sensitive tasks.
- *
- * Looking for:
- * - favoring shallowest idle states
- * i.e. avoid to wakeup deep-idle CPUs
- *
- * The following code path is used by non latency
- * sensitive tasks if IDLE CPUs are available. If at
- * least one of such CPUs are available it sets the
- * best_idle_cpu to the most suitable idle CPU to be
- * selected.
- *
- * If idle CPUs are available, favour these CPUs to
- * improve performances by spreading tasks.
- * Indeed, the energy_diff() computed by the caller
- * will take care to ensure the minimization of energy
- * consumptions without affecting performance.
- */
- if (available_idle_cpu(i)) {
- idle_exit_latency = walt_get_idle_exit_latency(cpu_rq(i));
- this_complex_idle = is_complex_sibling_idle(i) ? 1 : 0;
- if (this_complex_idle < best_complex_idle)
- continue;
- /*
- * Among CPUs of the same capacity, prefer the one in the
- * shallowest idle state over those in deeper idle states.
- */
- if (idle_exit_latency > min_exit_latency)
- continue;
- new_util_cuml = cpu_util_cum(i);
- if (min_exit_latency == idle_exit_latency &&
- (best_idle_cpu_cluster == prev_cpu ||
- (i != prev_cpu &&
- new_util_cuml > best_idle_cuml_util)))
- continue;
- min_exit_latency = idle_exit_latency;
- best_idle_cuml_util = new_util_cuml;
- best_idle_cpu_cluster = i;
- best_complex_idle = this_complex_idle;
- continue;
- }
- /* skip visiting any more busy CPUs if an idle one was found */
- if (best_idle_cpu_cluster != -1)
- continue;
- /*
- * Compute the maximum possible capacity we expect
- * to have available on this CPU once the task is
- * enqueued here.
- */
- spare_cap = capacity_orig - new_cpu_util;
- /*
- * Try to spread the rtg high prio tasks so that they
- * don't preempt each other. This is an optimistic
- * check assuming an rtg high prio task can actually preempt
- * the currently running task with the given vruntime
- * boost.
- */
- if (rtg_high_prio_task) {
- if (walt_nr_rtg_high_prio(i) > target_nr_rtg_high_prio)
- continue;
- /* Favor CPUs with maximum spare capacity */
- if (walt_nr_rtg_high_prio(i) == target_nr_rtg_high_prio &&
- spare_cap < target_max_spare_cap)
- continue;
- } else {
- /* Favor CPUs with maximum spare capacity */
- if (spare_cap < target_max_spare_cap)
- continue;
- }
- target_max_spare_cap = spare_cap;
- target_nr_rtg_high_prio = walt_nr_rtg_high_prio(i);
- target_cpu_cluster = i;
- }
- if (best_idle_cpu_cluster != -1)
- cpumask_set_cpu(best_idle_cpu_cluster, candidates);
- else if (target_cpu_cluster != -1)
- cpumask_set_cpu(target_cpu_cluster, candidates);
- if ((cluster >= end_index) && (!cpumask_empty(candidates)) &&
- walt_target_ok(target_cpu_cluster, order_index))
- break;
- if (most_spare_cap_cpu != -1 && cluster >= stop_index)
- break;
- }
- /*
- * We have set idle or target as long as they are valid CPUs.
- * If we don't find either, then we fall back to most_spare_cap.
- * If we don't find most spare cap, we fall back to prev_cpu,
- * provided that prev_cpu is active and has fewer than
- * DIRE_STRAITS_PREV_NR_LIMIT runnables; otherwise, we fall back to
- * the CPU with the least number of runnables.
- */
- if (unlikely(cpumask_empty(candidates))) {
- if (most_spare_cap_cpu != -1)
- cpumask_set_cpu(most_spare_cap_cpu, candidates);
- else if (most_spare_cap_target_cluster_cpu != -1 && (order_index > 0) &&
- fbt_env->is_rtg)
- cpumask_set_cpu(most_spare_cap_target_cluster_cpu, candidates);
- else if (cpu_active(prev_cpu)
- && (cpu_rq(prev_cpu)->nr_running < DIRE_STRAITS_PREV_NR_LIMIT))
- cpumask_set_cpu(prev_cpu, candidates);
- else if (least_nr_cpu != -1)
- cpumask_set_cpu(least_nr_cpu, candidates);
- }
- out:
- trace_sched_find_best_target(p, min_task_util, start_cpu, cpumask_bits(candidates)[0],
- most_spare_cap_cpu, order_index, end_index,
- fbt_env->skip_cpu, task_on_rq_queued(p), least_nr_cpu,
- cpu_rq_runnable_cnt, most_spare_cap_target_cluster_cpu);
- }
- static inline unsigned long
- cpu_util_next_walt(int cpu, struct task_struct *p, int dst_cpu)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
- unsigned long util = wrq->walt_stats.cumulative_runnable_avg_scaled;
- bool queued = task_on_rq_queued(p);
- /*
- * When task is queued,
- * (a) The evaluating CPU (cpu) is task's current CPU. If the
- * task is migrating, discount the task contribution from the
- * evaluation cpu.
- * (b) The evaluating CPU (cpu) is task's current CPU. If the
- * task is NOT migrating, nothing to do. The contribution is
- * already present on the evaluation CPU.
- * (c) The evaluating CPU (cpu) is not task's current CPU. But
- * the task is migrating to the evaluating CPU. So add the
- * task contribution to it.
- * (d) The evaluating CPU (cpu) is neither the current CPU nor
- * the destination CPU. don't care.
- *
- * When the task is NOT queued, i.e. waking, its contribution is not
- * present on any CPU.
- *
- * (a) If the evaluating CPU is the destination CPU, add the task
- * contribution.
- * (b) The evaluation CPU is not the destination CPU, don't care.
- */
- if (unlikely(queued)) {
- if (task_cpu(p) == cpu) {
- if (dst_cpu != cpu)
- util = max_t(long, util - task_util(p), 0);
- } else if (dst_cpu == cpu) {
- util += task_util(p);
- }
- } else if (dst_cpu == cpu) {
- util += task_util(p);
- }
- return min_t(unsigned long, util, capacity_orig_of(cpu));
- }
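- /*
-  * Estimate the prev_runnable_sum that @cpu would carry if @p were
-  * placed on @dst_cpu, mimicking the inter-cluster migration fixups
-  * WALT applies to the window statistics.
-  */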
- static inline u64
- cpu_util_next_walt_prs(int cpu, struct task_struct *p, int dst_cpu, bool prev_dst_same_cluster,
- u64 *prs)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- long util = prs[cpu];
- if (wts->prev_window) {
- if (!prev_dst_same_cluster) {
- /* intercluster migration of non rtg task - mimic fixups */
- util -= wts->prev_window_cpu[cpu];
- if (util < 0)
- util = 0;
- if (cpu == dst_cpu)
- util += wts->prev_window;
- }
- } else {
- if (cpu == dst_cpu)
- util += wts->demand;
- }
- return util;
- }
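- /*
-  * Return the precomputed cost for @util on @cpu's cluster. On CPU 0
-  * the cost is scaled by sysctl_em_inflate_pct/100 once @util crosses
-  * sysctl_em_inflate_thres.
-  */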
- static inline unsigned long get_util_to_cost(int cpu, unsigned long util)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu);
- if (cpu == 0 && util > sysctl_em_inflate_thres)
- return mult_frac(wrq->cluster->util_to_cost[util], sysctl_em_inflate_pct, 100);
- else
- return wrq->cluster->util_to_cost[util];
- }
- /**
- * walt_em_cpu_energy() - Estimates the energy consumed by the CPUs of a
- * performance domain
- * @pd : performance domain for which energy has to be estimated
- * @max_util : highest utilization among CPUs of the domain
- * @sum_util : sum of the utilization of all CPUs in the domain
- *
- * This function must be used only for CPU devices. There is no validation
- * that the EM is of CPU type and has a cpumask allocated. It is called from
- * the scheduler code quite frequently and that is why there are no checks.
- *
- * Return: the sum of the energy consumed by the CPUs of the domain assuming
- * a capacity state satisfying the max utilization of the domain.
- */
- static inline unsigned long walt_em_cpu_energy(struct em_perf_domain *pd,
- unsigned long max_util, unsigned long sum_util,
- struct compute_energy_output *output, unsigned int x)
- {
- unsigned long scale_cpu, cost;
- int cpu;
- if (!sum_util)
- return 0;
- /*
- * In order to predict the capacity state, map the utilization of the
- * most utilized CPU of the performance domain to a requested frequency,
- * like schedutil.
- */
- cpu = cpumask_first(to_cpumask(pd->cpus));
- scale_cpu = arch_scale_cpu_capacity(cpu);
- max_util = max_util + (max_util >> 2); /* add 25% headroom to account for TARGET_LOAD, usually 80% */
- max_util = max(max_util,
- (arch_scale_freq_capacity(cpu) * scale_cpu) >>
- SCHED_CAPACITY_SHIFT);
- /*
- * The capacity of a CPU in the domain at the performance state (ps)
- * can be computed as:
- *
- * ps->freq * scale_cpu
- * ps->cap = -------------------- (1)
- * cpu_max_freq
- *
- * So, ignoring the costs of idle states (which are not available in
- * the EM), the energy consumed by this CPU at that performance state
- * is estimated as:
- *
- * ps->power * cpu_util
- * cpu_nrg = -------------------- (2)
- * ps->cap
- *
- * since 'cpu_util / ps->cap' represents its percentage of busy time.
- *
- * NOTE: Although the result of this computation actually is in
- * units of power, it can be manipulated as an energy value
- * over a scheduling period, since it is assumed to be
- * constant during that interval.
- *
- * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
- * of two terms:
- *
- * ps->power * cpu_max_freq cpu_util
- * cpu_nrg = ------------------------ * --------- (3)
- * ps->freq scale_cpu
- *
- * The first term is static, and is stored in the em_perf_state struct
- * as 'ps->cost'.
- *
- * Since all CPUs of the domain have the same micro-architecture, they
- * share the same 'ps->cost', and the same CPU capacity. Hence, the
- * total energy of the domain (which is the simple sum of the energy of
- * all of its CPUs) can be factorized as:
- *
- * ps->cost * \Sum cpu_util
- * pd_nrg = ------------------------ (4)
- * scale_cpu
- */
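- /*
-  * Worked example with assumed numbers: if the lookup below returns
-  * cost = 300 for this max_util, and sum_util = 512 with
-  * scale_cpu = 1024, the estimate is 300 * 512 / 1024 = 150 units.
-  */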
- if (max_util >= 1024)
- max_util = 1023;
- cost = get_util_to_cost(cpu, max_util);
- if (output) {
- output->cost[x] = cost;
- output->max_util[x] = max_util;
- output->sum_util[x] = sum_util;
- }
- return cost * sum_util / scale_cpu;
- }
- /*
- * walt_pd_compute_energy(): Estimates the energy that @pd would consume if @p was
- * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
- * landscape of @pd's CPUs after the task migration, and uses the Energy Model
- * to compute what would be the energy if we decided to actually migrate that
- * task.
- */
- static long
- walt_pd_compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd, u64 *prs,
- struct compute_energy_output *output, unsigned int x)
- {
- struct cpumask *pd_mask = perf_domain_span(pd);
- unsigned long max_util = 0, sum_util = 0;
- int cpu;
- unsigned long cpu_util;
- bool prev_dst_same_cluster = false;
- if (same_cluster(task_cpu(p), dst_cpu))
- prev_dst_same_cluster = true;
- /*
- * The capacity state of CPUs of the current rd can be driven by CPUs
- * of another rd if they belong to the same pd. So, account for the
- * utilization of these CPUs too by masking pd with cpu_online_mask
- * instead of the rd span.
- *
- * If an entire pd is outside of the current rd, it will not appear in
- * its pd list and will not be accounted by compute_energy().
- */
- for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
- sum_util += cpu_util_next_walt(cpu, p, dst_cpu);
- cpu_util = cpu_util_next_walt_prs(cpu, p, dst_cpu, prev_dst_same_cluster, prs);
- max_util = max(max_util, cpu_util);
- }
- max_util = scale_time_to_util(max_util);
- if (output)
- output->cluster_first_cpu[x] = cpumask_first(pd_mask);
- return walt_em_cpu_energy(pd->em_pd, max_util, sum_util, output, x);
- }
- static inline long
- walt_compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd,
- cpumask_t *candidates, u64 *prs, struct compute_energy_output *output)
- {
- long energy = 0;
- unsigned int x = 0;
- for (; pd; pd = pd->next) {
- struct cpumask *pd_mask = perf_domain_span(pd);
- if (cpumask_intersects(candidates, pd_mask)
- || cpumask_test_cpu(task_cpu(p), pd_mask)) {
- energy += walt_pd_compute_energy(p, dst_cpu, pd, prs, output, x);
- x++;
- }
- }
- return energy;
- }
- static inline int wake_to_idle(struct task_struct *p)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- struct walt_task_struct *cur_wts =
- (struct walt_task_struct *) current->android_vendor_data1;
- return (cur_wts->wake_up_idle || wts->wake_up_idle);
- }
- /* return true if cpu should be chosen over best_energy_cpu */
- static inline bool select_cpu_same_energy(int cpu, int best_cpu, int prev_cpu)
- {
- bool new_cpu_is_idle = available_idle_cpu(cpu);
- bool best_cpu_is_idle = available_idle_cpu(best_cpu);
- if (check_for_higher_capacity(cpu, best_cpu))
- return false;
- if (check_for_higher_capacity(best_cpu, cpu))
- return true;
- if (best_cpu_is_idle && walt_get_idle_exit_latency(cpu_rq(best_cpu)) <= 1)
- return false;
- if (new_cpu_is_idle && walt_get_idle_exit_latency(cpu_rq(cpu)) <= 1)
- return true;
- if (best_cpu_is_idle && !new_cpu_is_idle)
- return false;
- if (new_cpu_is_idle && !best_cpu_is_idle)
- return true;
- if (best_cpu == prev_cpu)
- return false;
- if (cpu == prev_cpu)
- return true;
- if (best_cpu_is_idle && new_cpu_is_idle)
- return false;
- if (cpu_util(best_cpu) <= cpu_util(cpu))
- return false;
- return true;
- }
- static inline unsigned int capacity_spare_of(int cpu)
- {
- return capacity_orig_of(cpu) - cpu_util(cpu);
- }
- static DEFINE_PER_CPU(cpumask_t, energy_cpus);
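- /*
-  * WALT wake-up CPU selection: take the pipeline or sync-wakeup
-  * fastpaths when possible, otherwise compare the EM energy of the
-  * candidate CPUs returned by walt_find_best_target() (which has its
-  * own packing and prev_cpu fastpaths) against prev_cpu and pick the
-  * cheapest one, breaking ties with select_cpu_same_energy().
-  */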
- int walt_find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
- int sync, int sibling_count_hint)
- {
- unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
- struct root_domain *rd = cpu_rq(cpumask_first(cpu_active_mask))->rd;
- int weight, cpu = smp_processor_id(), best_energy_cpu = prev_cpu;
- struct perf_domain *pd;
- unsigned long cur_energy;
- cpumask_t *candidates;
- bool is_rtg, curr_is_rtg;
- struct find_best_target_env fbt_env;
- bool need_idle = wake_to_idle(p);
- u64 start_t = 0;
- int delta = 0;
- int task_boost = per_task_boost(p);
- bool uclamp_boost = walt_uclamp_boosted(p);
- int start_cpu = 0, order_index, end_index;
- int first_cpu;
- bool energy_eval_needed = true;
- struct compute_energy_output output;
- struct walt_task_struct *wts;
- int pipeline_cpu;
- if (walt_is_many_wakeup(sibling_count_hint) && prev_cpu != cpu &&
- cpumask_test_cpu(prev_cpu, p->cpus_ptr))
- return prev_cpu;
- if (unlikely(!cpu_array))
- return prev_cpu;
- /* Pre-select a set of candidate CPUs. */
- candidates = this_cpu_ptr(&energy_cpus);
- cpumask_clear(candidates);
- wts = (struct walt_task_struct *) p->android_vendor_data1;
- pipeline_cpu = wts->pipeline_cpu;
- if ((wts->low_latency & WALT_LOW_LATENCY_MASK) &&
- (pipeline_cpu != -1) &&
- walt_task_skip_min_cpu(p) &&
- cpumask_test_cpu(pipeline_cpu, p->cpus_ptr) &&
- cpu_active(pipeline_cpu) &&
- !cpu_halted(pipeline_cpu)) {
- if ((p == cpu_rq(pipeline_cpu)->curr) ||
- !walt_pipeline_low_latency_task(cpu_rq(pipeline_cpu)->curr)) {
- best_energy_cpu = pipeline_cpu;
- fbt_env.fastpath = PIPELINE_FASTPATH;
- goto out;
- }
- }
- walt_get_indicies(p, &order_index, &end_index, task_boost, uclamp_boost,
- &energy_eval_needed);
- start_cpu = cpumask_first(&cpu_array[order_index][0]);
- is_rtg = task_in_related_thread_group(p);
- curr_is_rtg = task_in_related_thread_group(cpu_rq(cpu)->curr);
- if (trace_sched_task_util_enabled())
- start_t = sched_clock();
- rcu_read_lock();
- need_idle |= uclamp_latency_sensitive(p);
- fbt_env.fastpath = 0;
- fbt_env.need_idle = need_idle;
- if (sync && (need_idle || (is_rtg && curr_is_rtg)))
- sync = 0;
- if (sysctl_sched_sync_hint_enable && sync
- && bias_to_this_cpu(p, cpu, start_cpu) && !cpu_halted(cpu)) {
- best_energy_cpu = cpu;
- fbt_env.fastpath = SYNC_WAKEUP;
- goto unlock;
- }
- /* if symmetrical system, default to upstream behavior */
- pd = rcu_dereference(rd->pd);
- if (!pd)
- goto fail;
- fbt_env.is_rtg = is_rtg;
- fbt_env.start_cpu = start_cpu;
- fbt_env.order_index = order_index;
- fbt_env.end_index = end_index;
- fbt_env.strict_max = is_rtg &&
- (task_boost == TASK_BOOST_STRICT_MAX);
- fbt_env.skip_cpu = walt_is_many_wakeup(sibling_count_hint) ?
- cpu : -1;
- walt_find_best_target(NULL, candidates, p, &fbt_env);
- /* Bail out if no candidate was found. */
- weight = cpumask_weight(candidates);
- if (!weight)
- goto unlock;
- first_cpu = cpumask_first(candidates);
- if (fbt_env.fastpath == CLUSTER_PACKING_FASTPATH) {
- best_energy_cpu = first_cpu;
- goto unlock;
- }
- if (weight == 1) {
- if (available_idle_cpu(first_cpu) || first_cpu == prev_cpu) {
- best_energy_cpu = first_cpu;
- goto unlock;
- }
- }
- if (need_idle && available_idle_cpu(first_cpu)) {
- best_energy_cpu = first_cpu;
- goto unlock;
- }
- if (!energy_eval_needed) {
- int max_spare_cpu = first_cpu;
- for_each_cpu(cpu, candidates) {
- if (capacity_spare_of(max_spare_cpu) < capacity_spare_of(cpu))
- max_spare_cpu = cpu;
- }
- best_energy_cpu = max_spare_cpu;
- goto unlock;
- }
- if (READ_ONCE(p->__state) == TASK_WAKING)
- delta = task_util(p);
- if (cpumask_test_cpu(prev_cpu, p->cpus_ptr) && !__cpu_overutilized(prev_cpu, delta)) {
- if (trace_sched_compute_energy_enabled()) {
- memset(&output, 0, sizeof(output));
- prev_energy = walt_compute_energy(p, prev_cpu, pd, candidates, fbt_env.prs,
- &output);
- } else {
- prev_energy = walt_compute_energy(p, prev_cpu, pd, candidates, fbt_env.prs,
- NULL);
- }
- best_energy = prev_energy;
- trace_sched_compute_energy(p, prev_cpu, prev_energy, 0, 0, 0, &output);
- } else {
- prev_energy = best_energy = ULONG_MAX;
- if (weight == 1) {
- best_energy_cpu = first_cpu;
- goto unlock;
- }
- }
- /* Select the best candidate energy-wise. */
- for_each_cpu(cpu, candidates) {
- if (cpu == prev_cpu)
- continue;
- if (trace_sched_compute_energy_enabled()) {
- memset(&output, 0, sizeof(output));
- cur_energy = walt_compute_energy(p, cpu, pd, candidates, fbt_env.prs,
- &output);
- } else {
- cur_energy = walt_compute_energy(p, cpu, pd, candidates, fbt_env.prs,
- NULL);
- }
- if (cur_energy < best_energy) {
- best_energy = cur_energy;
- best_energy_cpu = cpu;
- } else if (cur_energy == best_energy) {
- if (select_cpu_same_energy(cpu, best_energy_cpu,
- prev_cpu)) {
- best_energy = cur_energy;
- best_energy_cpu = cpu;
- }
- }
- trace_sched_compute_energy(p, cpu, cur_energy,
- prev_energy, best_energy, best_energy_cpu, &output);
- }
- /*
- * Pick the prev CPU if the best energy CPU doesn't save at least
- * prev_energy >> 5 (~3%) of the energy used by prev_cpu.
- */
- if (!(available_idle_cpu(best_energy_cpu) &&
- walt_get_idle_exit_latency(cpu_rq(best_energy_cpu)) <= 1) &&
- (prev_energy != ULONG_MAX) && (best_energy_cpu != prev_cpu) &&
- ((prev_energy - best_energy) <= prev_energy >> 5) &&
- !check_for_higher_capacity(prev_cpu, start_cpu))
- best_energy_cpu = prev_cpu;
- unlock:
- rcu_read_unlock();
- out:
- if (best_energy_cpu < 0 || best_energy_cpu >= WALT_NR_CPUS)
- best_energy_cpu = prev_cpu;
- trace_sched_task_util(p, cpumask_bits(candidates)[0], best_energy_cpu,
- sync, fbt_env.need_idle, fbt_env.fastpath,
- start_t, uclamp_boost, start_cpu);
- return best_energy_cpu;
- fail:
- rcu_read_unlock();
- return -1;
- }
- static void
- walt_select_task_rq_fair(void *unused, struct task_struct *p, int prev_cpu,
- int sd_flag, int wake_flags, int *target_cpu)
- {
- int sync;
- int sibling_count_hint;
- if (unlikely(walt_disabled))
- return;
- sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
- sibling_count_hint = p->wake_q_count;
- p->wake_q_count = 0;
- *target_cpu = walt_find_energy_efficient_cpu(p, prev_cpu, sync, sibling_count_hint);
- }
- static void walt_binder_low_latency_set(void *unused, struct task_struct *task,
- bool sync, struct binder_proc *proc)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;
- if (unlikely(walt_disabled))
- return;
- if (task && ((task_in_related_thread_group(current) &&
- task->group_leader->prio < MAX_RT_PRIO) ||
- (current->group_leader->prio < MAX_RT_PRIO &&
- task_in_related_thread_group(task))))
- wts->low_latency |= WALT_LOW_LATENCY_BINDER;
- else
- /*
- * Clear the low_latency flag if the criterion above is not met. This
- * handles the usecase where WALT_LOW_LATENCY_BINDER is set for a binder
- * thread by one task and, before WALT clears the flag on timer expiry,
- * some other task tries to use the same binder thread.
- *
- * The flag only gets cleared when a binder transaction is initiated
- * and the above condition to set the flag is not satisfied.
- */
- wts->low_latency &= ~WALT_LOW_LATENCY_BINDER;
- }
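- /*
-  * Propagate TASK_BOOST_STRICT_MAX across a synchronous binder
-  * transaction: stash the server thread's current boost in the
-  * transaction and raise it to TASK_BOOST_STRICT_MAX; the restore hook
-  * below puts the saved boost back when priority is restored.
-  */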
- static void binder_set_priority_hook(void *data,
- struct binder_transaction *bndrtrans, struct task_struct *task)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;
- struct walt_task_struct *current_wts =
- (struct walt_task_struct *) current->android_vendor_data1;
- if (unlikely(walt_disabled))
- return;
- if (bndrtrans && bndrtrans->need_reply && current_wts->boost == TASK_BOOST_STRICT_MAX) {
- bndrtrans->android_vendor_data1 = wts->boost;
- wts->boost = TASK_BOOST_STRICT_MAX;
- }
- }
- static void binder_restore_priority_hook(void *data,
- struct binder_transaction *bndrtrans, struct task_struct *task)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) task->android_vendor_data1;
- if (unlikely(walt_disabled))
- return;
- if (bndrtrans && wts->boost == TASK_BOOST_STRICT_MAX)
- wts->boost = bndrtrans->android_vendor_data1;
- }
- /*
- * Higher prio mvp can preempt lower prio mvp.
- *
- * However, the lower prio MVPs get a larger slice since we expect them to
- * be the work horses. For example, binders have a higher MVP prio and
- * can preempt long running rtg prio tasks, but binders lose their
- * powers within 3 msec whereas rtg prio tasks can run longer than that.
- */
- int walt_get_mvp_task_prio(struct task_struct *p)
- {
- if (walt_procfs_low_latency_task(p) ||
- walt_pipeline_low_latency_task(p))
- return WALT_LL_PIPE_MVP;
- if (per_task_boost(p) == TASK_BOOST_STRICT_MAX)
- return WALT_TASK_BOOST_MVP;
- if (walt_binder_low_latency_task(p))
- return WALT_BINDER_MVP;
- if (task_rtg_high_prio(p))
- return WALT_RTG_MVP;
- return WALT_NOT_MVP;
- }
- static inline unsigned int walt_cfs_mvp_task_limit(struct task_struct *p)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- /* Binder MVP tasks are high prio but get only a single slice */
- if (wts->mvp_prio == WALT_BINDER_MVP)
- return WALT_MVP_SLICE;
- return WALT_MVP_LIMIT;
- }
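- /*
-  * Insert @wts into the rq's MVP list, ordered by mvp_prio with the
-  * highest value first. @at_front additionally places the task ahead
-  * of entries with the same priority (used when the task is currently
-  * running on this CPU).
-  */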
- static void walt_cfs_insert_mvp_task(struct walt_rq *wrq, struct walt_task_struct *wts,
- bool at_front)
- {
- struct list_head *pos;
- list_for_each(pos, &wrq->mvp_tasks) {
- struct walt_task_struct *tmp_wts = container_of(pos, struct walt_task_struct,
- mvp_list);
- if (at_front) {
- if (wts->mvp_prio >= tmp_wts->mvp_prio)
- break;
- } else {
- if (wts->mvp_prio > tmp_wts->mvp_prio)
- break;
- }
- }
- list_add(&wts->mvp_list, pos->prev);
- wrq->num_mvp_tasks++;
- }
- void walt_cfs_deactivate_mvp_task(struct rq *rq, struct task_struct *p)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- list_del_init(&wts->mvp_list);
- wts->mvp_prio = WALT_NOT_MVP;
- wrq->num_mvp_tasks--;
- }
- /*
- * MVP task runtime update happens here. Three possibilities:
- *
- * de-activated: The MVP consumed its runtime. Non MVP can preempt.
- * slice expired: MVP slice is expired and other MVP can preempt.
- * slice not expired: This MVP task can continue to run.
- */
- #define MAX_MVP_TIME_NS 500000000ULL
- #define MVP_THROTTLE_TIME_NS 100000000ULL
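- /*
-  * An rq that keeps running MVP tasks for more than MAX_MVP_TIME_NS
-  * enters the skip_mvp state and stops preferring MVP picks until
-  * MVP_THROTTLE_TIME_NS has elapsed.
-  */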
- static void walt_cfs_account_mvp_runtime(struct rq *rq, struct task_struct *curr)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts = (struct walt_task_struct *) curr->android_vendor_data1;
- u64 slice;
- unsigned int limit;
- walt_lockdep_assert_rq(rq, NULL);
- /*
- * RQ clock update happens in tick path in the scheduler.
- * Since we drop the lock in the scheduler before calling
- * into vendor hook, it is possible that update flags are
- * reset by another rq lock and unlock. Do the update here
- * if required.
- */
- if (!(rq->clock_update_flags & RQCF_UPDATED))
- update_rq_clock(rq);
- if (wrq->mvp_throttle_time) {
- if ((rq->clock - wrq->mvp_throttle_time) > MVP_THROTTLE_TIME_NS) {
- wrq->skip_mvp = false;
- wrq->mvp_throttle_time = 0;
- }
- } else if (wrq->mvp_arrival_time) {
- if ((rq->clock - wrq->mvp_arrival_time) > MAX_MVP_TIME_NS) {
- wrq->skip_mvp = true;
- wrq->mvp_arrival_time = 0;
- wrq->mvp_throttle_time = rq->clock;
- }
- }
- /*
- * continue accounting even in skip_mvp state if a MVP task is selected
- * by scheduler core to run on CPU.
- */
- if (curr->se.sum_exec_runtime > wts->sum_exec_snapshot_for_total)
- wts->total_exec = curr->se.sum_exec_runtime - wts->sum_exec_snapshot_for_total;
- else
- wts->total_exec = 0;
- if (curr->se.sum_exec_runtime > wts->sum_exec_snapshot_for_slice)
- slice = curr->se.sum_exec_runtime - wts->sum_exec_snapshot_for_slice;
- else
- slice = 0;
- /* slice is not expired */
- if (slice < WALT_MVP_SLICE)
- return;
- wts->sum_exec_snapshot_for_slice = curr->se.sum_exec_runtime;
- /*
- * The slice has expired; check if we have to deactivate the
- * MVP task, otherwise requeue the task in the list so
- * that other MVP tasks get a chance.
- */
- limit = walt_cfs_mvp_task_limit(curr);
- if (wts->total_exec > limit) {
- walt_cfs_deactivate_mvp_task(rq, curr);
- trace_walt_cfs_deactivate_mvp_task(curr, wts, limit);
- return;
- }
- if (wrq->num_mvp_tasks == 1)
- return;
- /* slice expired. re-queue the task */
- list_del(&wts->mvp_list);
- wrq->num_mvp_tasks--;
- walt_cfs_insert_mvp_task(wrq, wts, false);
- }
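- /*
-  * Enqueue hook: classify @p and queue it on the rq's MVP list unless
-  * it has already consumed its MVP runtime limit; a fresh wakeup also
-  * resets the runtime snapshots used for slice accounting.
-  */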
- void walt_cfs_enqueue_task(struct rq *rq, struct task_struct *p)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- int mvp_prio = walt_get_mvp_task_prio(p);
- if (mvp_prio == WALT_NOT_MVP)
- return;
- /*
- * This can happen during migration or an enq/deq for a prio/class change:
- * the task was once MVP but got demoted, and it will not be MVP again
- * until it goes to sleep.
- */
- if (wts->total_exec > walt_cfs_mvp_task_limit(p))
- return;
- wts->mvp_prio = mvp_prio;
- walt_cfs_insert_mvp_task(wrq, wts, task_on_cpu(rq, p));
- /*
- * We inserted the task at the appropriate position. Take the
- * task runtime snapshot. From now onwards we use this point as a
- * baseline to enforce the slice and demotion.
- */
- if (!wts->total_exec) /* queue after sleep */ {
- wts->sum_exec_snapshot_for_total = p->se.sum_exec_runtime;
- wts->sum_exec_snapshot_for_slice = p->se.sum_exec_runtime;
- }
- }
- void walt_cfs_dequeue_task(struct rq *rq, struct task_struct *p)
- {
- struct walt_task_struct *wts = (struct walt_task_struct *) p->android_vendor_data1;
- if (!list_empty(&wts->mvp_list) && wts->mvp_list.next)
- walt_cfs_deactivate_mvp_task(rq, p);
- /*
- * Reset the exec time during sleep so that it starts
- * from scratch upon next wakeup. total_exec should
- * be preserved when task is enq/deq while it is on
- * runqueue.
- */
- if (READ_ONCE(p->__state) != TASK_RUNNING)
- wts->total_exec = 0;
- }
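- /*
-  * Tick hook: refresh the running MVP task's accounting under the rq
-  * lock and reschedule when the rq's skip_mvp state flips or the
-  * current task is no longer at the head of the MVP queue.
-  */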
- void walt_cfs_tick(struct rq *rq)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts = (struct walt_task_struct *) rq->curr->android_vendor_data1;
- bool skip_mvp;
- if (unlikely(walt_disabled))
- return;
- raw_spin_lock(&rq->__lock);
- if (list_empty(&wts->mvp_list) || (wts->mvp_list.next == NULL))
- goto out;
- /* Reschedule if RQ's skip_mvp state changes */
- skip_mvp = wrq->skip_mvp;
- walt_cfs_account_mvp_runtime(rq, rq->curr);
- /*
- * If the current task should no longer be the MVP pick, reschedule to
- * see if we can run any other task, including MVP tasks.
- */
- if (((skip_mvp != wrq->skip_mvp) ||
- (wrq->mvp_tasks.next != &wts->mvp_list)) && rq->cfs.h_nr_running > 1)
- resched_curr(rq);
- out:
- raw_spin_unlock(&rq->__lock);
- }
- /*
- * When preempt = false and nopreempt = false, we leave the preemption
- * decision to CFS.
- */
- static void walt_cfs_check_preempt_wakeup(void *unused, struct rq *rq, struct task_struct *p,
- bool *preempt, bool *nopreempt, int wake_flags,
- struct sched_entity *se, struct sched_entity *pse,
- int next_buddy_marked, unsigned int granularity)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts_p = (struct walt_task_struct *) p->android_vendor_data1;
- struct task_struct *c = rq->curr;
- struct walt_task_struct *wts_c = (struct walt_task_struct *) rq->curr->android_vendor_data1;
- bool resched = false, skip_mvp;
- bool p_is_mvp, curr_is_mvp;
- if (unlikely(walt_disabled))
- return;
- p_is_mvp = !list_empty(&wts_p->mvp_list) && wts_p->mvp_list.next;
- curr_is_mvp = !list_empty(&wts_c->mvp_list) && wts_c->mvp_list.next;
- /*
- * current is not MVP, so preemption decision
- * is simple.
- */
- if (!curr_is_mvp) {
- if (p_is_mvp && !wrq->skip_mvp)
- goto preempt;
- return; /* CFS decides preemption */
- }
- /*
- * current is MVP. update its runtime before deciding the
- * preemption.
- */
- skip_mvp = wrq->skip_mvp;
- walt_cfs_account_mvp_runtime(rq, c);
- resched = (skip_mvp != wrq->skip_mvp) || (wrq->mvp_tasks.next != &wts_c->mvp_list);
- /*
- * current is no longer eligible to run. It must have been
- * picked (because of MVP) ahead of other tasks in the CFS
- * tree, so drive preemption to pick up the next task from
- * the tree, which also includes picking up the first in
- * the MVP queue.
- */
- if (resched)
- goto preempt;
- /* current is the first in the queue, so no preemption */
- *nopreempt = true;
- trace_walt_cfs_mvp_wakeup_nopreempt(c, wts_c, walt_cfs_mvp_task_limit(c));
- return;
- preempt:
- *preempt = true;
- trace_walt_cfs_mvp_wakeup_preempt(p, wts_p, walt_cfs_mvp_task_limit(p));
- }
- #ifdef CONFIG_FAIR_GROUP_SCHED
- /* Walk up scheduling entities hierarchy */
- #define for_each_sched_entity(se) \
- for (; se; se = se->parent)
- #else /* !CONFIG_FAIR_GROUP_SCHED */
- #define for_each_sched_entity(se) \
- for (; se; se = NULL)
- #endif
- extern void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se);
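- /*
-  * pick_next_task_fair() override: unless the rq is MVP throttled,
-  * replace the CFS pick with the first task on the MVP queue, set its
-  * scheduling entities as next and record the MVP arrival time used by
-  * the throttling window.
-  */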
- static void walt_cfs_replace_next_task_fair(void *unused, struct rq *rq, struct task_struct **p,
- struct sched_entity **se, bool *repick, bool simple,
- struct task_struct *prev)
- {
- struct walt_rq *wrq = &per_cpu(walt_rq, cpu_of(rq));
- struct walt_task_struct *wts;
- struct task_struct *mvp;
- struct cfs_rq *cfs_rq;
- if (unlikely(walt_disabled))
- return;
- if ((*p) && (*p) != prev && ((*p)->on_cpu == 1 || (*p)->on_rq == 0 ||
- (*p)->on_rq == TASK_ON_RQ_MIGRATING ||
- task_thread_info(*p)->cpu != cpu_of(rq)))
- WALT_BUG(WALT_BUG_UPSTREAM, *p,
- "picked %s(%d) on_cpu=%d on_rq=%d p->cpu=%d cpu_of(rq)=%d kthread=%d\n",
- (*p)->comm, (*p)->pid, (*p)->on_cpu,
- (*p)->on_rq, task_thread_info(*p)->cpu,
- cpu_of(rq), ((*p)->flags & PF_KTHREAD));
- /* RQ is in MVP throttled state */
- if (wrq->skip_mvp)
- return;
- if (list_empty(&wrq->mvp_tasks)) {
- wrq->mvp_arrival_time = 0;
- return;
- }
- /* Return the first task from MVP queue */
- wts = list_first_entry(&wrq->mvp_tasks, struct walt_task_struct, mvp_list);
- mvp = wts_to_ts(wts);
- *p = mvp;
- *se = &mvp->se;
- *repick = true;
- /* TODO: check with team if it is fine in case clock is not updated */
- /* Mark arrival of MVP task */
- if (!wrq->mvp_arrival_time)
- wrq->mvp_arrival_time = rq->clock;
- if (simple) {
- for_each_sched_entity((*se)) {
- /*
- * TODO If CFS_BANDWIDTH is enabled, we might pick
- * from a throttled cfs_rq
- */
- cfs_rq = cfs_rq_of(*se);
- set_next_entity(cfs_rq, *se);
- }
- }
- if ((*p) && (*p) != prev && ((*p)->on_cpu == 1 || (*p)->on_rq == 0 ||
- (*p)->on_rq == TASK_ON_RQ_MIGRATING ||
- task_thread_info(*p)->cpu != cpu_of(rq)))
- WALT_BUG(WALT_BUG_UPSTREAM, *p,
- "picked %s(%d) on_cpu=%d on_rq=%d p->cpu=%d cpu_of(rq)=%d kthread=%d\n",
- (*p)->comm, (*p)->pid, (*p)->on_cpu,
- (*p)->on_rq, task_thread_info(*p)->cpu,
- cpu_of(rq), ((*p)->flags & PF_KTHREAD));
- trace_walt_cfs_mvp_pick_next(mvp, wts, walt_cfs_mvp_task_limit(mvp));
- }
- void walt_cfs_init(void)
- {
- register_trace_android_rvh_select_task_rq_fair(walt_select_task_rq_fair, NULL);
- register_trace_android_vh_binder_wakeup_ilocked(walt_binder_low_latency_set, NULL);
- register_trace_android_vh_binder_set_priority(binder_set_priority_hook, NULL);
- register_trace_android_vh_binder_restore_priority(binder_restore_priority_hook, NULL);
- register_trace_android_rvh_check_preempt_wakeup(walt_cfs_check_preempt_wakeup, NULL);
- register_trace_android_rvh_replace_next_task_fair(walt_cfs_replace_next_task_fair, NULL);
- }