Files
android_kernel_xiaomi_sm8450/kernel/sched/debug.c
Blagovest Kolenichev fac8c789d8 Merge android11-5.4.52 (c7725ae) into msm-5.4
* refs/heads/tmp-c7725ae:
  Linux 5.4.52
  s390/maccess: add no DAT mode to kernel_write
  s390: Change s390_kernel_write() return type to match memcpy()
  pwm: jz4740: Fix build failure
  perf scripts python: exported-sql-viewer.py: Fix unexpanded 'Find' result
  perf scripts python: exported-sql-viewer.py: Fix zero id in call tree 'Find' result
  perf scripts python: exported-sql-viewer.py: Fix zero id in call graph 'Find' result
  perf scripts python: export-to-postgresql.py: Fix struct.pack() int argument
  dm writecache: reject asynchronous pmem devices
  blk-mq: consider non-idle request as "inflight" in blk_mq_rq_inflight()
  s390/mm: fix huge pte soft dirty copying
  s390/setup: init jump labels before command line parsing
  ARC: elf: use right ELF_ARCH
  ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE
  mmc: meson-gx: limit segments to 1 when dram-access-quirk is needed
  dm: use noio when sending kobject event
  drm/amdgpu: don't do soft recovery if gpu_recovery=0
  drm/radeon: fix double free
  btrfs: fix double put of block group with nocow
  btrfs: fix fatal extent_buffer readahead vs releasepage race
  Revert "ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb"
  bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok()
  kprobes: Do not expose probe addresses to non-CAP_SYSLOG
  module: Do not expose section addresses to non-CAP_SYSLOG
  module: Refactor section attr into bin attribute
  kallsyms: Refactor kallsyms_show_value() to take cred
  KVM: arm64: Fix kvm_reset_vcpu() return code being incorrect with SVE
  KVM: x86: Mark CR4.TSD as being possibly owned by the guest
  KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode
  KVM: x86: bit 8 of non-leaf PDPEs is not reserved
  KVM: arm64: Annotate hyp NMI-related functions as __always_inline
  KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART
  KVM: arm64: Fix definition of PAGE_HYP_DEVICE
  ALSA: hda/realtek: Enable headset mic of Acer Veriton N4660G with ALC269VC
  ALSA: hda/realtek: Enable headset mic of Acer C20-820 with ALC269VC
  ALSA: hda/realtek - Enable audio jacks of Acer vCopperbox with ALC269VC
  ALSA: hda/realtek - Fix Lenovo Thinkpad X1 Carbon 7th quirk subdevice id
  ALSA: usb-audio: Add implicit feedback quirk for RTX6001
  ALSA: usb-audio: add quirk for MacroSilicon MS2109
  ALSA: hda - let hs_mic be picked ahead of hp_mic
  ALSA: opl3: fix infoleak in opl3
  IB/hfi1: Do not destroy link_wq when the device is shut down
  IB/hfi1: Do not destroy hfi1_wq when the device is shut down
  mlxsw: pci: Fix use-after-free in case of failed devlink reload
  mlxsw: spectrum_router: Remove inappropriate usage of WARN_ON()
  net: macb: fix call to pm_runtime in the suspend/resume functions
  net: macb: mark device wake capable when "magic-packet" property present
  net: macb: fix wakeup test in runtime suspend/resume routines
  bnxt_en: fix NULL dereference in case SR-IOV configuration fails
  net/mlx5e: Fix 50G per lane indication
  net/mlx5: Fix eeprom support for SFP module
  qed: Populate nvm-file attributes while reading nvm config partition.
  IB/mlx5: Fix 50G per lane indication
  cxgb4: fix all-mask IP address comparison
  nbd: Fix memory leak in nbd_add_socket
  arm64: kgdb: Fix single-step exception handling oops
  RDMA/siw: Fix reporting vendor_part_id
  ALSA: compress: fix partial_drain completion state
  net: hns3: fix use-after-free when doing self test
  net: hns3: add a missing uninit debugfs when unload driver
  smsc95xx: avoid memory leak in smsc95xx_bind
  smsc95xx: check return value of smsc95xx_reset
  perf intel-pt: Fix PEBS sample for XMM registers
  perf intel-pt: Fix recording PEBS-via-PT with registers
  perf report TUI: Fix segmentation fault in perf_evsel__hists_browse()
  netfilter: conntrack: refetch conntrack after nf_conntrack_update()
  net: dsa: microchip: set the correct number of ports
  IB/sa: Resolv use-after-free in ib_nl_make_request()
  net: cxgb4: fix return error value in t4_prep_fw
  net: mvneta: fix use of state->speed
  netfilter: ipset: call ip_set_free() instead of kfree()
  bpf, sockmap: RCU dereferenced psock may be used outside RCU block
  bpf, sockmap: RCU splat with redirect and strparser error or TLS
  drm/mediatek: Check plane visibility in atomic_update
  nl80211: don't return err unconditionally in nl80211_start_ap()
  gpio: pca953x: Fix GPIO resource leak on Intel Galileo Gen 2
  gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2
  net: qrtr: Fix an out of bounds read qrtr_endpoint_post()
  sched/core: Check cpus_mask, not cpus_ptr in __set_cpus_allowed_ptr(), to fix mask corruption
  x86/entry: Increase entry_stack size to a full page
  nvme-rdma: assign completion vector correctly
  block: release bip in a right way in error path
  usb: dwc3: pci: Fix reference count leak in dwc3_pci_resume_work
  scsi: mptscsih: Fix read sense data size
  ARM: imx6: add missing put_device() call in imx6q_suspend_init()
  cifs: update ctime and mtime during truncate
  s390/kasan: fix early pgm check handler execution
  drm: panel-orientation-quirks: Use generic orientation-data for Acer S1003
  drm: panel-orientation-quirks: Add quirk for Asus T101HA panel
  iommu/vt-d: Don't apply gfx quirks to untrusted devices
  powerpc/kvm/book3s64: Fix kernel crash with nested kvm & DEBUG_VIRTUAL
  ibmvnic: continue to init in CRQ reset returns H_CLOSED
  i40e: protect ring accesses with READ- and WRITE_ONCE
  ixgbe: protect ring accesses with READ- and WRITE_ONCE
  net: ethernet: mvneta: Add 2500BaseX support for SoCs without comphy
  net: ethernet: mvneta: Fix Serdes configuration for SoCs without comphy
  spi: spidev: fix a potential use-after-free in spidev_release()
  spi: spidev: fix a race between spidev_release and spidev_remove
  ALSA: hda: Intel: add missing PCI IDs for ICL-H, TGL-H and EKL
  ASoC: SOF: Intel: add PCI ID for CometLake-S
  drm: mcde: Fix display initialization problem
  gpu: host1x: Detach driver on unregister
  drm/tegra: hub: Do not enable orphaned window group
  drm/ttm: Fix dma_fence refcnt leak when adding move fence
  ARM: dts: omap4-droid4: Fix spi configuration and increase rate
  perf/x86/rapl: Fix RAPL config variable bug
  perf/x86/rapl: Move RAPL support to common x86 code
  regmap: fix alignment issue
  spi: spi-fsl-dspi: Fix lockup if device is removed during SPI transfer
  spi: spi-fsl-dspi: Adding shutdown hook
  KVM: s390: reduce number of IO pins to 1
  ANDROID: ABI: add already existing symbols of ufs_* to unisoc
  ANDROID: ABI: add nf_* symbols to unisoc
  ANDROID: GKI: update abi xml file
  ANDROID: GKI: build in more Bluetooth drivers
  ANDROID: GKI: update abi xml file
  ANDROID: GKI: Enable CONFIG_BT_HIDP as it is needed by some platforms
  ANDROID: GKI: add CONFIG_BT to x86 gki_defconfig
  ANDROID: GKI: update abi definitions
  ANDROID: mm: add vendor padding for SPECULATIVE_PAGE_FAULTS
  ANDROID: GKI: enable usb storage drivers
  ANDROID: fscrypt: fix DUN contiguity with inline encryption + IV_INO_LBLK_32 policies
  ANDROID: GKI: set CONFIG_STATIC_USERMODEHELPER_PATH
  ANDROID: sound: usb: Add vendor's hooking interface
  UPSTREAM: slimbus: core: Set fwnode for a device when setting of_node
  ANDROID: f2fs: add back compress inode check
  ANDROID: abi: update abi dump for CONFIG_KPROBES
  ANDROID: gki_defconfig: Enable Kprobes
  ANDROID: Correct branch name after rename
  ANDROID: GKI: update ABI due to power supply changes
  UPSTREAM: power: supply: core: add POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED
  UPSTREAM: power: supply: core: add manufacture date properties
  UPSTREAM: power: supply: core: add capacity error margin property
  UPSTREAM: power: supply: core: Add type property to uevent env
  UPSTREAM: power: supply: core: Add a macro that maps enum properties to text values
  UPSTREAM: power: supply: core: Use designated initializer for property text arrays
  UPSTREAM: power: supply: core: Cleanup power supply sysfs attribute list
  UPSTREAM: power: supply: core: reduce power_supply_show_usb_type() parameters
  ANDROID: fix copyright notice
  ANDROID: add support for vendor hooks
  ANDROID: GKI: update abi definitions
  Linux 5.4.51
  efi: Make it possible to disable efivar_ssdt entirely
  dm zoned: assign max_io_len correctly
  x86/resctrl: Fix memory bandwidth counter width for AMD
  mm, compaction: make capture control handling safe wrt interrupts
  mm, compaction: fully assume capture is not NULL in compact_zone_order()
  irqchip/gic: Atomically update affinity
  dma-buf: Move dma_buf_release() from fops to dentry_ops
  drm/amdgpu/atomfirmware: fix vram_info fetching for renoir
  drm/amdgpu: use %u rather than %d for sclk/mclk
  drm/amd/display: Only revalidate bandwidth on medium and fast updates
  MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen
  MIPS: lantiq: xway: sysctrl: fix the GPHY clock alias names
  cifs: Fix the target file was deleted when rename failed.
  SMB3: Honor 'handletimeout' flag for multiuser mounts
  SMB3: Honor lease disabling for multiuser mounts
  SMB3: Honor persistent/resilient handle flags for multiuser mounts
  SMB3: Honor 'seal' flag for multiuser mounts
  Revert "ALSA: usb-audio: Improve frames size computation"
  nfsd: apply umask on fs without ACL support
  spi: spi-fsl-dspi: Fix external abort on interrupt in resume or exit paths
  i2c: mlxcpld: check correct size of maximum RECV_LEN packet
  i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665
  samples/vfs: avoid warning in statx override
  nvme: fix a crash in nvme_mpath_add_disk
  nvme: fix identify error status silent ignore
  SMB3: Honor 'posix' flag for multiuser mounts
  virtio-blk: free vblk-vqs in error path of virtblk_probe()
  drm: sun4i: hdmi: Remove extra HPD polling
  nfsd: fix nfsdfs inode reference count leak
  nfsd4: fix nfsdfs reference count loop
  thermal/drivers/rcar_gen3: Fix undefined temperature if negative
  thermal/drivers/mediatek: Fix bank number settings on mt8183
  hwmon: (acpi_power_meter) Fix potential memory leak in acpi_power_meter_add()
  hwmon: (max6697) Make sure the OVERT mask is set correctly
  cxgb4: fix SGE queue dump destination buffer context
  cxgb4: use correct type for all-mask IP address comparison
  cxgb4: fix endian conversions for L4 ports in filters
  cxgb4: parse TC-U32 key values and masks natively
  cxgb4: use unaligned conversion for fetching timestamp
  RDMA/counter: Query a counter before release
  rxrpc: Fix afs large storage transmission performance drop
  drm/msm/dpu: fix error return code in dpu_encoder_init
  crypto: af_alg - fix use-after-free in af_alg_accept() due to bh_lock_sock()
  tpm: Fix TIS locality timeout problems
  selftests: tpm: Use /bin/sh instead of /bin/bash
  kgdb: Avoid suspicious RCU usage warning
  nvme-multipath: fix bogus request queue reference put
  nvme-multipath: fix deadlock due to head->lock
  nvme-multipath: fix deadlock between ana_work and scan_work
  nvme-multipath: set bdi capabilities once
  s390/debug: avoid kernel warning on too large number of pages
  tools lib traceevent: Handle __attribute__((user)) in field names
  tools lib traceevent: Add append() function helper for appending strings
  usb: usbtest: fix missing kfree(dev->buf) in usbtest_disconnect
  rxrpc: Fix race between incoming ACK parser and retransmitter
  mm/slub: fix stack overruns with SLUB_STATS
  mm/slub.c: fix corrupted freechain in deactivate_slab()
  sched/debug: Make sd->flags sysctl read-only
  usbnet: smsc95xx: Fix use-after-free after removal
  EDAC/amd64: Read back the scrub rate PCI register on F15h
  mm: fix swap cache node allocation mask
  io_uring: make sure async workqueue is canceled on exit
  ANDROID: update the ABI representation after ion sysfs changes
  ANDROID: add ion_stat tracepoint to common kernel
  ANDROID: staging: android: ion: Expose total heap and pool sizes via sysfs
  ANDROID: Update ABI representation for libabigail update
  ANDROID: Update the ABI representation
  ANDROID: GKI: Incremental fs: move MODULE_IMPORT_NS() line
  ANDROID: GKI: set vfs-only exports into their own namespace
  ANDROID: gki: build-in DMA_VIRTUAL_CHANNELS
  ANDROID: GKI: make VIDEOBUF2_DMA_CONTIG under GKI_HIDDEN_MEDIA_CONFIGS
  ANDROID: PM: sleep: export device_pm_callback_start/end trace event
  ANDROID: fix copyright notice
  ANDROID: GKI: add android_vendor.h
  ANDROID: GKI: enable CONFIG_V4L_PLATFORM_DRIVERS
  ANDROID: GKI: enable CONFIG_TYPEC_TCPM
  ANDROID: GKI: support CONFIG_INPUT_TOUCHSCREEN
  ANDROID: GKI: move abi files to android/
  ANDROID: GKI: drop unneeded "_whitelist" off of symbol filenames
  ANDROID: GKI: modules should not run programs
  UPSTREAM: net: bpf: Make bpf_ktime_get_ns() available to non GPL programs
  UPSTREAM: bpf: Add gso_size to __sk_buff
  UPSTREAM: usb: musb: mediatek: add reset FADDR to zero in reset interrupt handle
  ANDROID: ABI/Whitelist: update symbols of incrementalfs.ko and sprd_*.ko to unisoc whitelist
  ANDROID: GKI: update abi definition now that EHCI driver is built in
  ANDROID: GKI: support CONFIG_USB_EHCI_HCD_PLATFORM
  ANDROID: GKI: support CONFIG_USB_EHCI_HCD
  ANDROID: Update kabi for db845c (keeping up with ufs-qcom zig-zag)
  ANDROID: GKI: scripts: Makefile: update the lz4 command (#2)

Conflicts:
	android/abi_gki_aarch64_qcom
	drivers/dma-buf/dma-buf.c
	kernel/trace/power-traces.c

Change-Id: I9993aff20a659ab20af5f890f1f789cd4ef19246
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
2020-07-27 01:21:35 -07:00

1001 lines
24 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* kernel/sched/debug.c
*
* Print the CFS rbtree and other debugging details
*
* Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
*/
#include "sched.h"
/*
 * Taken with interrupts disabled by print_cpu() around the per-class
 * statistics and the task dump.
 */
static DEFINE_SPINLOCK(sched_debug_lock);
/*
 * SEQ_printf - print either into a seq_file or to the console.
 *
 * When @m is non-NULL the text is appended to that seq_file with
 * seq_printf() (this is how /proc/sched_debug is produced); when @m is
 * NULL the same format and arguments are handed to pr_cont() instead.
 */
#define SEQ_printf(m, x...) \
do { \
if (m) \
seq_printf(m, x); \
else \
pr_cont(x); \
} while (0)
/*
 * Helpers that split a nanosecond count into a "whole.remainder" pair
 * for printing.
 *
 * nsec_high() returns the value divided by 1000000, keeping the sign of
 * the input when it is interpreted as a signed 64-bit quantity.
 */
static long long nsec_high(unsigned long long nsec)
{
	int negative = (long long)nsec < 0;

	/* Divide the magnitude; do_div() works on an unsigned 64-bit lvalue. */
	if (negative)
		nsec = -nsec;
	do_div(nsec, 1000000);

	return negative ? -nsec : nsec;
}
/*
 * Remainder of |nsec| after dividing by 1000000, i.e. the part printed
 * after the decimal point by the "%06ld" conversions in this file.
 */
static unsigned long nsec_low(unsigned long long nsec)
{
	unsigned long long magnitude = nsec;

	if ((long long)magnitude < 0)
		magnitude = -magnitude;

	/* do_div() leaves the quotient in its argument and returns the remainder. */
	return do_div(magnitude, 1000000);
}
/*
 * Expands to TWO arguments, nsec_high(x) and nsec_low(x), meant to feed a
 * "%Ld.%06ld"-style format. Note that x is evaluated twice.
 */
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
/*
 * Table of feature names, generated by re-including features.h with
 * SCHED_FEAT() defined to emit the stringified name followed by a comma.
 * (features.h is presumably a list of SCHED_FEAT(name, enabled) lines -
 * it is not visible from here.)
 */
#define SCHED_FEAT(name, enabled) \
#name ,
static const char * const sched_feat_names[] = {
#include "features.h"
};
#undef SCHED_FEAT
/*
 * seq_file show callback for the "sched_features" file: emits every
 * feature name on one line, prefixed with "NO_" when its bit in
 * sysctl_sched_features is clear. Always returns 0.
 */
static int sched_feat_show(struct seq_file *m, void *v)
{
	int feat = 0;

	while (feat < __SCHED_FEAT_NR) {
		if ((sysctl_sched_features & (1UL << feat)) == 0)
			seq_puts(m, "NO_");
		seq_printf(m, "%s ", sched_feat_names[feat]);
		feat++;
	}
	seq_puts(m, "\n");

	return 0;
}
#ifdef CONFIG_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

/*
 * One static key per scheduler feature, generated by re-including
 * features.h with SCHED_FEAT() mapping the "enabled" token (true/false)
 * onto the matching static key initializer.
 */
#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

/*
 * Flip the static key of feature @i. The *_cpuslocked variants are used;
 * the caller visible in this file, sched_feat_write(), holds
 * cpus_read_lock() around the update.
 */
static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}

#else

/*
 * Without jump labels there is no key to patch: these are no-ops.
 * (No ';' after the bodies - an empty file-scope declaration is not
 * valid ISO C.)
 */
static void sched_feat_disable(int i) { }
static void sched_feat_enable(int i) { }

#endif /* CONFIG_JUMP_LABEL */
/*
 * Enable or disable the scheduler feature named by @cmp.
 *
 * A leading "NO_" selects "disable"; the remainder is looked up in
 * sched_feat_names. Returns 0 on success or the negative value returned
 * by match_string() when the name is not found.
 */
static int sched_feat_set(char *cmp)
{
	int negate = 0;
	int idx;

	if (!strncmp(cmp, "NO_", 3)) {
		cmp += 3;
		negate = 1;
	}

	idx = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
	if (idx < 0)
		return idx;

	if (!negate) {
		sysctl_sched_features |= (1UL << idx);
		sched_feat_enable(idx);
		return 0;
	}

	sysctl_sched_features &= ~(1UL << idx);
	sched_feat_disable(idx);
	return 0;
}
/*
 * write() handler for the "sched_features" file.
 *
 * Copies at most sizeof(buf) - 1 bytes from user space, strips
 * surrounding whitespace and passes the result to sched_feat_set().
 * Returns the number of bytes consumed, -EFAULT if the copy fails, or
 * the error from sched_feat_set().
 */
static ssize_t
sched_feat_write(struct file *filp, const char __user *ubuf,
		 size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	char buf[64];
	int err;

	/* Clamp so that there is always room for the terminating NUL. */
	if (cnt >= sizeof(buf))
		cnt = sizeof(buf) - 1;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';

	/* Ensure the static_key remains in a consistent state */
	cpus_read_lock();
	inode_lock(inode);
	err = sched_feat_set(strstrip(buf));
	inode_unlock(inode);
	cpus_read_unlock();

	if (err < 0)
		return err;

	*ppos += cnt;
	return cnt;
}
/* open() handler: bind sched_feat_show() as a single-record seq_file. */
static int sched_feat_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_feat_show, NULL);
}
/* File operations behind the "sched_features" debugfs file. */
static const struct file_operations sched_feat_fops = {
	.open		= sched_feat_open,
	.release	= single_release,
	.read		= seq_read,
	.write		= sched_feat_write,
	.llseek		= seq_lseek,
};
/* Backing variable of the "sched_debug" debugfs boolean created below. */
__read_mostly bool sched_debug_enabled;

/*
 * Create the "sched_features" and "sched_debug" files at the debugfs
 * root. Runs as a late initcall and always returns 0.
 */
static __init int sched_init_debug(void)
{
	debugfs_create_file("sched_features", 0644, NULL, NULL,
			    &sched_feat_fops);

	debugfs_create_bool("sched_debug", 0644, NULL,
			    &sched_debug_enabled);

	return 0;
}
late_initcall(sched_init_debug);
#ifdef CONFIG_SMP
#ifdef CONFIG_SYSCTL
/*
 * "sched_domain" directory entry. Its .child is not set here; it is pointed
 * at the per-CPU entries by register_sched_domain_sysctl().
 */
static struct ctl_table sd_ctl_dir[] = {
{
.procname = "sched_domain",
.mode = 0555,
},
{}
};
/*
 * "kernel" directory entry holding sd_ctl_dir. This is the table handed to
 * register_sysctl_table().
 */
static struct ctl_table sd_ctl_root[] = {
{
.procname = "kernel",
.mode = 0555,
.child = sd_ctl_dir,
},
{}
};
/*
 * Allocate a zero-filled array of @n ctl_table entries with GFP_KERNEL.
 * Returns NULL when the allocation fails.
 */
static struct ctl_table *sd_alloc_ctl_entry(int n)
{
	return kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
}
/*
 * Recursively free the table *@tablep points at and reset *@tablep to NULL.
 *
 * Entries are walked until the first one with a zero mode. In the
 * intermediate directories both the child table and procname are
 * dynamically allocated (and either allocation may have failed), but
 * mode is always set. In the lowest directory the names are static
 * strings and every entry has a proc handler, so only names of entries
 * without a handler are freed.
 */
static void sd_free_ctl_entry(struct ctl_table **tablep)
{
	struct ctl_table *table = *tablep;
	struct ctl_table *cur;

	for (cur = table; cur->mode != 0; cur++) {
		if (cur->child != NULL)
			sd_free_ctl_entry(&cur->child);

		if (!cur->proc_handler)
			kfree(cur->procname);
	}

	kfree(table);
	*tablep = NULL;
}
/*
 * Fill in the name, data pointer, length, mode and handler of one
 * ctl_table entry. Other fields of @entry are left untouched.
 */
static void
set_table_entry(struct ctl_table *entry,
		const char *procname, void *data, int maxlen,
		umode_t mode, proc_handler *proc_handler)
{
	entry->procname     = procname;
	entry->mode         = mode;
	entry->proc_handler = proc_handler;
	entry->data         = data;
	entry->maxlen       = maxlen;
}
static struct ctl_table *
sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
struct ctl_table *table = sd_alloc_ctl_entry(9);
if (table == NULL)
return NULL;
set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0444, proc_dointvec_minmax);
set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
/* &table[8] is terminator */
return table;
}
static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
{
struct ctl_table *entry, *table;
struct sched_domain *sd;
int domain_num = 0, i;
char buf[32];
for_each_domain(cpu, sd)
domain_num++;
entry = table = sd_alloc_ctl_entry(domain_num + 1);
if (table == NULL)
return NULL;
i = 0;
for_each_domain(cpu, sd) {
snprintf(buf, 32, "domain%d", i);
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0555;
entry->child = sd_alloc_ctl_domain_table(sd);
entry++;
i++;
}
return table;
}
/* CPUs whose sysctl sub-tree has to be (re)built on the next register call. */
static cpumask_var_t sd_sysctl_cpus;
/* Result of register_sysctl_table(); reset to NULL on unregister. */
static struct ctl_table_header *sd_sysctl_header;
/*
 * Build (or refresh) the per-CPU sched_domain sysctl tables and register
 * sd_ctl_root.
 *
 * The per-CPU entry array, the cpu -> entry index and the dirty mask are
 * allocated lazily and kept across calls. If any of those allocations
 * fails the function returns without registering anything.
 */
void register_sched_domain_sysctl(void)
{
static struct ctl_table *cpu_entries;
static struct ctl_table **cpu_idx;
static bool init_done = false;
char buf[32];
int i;
/* One entry per possible CPU plus a terminator, hooked below "sched_domain". */
if (!cpu_entries) {
cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
if (!cpu_entries)
return;
WARN_ON(sd_ctl_dir[0].child);
sd_ctl_dir[0].child = cpu_entries;
}
/* Lookup array indexed by CPU id, pointing into the packed cpu_entries. */
if (!cpu_idx) {
struct ctl_table *e = cpu_entries;
cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
if (!cpu_idx)
return;
/* deal with sparse possible map */
for_each_possible_cpu(i) {
cpu_idx[i] = e;
e++;
}
}
if (!cpumask_available(sd_sysctl_cpus)) {
if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
return;
}
/* First successful pass only: mark every possible CPU as needing a build. */
if (!init_done) {
init_done = true;
/* init to possible to not have holes in @cpu_entries */
cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
}
/* Rebuild the sub-tree of every CPU still marked in the mask. */
for_each_cpu(i, sd_sysctl_cpus) {
struct ctl_table *e = cpu_idx[i];
if (e->child)
sd_free_ctl_entry(&e->child);
/* The "cpuN" name is allocated once and reused on later rebuilds. */
if (!e->procname) {
snprintf(buf, 32, "cpu%d", i);
e->procname = kstrdup(buf, GFP_KERNEL);
}
e->mode = 0555;
e->child = sd_alloc_ctl_cpu_table(i);
__cpumask_clear_cpu(i, sd_sysctl_cpus);
}
/* A still-set header means the previous registration was never undone. */
WARN_ON(sd_sysctl_header);
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
}
/*
 * Mark @cpu so that its sysctl sub-tree is rebuilt by the next
 * register_sched_domain_sysctl() call. Does nothing while the mask has
 * not been allocated yet.
 */
void dirty_sched_domain_sysctl(int cpu)
{
	if (!cpumask_available(sd_sysctl_cpus))
		return;

	__cpumask_set_cpu(cpu, sd_sysctl_cpus);
}
/*
 * Drop the sysctl registration and forget the header.
 *
 * May be called multiple times per register.
 */
void unregister_sched_domain_sysctl(void)
{
	unregister_sysctl_table(sd_sysctl_header);
	sd_sysctl_header = NULL;
}
#endif /* CONFIG_SYSCTL */
#endif /* CONFIG_SMP */
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * Print the scheduling entity that represents task group @tg on @cpu
 * (tg->se[cpu]). Nothing is printed when that pointer is NULL.
 */
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
struct sched_entity *se = tg->se[cpu];
/*
 * Local print helpers. The label of every line is the stringified
 * argument, so the expressions passed below are also the output text.
 */
#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
if (!se)
return;
PN(se->exec_start);
PN(se->vruntime);
PN(se->sum_exec_runtime);
/* The se->statistics fields are only printed while schedstats are enabled. */
if (schedstat_enabled()) {
PN_SCHEDSTAT(se->statistics.wait_start);
PN_SCHEDSTAT(se->statistics.sleep_start);
PN_SCHEDSTAT(se->statistics.block_start);
PN_SCHEDSTAT(se->statistics.sleep_max);
PN_SCHEDSTAT(se->statistics.block_max);
PN_SCHEDSTAT(se->statistics.exec_max);
PN_SCHEDSTAT(se->statistics.slice_max);
PN_SCHEDSTAT(se->statistics.wait_max);
PN_SCHEDSTAT(se->statistics.wait_sum);
P_SCHEDSTAT(se->statistics.wait_count);
}
P(se->load.weight);
P(se->runnable_weight);
#ifdef CONFIG_SMP
P(se->avg.load_avg);
P(se->avg.util_avg);
P(se->avg.runnable_load_avg);
#endif
/* Drop the helpers again so later code can define its own P/PN. */
#undef PN_SCHEDSTAT
#undef PN
#undef P_SCHEDSTAT
#undef P
}
#endif
#ifdef CONFIG_CGROUP_SCHED
/* Shared scratch buffer that task_group_path() formats into. */
static char group_path[PATH_MAX];

/*
 * Format the path of task group @tg into group_path and return that
 * buffer. autogroup_path() is tried first; when it returns 0 the cgroup
 * path is used instead. The result is overwritten by the next call.
 */
static char *task_group_path(struct task_group *tg)
{
	if (!autogroup_path(tg, group_path, PATH_MAX))
		cgroup_path(tg->css.cgroup, group_path, PATH_MAX);

	return group_path;
}
#endif
/*
 * Print one line describing task @p: state marker (">R" when it is the
 * task currently running on @rq), comm, pid, vruntime, context-switch
 * count, priority, wait/exec/sleep times and, depending on the
 * configuration, NUMA placement and task-group path.
 */
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (p != rq->curr) {
		SEQ_printf(m, " %c", task_state_to_char(p));
	} else {
		SEQ_printf(m, ">R");
	}

	SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
		   p->comm,
		   task_pid_nr(p),
		   SPLIT_NS(p->se.vruntime),
		   (long long)(p->nvcsw + p->nivcsw),
		   p->prio);

	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
		   SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
		   SPLIT_NS(p->se.sum_exec_runtime),
		   SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));

#ifdef CONFIG_NUMA_BALANCING
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif

#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf(m, " %s", task_group_path(task_group(p)));
#endif

	SEQ_printf(m, "\n");
}
/*
 * Print the task table header followed by one print_task() line for
 * every thread whose task_cpu() equals @rq_cpu. The thread list is
 * walked under rcu_read_lock().
 */
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *leader, *thread;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "runnable tasks:\n");
	SEQ_printf(m, " S task PID tree-key switches prio"
		   " wait-time sum-exec sum-sleep\n");
	SEQ_printf(m, "-------------------------------------------------------"
		   "----------------------------------------------------\n");

	rcu_read_lock();
	for_each_process_thread(leader, thread) {
		if (task_cpu(thread) == rq_cpu)
			print_task(m, rq, thread);
	}
	rcu_read_unlock();
}
/*
 * Print the state of one CFS runqueue of @cpu: vruntime bounds and spread,
 * load figures and, depending on the configuration, PELT averages,
 * bandwidth-throttling state and the owning group's entity.
 */
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
/* MIN_vruntime / max_vruntime stay at -1 when no entity is found below. */
s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
spread, rq0_min_vruntime, spread0;
struct rq *rq = cpu_rq(cpu);
struct sched_entity *last;
unsigned long flags;
#ifdef CONFIG_FAIR_GROUP_SCHED
SEQ_printf(m, "\n");
SEQ_printf(m, "cfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
#else
SEQ_printf(m, "\n");
SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
#endif
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
SPLIT_NS(cfs_rq->exec_clock));
/* Sample the vruntime values under this CPU's rq->lock; print afterwards. */
raw_spin_lock_irqsave(&rq->lock, flags);
if (rb_first_cached(&cfs_rq->tasks_timeline))
MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
min_vruntime = cfs_rq->min_vruntime;
/* CPU 0's value is read while holding only this CPU's rq->lock. */
rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
raw_spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
SPLIT_NS(MIN_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
SPLIT_NS(min_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime",
SPLIT_NS(max_vruntime));
/* spread: distance between the first and last entity's vruntime. */
spread = max_vruntime - MIN_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread",
SPLIT_NS(spread));
/* spread0: this runqueue's min_vruntime relative to CPU 0's. */
spread0 = min_vruntime - rq0_min_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %ld\n", "runnable_weight", cfs_rq->runnable_weight);
SEQ_printf(m, " .%-30s: %lu\n", "load_avg",
cfs_rq->avg.load_avg);
SEQ_printf(m, " .%-30s: %lu\n", "runnable_load_avg",
cfs_rq->avg.runnable_load_avg);
SEQ_printf(m, " .%-30s: %lu\n", "util_avg",
cfs_rq->avg.util_avg);
SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued",
cfs_rq->avg.util_est.enqueued);
SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg",
cfs_rq->removed.load_avg);
SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg",
cfs_rq->removed.util_avg);
SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_sum",
cfs_rq->removed.runnable_sum);
#ifdef CONFIG_FAIR_GROUP_SCHED
SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib",
cfs_rq->tg_load_avg_contrib);
SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg",
atomic_long_read(&cfs_rq->tg->load_avg));
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
SEQ_printf(m, " .%-30s: %d\n", "throttled",
cfs_rq->throttled);
SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
cfs_rq->throttle_count);
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
/* Finish with the group's own scheduling entity on this CPU. */
print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
/*
 * Print the state of one RT runqueue of @cpu: number of running (and, on
 * SMP, migratory) tasks, the throttled flag, and rt_time / rt_runtime.
 */
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
SEQ_printf(m, "\n");
SEQ_printf(m, "rt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
#else
SEQ_printf(m, "\n");
SEQ_printf(m, "rt_rq[%d]:\n", cpu);
#endif
/*
 * Local helpers printing rt_rq->x under the label "x":
 * P as long long, PU as unsigned long, PN split with SPLIT_NS().
 */
#define P(x) \
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PU(x) \
SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
#define PN(x) \
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
PU(rt_nr_running);
#ifdef CONFIG_SMP
PU(rt_nr_migratory);
#endif
P(rt_throttled);
PN(rt_time);
PN(rt_runtime);
#undef PN
#undef PU
#undef P
}
/*
 * Print the state of the deadline runqueue of @cpu: number of running
 * (and, on SMP, migratory) tasks plus the bw / total_bw fields of the
 * associated dl_bw structure.
 */
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
{
struct dl_bw *dl_bw;
SEQ_printf(m, "\n");
SEQ_printf(m, "dl_rq[%d]:\n", cpu);
/* Local helper printing dl_rq->x as unsigned long under the label "x". */
#define PU(x) \
SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
PU(dl_nr_running);
/* On SMP the dl_bw lives in the CPU's root domain, otherwise in the dl_rq. */
#ifdef CONFIG_SMP
PU(dl_nr_migratory);
dl_bw = &cpu_rq(cpu)->rd->dl_bw;
#else
dl_bw = &dl_rq->dl_bw;
#endif
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
#undef PU
}
/*
 * Print everything known about @cpu: a header line, runqueue counters and
 * clocks, optional WALT and schedstat fields, then the CFS/RT/DL statistics
 * and the task table.
 */
static void print_cpu(struct seq_file *m, int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
#ifdef CONFIG_X86
{
/* A zero cpu_khz is replaced by 1 before being printed as MHz. */
unsigned int freq = cpu_khz ? : 1;
SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
cpu, freq / 1000, (freq % 1000));
}
#else
SEQ_printf(m, "cpu#%d\n", cpu);
#endif
/*
 * P prints rq->x as long when the field is 4 bytes wide and as long long
 * otherwise; PN prints rq->x split with SPLIT_NS(). The label is "x".
 */
#define P(x) \
do { \
if (sizeof(rq->x) == 4) \
SEQ_printf(m, " .%-30s: %ld\n", #x, (long)(rq->x)); \
else \
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\
} while (0)
#define PN(x) \
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
P(nr_running);
P(nr_switches);
P(nr_load_updates);
P(nr_uninterruptible);
PN(next_balance);
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
PN(clock);
PN(clock_task);
#ifdef CONFIG_SMP
P(cpu_capacity);
#endif
#ifdef CONFIG_SCHED_WALT
P(wrq.cluster->cur_freq);
P(wrq.walt_stats.nr_big_tasks);
SEQ_printf(m, " .%-30s: %llu\n", "walt_stats.cumulative_runnable_avg",
rq->wrq.walt_stats.cumulative_runnable_avg_scaled);
#endif
#undef P
#undef PN
#ifdef CONFIG_SMP
/* P64 prints rq->n with %Ld and no cast. */
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P64(avg_idle);
P64(max_idle_balance_cost);
#undef P64
#endif
/* P is redefined here to print schedstat_val(rq->n) with %d. */
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n));
if (schedstat_enabled()) {
P(yld_count);
P(sched_count);
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
}
#undef P
/* The per-class statistics and the task dump run under sched_debug_lock. */
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
print_dl_stats(m, cpu);
print_rq(m, rq, cpu);
spin_unlock_irqrestore(&sched_debug_lock, flags);
SEQ_printf(m, "\n");
}
/*
 * Human-readable names for the values of sysctl_sched_tunable_scaling,
 * indexed by that value (see sched_debug_header()).
 *
 * Both the strings and the pointer array are const: the table is only
 * ever read, so nothing should be able to retarget its entries.
 */
static const char * const sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};
/*
 * Print the global part of the report: version line, clock readings,
 * jiffies and the scheduler sysctl tunables.
 */
static void sched_debug_header(struct seq_file *m)
{
u64 ktime, sched_clk, cpu_clk;
unsigned long flags;
/* Read the three clocks back to back with interrupts disabled. */
local_irq_save(flags);
ktime = ktime_to_ns(ktime_get());
sched_clk = sched_clock();
cpu_clk = local_clock();
local_irq_restore(flags);
/* Only the part of the version string before the first space is printed. */
SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
/*
 * The label of each line is the stringified argument, so the local
 * variable names above are part of the output.
 */
#define P(x) \
SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
PN(ktime);
PN(sched_clk);
PN(cpu_clk);
P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
P(sched_clock_stable());
#endif
#undef PN
#undef P
SEQ_printf(m, "\n");
SEQ_printf(m, "sysctl_sched\n");
/* Same helpers again, now indented with a leading " ." for the sysctl list. */
#define P(x) \
SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
P(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
#ifdef CONFIG_SCHED_WALT
P(sched_init_task_load_windows);
P(sched_ravg_window);
P(sched_load_granule);
#endif
#undef PN
#undef P
/*
 * NOTE(review): the name table is indexed without a range check here;
 * presumably the sysctl's own limits keep the value in bounds - confirm.
 */
SEQ_printf(m, " .%-40s: %d (%s)\n",
"sysctl_sched_tunable_scaling",
sysctl_sched_tunable_scaling,
sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
SEQ_printf(m, "\n");
}
/*
 * seq_file show callback. @v is the iterator token produced by
 * sched_debug_start(): 1 selects the header, any other value is a CPU
 * number plus 2. Always returns 0.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)v - 2;

	if (cpu == -1) {
		sched_debug_header(m);
		return 0;
	}

	print_cpu(m, cpu);
	return 0;
}
void sysrq_sched_debug_show(void)
{
int cpu;
sched_debug_header(NULL);
for_each_online_cpu(cpu)
print_cpu(NULL, cpu);
}
/*
* This iterator needs some explanation.
* It returns 1 for the header position.
* This means 2 is CPU 0.
* In a hotplugged system some CPUs, including CPU 0, may be missing so we have
* to use cpumask_* to iterate over the CPUs.
*/
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
	unsigned long pos = *offset;
	unsigned long cpu;

	/* Position 0 is the header, encoded as token 1. */
	if (!pos)
		return (void *) 1;

	/*
	 * Position 1 maps to the first online CPU; position p > 1 maps to
	 * the first online CPU with a number greater than p - 2.
	 */
	if (pos == 1)
		cpu = cpumask_first(cpu_online_mask);
	else
		cpu = cpumask_next(pos - 2, cpu_online_mask);

	/* Record the position of the CPU actually found. */
	*offset = cpu + 1;

	return cpu < nr_cpu_ids ? (void *)(unsigned long)(cpu + 2) : NULL;
}
/* Advance the position by one and let sched_debug_start() resolve it. */
static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
	*offset += 1;

	return sched_debug_start(file, offset);
}
/* seq_file stop callback: intentionally empty. */
static void sched_debug_stop(struct seq_file *file, void *data)
{
	/* nothing to do */
}
/* Iterator callbacks behind /proc/sched_debug. */
static const struct seq_operations sched_debug_sops = {
	.show	= sched_debug_show,
	.start	= sched_debug_start,
	.next	= sched_debug_next,
	.stop	= sched_debug_stop,
};
/*
 * Create the read-only "sched_debug" proc entry.
 * Returns 0 on success and -ENOMEM when the entry cannot be created.
 */
static int __init init_sched_debug_procfs(void)
{
	return proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops) ?
		0 : -ENOMEM;
}

__initcall(init_sched_debug_procfs);
/*
 * Field-printing helpers. #F stringifies the argument, so the argument text
 * is the label that gets printed.
 *   __P(F)  - print the expression F as a long long
 *   P(F)    - print p->F as a long long
 *   __PN(F) - print the expression F through SPLIT_NS() as "%Ld.%06ld"
 *   PN(F)   - print p->F through SPLIT_NS() as "%Ld.%06ld"
 * Users must have 'm' in scope, and 'p' as well for P()/PN().
 * proc_sched_show_task() defines these again with the same replacement
 * lists; keep both sets identical.
 */
#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
#ifdef CONFIG_NUMA_BALANCING
/*
 * Print one "numa_faults" line for @node.
 *
 * @tsf: printed as task_shared
 * @tpf: printed as task_private
 * @gsf: printed as group_shared
 * @gpf: printed as group_private
 *
 * Note that the parameters arrive shared-first while the output line
 * prints the private value first in each pair.
 */
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
SEQ_printf(m, "numa_faults node=%d ", node);
SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
/* The last piece carries the newline that terminates the line. */
SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
#endif
/*
 * Print the NUMA balancing fields of task @p into @m.
 * The body is compiled out when CONFIG_NUMA_BALANCING is not set.
 */
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	struct mempolicy *mpol;

	/* The scan sequence lives in the mm, which a task may not have. */
	if (p->mm) {
		P(mm->numa_scan_seq);
	}

	/* Sample p->mempolicy and take the reference with the task locked. */
	task_lock(p);
	mpol = p->mempolicy;
	/* Only a policy that has MPOL_F_MORON set is kept. */
	if (mpol != NULL && (mpol->flags & MPOL_F_MORON) == 0)
		mpol = NULL;
	mpol_get(mpol);
	task_unlock(p);

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
		   task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);

	/* Drop the reference taken above. */
	mpol_put(mpol);
#endif
}
/*
 * Print the scheduler state of task @p into @m.
 *
 * @p:  task whose fields are printed
 * @ns: pid namespace used to translate the task's pid for the title line
 * @m:  output seq_file
 *
 * Output order: title line and separator, core sched_entity times, the
 * schedstat counters (only when schedstat_enabled()), context-switch
 * counts, load and PELT fields, policy/priority, deadline parameters for
 * deadline tasks, a clock-delta sample and finally the NUMA fields.
 */
void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
struct seq_file *m)
{
unsigned long nr_switches;
SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
get_nr_threads(p));
SEQ_printf(m,
"---------------------------------------------------------"
"----------\n");
/*
 * Printing helpers local to this function; all of them are #undef'd
 * again further down. The argument is stringified and used as the label,
 * so the argument text is part of the output. The *_SCHEDSTAT variants
 * read the field through schedstat_val().
 */
#define __P(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define P_SCHEDSTAT(F) \
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
#define __PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
#define PN_SCHEDSTAT(F) \
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
PN(se.exec_start);
PN(se.vruntime);
PN(se.sum_exec_runtime);
/* Total context switches: voluntary plus involuntary. */
nr_switches = p->nvcsw + p->nivcsw;
P(se.nr_migrations);
if (schedstat_enabled()) {
u64 avg_atom, avg_per_cpu;
PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
PN_SCHEDSTAT(se.statistics.wait_start);
PN_SCHEDSTAT(se.statistics.sleep_start);
PN_SCHEDSTAT(se.statistics.block_start);
PN_SCHEDSTAT(se.statistics.sleep_max);
PN_SCHEDSTAT(se.statistics.block_max);
PN_SCHEDSTAT(se.statistics.exec_max);
PN_SCHEDSTAT(se.statistics.slice_max);
PN_SCHEDSTAT(se.statistics.wait_max);
PN_SCHEDSTAT(se.statistics.wait_sum);
P_SCHEDSTAT(se.statistics.wait_count);
PN_SCHEDSTAT(se.statistics.iowait_sum);
P_SCHEDSTAT(se.statistics.iowait_count);
P_SCHEDSTAT(se.statistics.nr_migrations_cold);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
P_SCHEDSTAT(se.statistics.nr_forced_migrations);
P_SCHEDSTAT(se.statistics.nr_wakeups);
P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
P_SCHEDSTAT(se.statistics.nr_wakeups_local);
P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
/*
 * avg_atom: sum_exec_runtime divided by the number of context
 * switches; set to -1 when there were no switches so that the
 * division is skipped.
 */
avg_atom = p->se.sum_exec_runtime;
if (nr_switches)
avg_atom = div64_ul(avg_atom, nr_switches);
else
avg_atom = -1LL;
/*
 * avg_per_cpu: sum_exec_runtime divided by the number of
 * migrations; set to -1 when the task never migrated.
 */
avg_per_cpu = p->se.sum_exec_runtime;
if (p->se.nr_migrations) {
avg_per_cpu = div64_u64(avg_per_cpu,
p->se.nr_migrations);
} else {
avg_per_cpu = -1LL;
}
__PN(avg_atom);
__PN(avg_per_cpu);
}
__P(nr_switches);
/* Printed with explicit labels rather than the stringified field names. */
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_voluntary_switches", (long long)p->nvcsw);
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_involuntary_switches", (long long)p->nivcsw);
P(se.load.weight);
P(se.runnable_weight);
#ifdef CONFIG_SMP
P(se.avg.load_sum);
P(se.avg.runnable_load_sum);
P(se.avg.util_sum);
P(se.avg.load_avg);
P(se.avg.runnable_load_avg);
P(se.avg.util_avg);
P(se.avg.last_update_time);
P(se.avg.util_est.ewma);
P(se.avg.util_est.enqueued);
#endif
P(policy);
P(prio);
/* Deadline parameters are printed only for tasks with a deadline policy. */
if (task_has_dl_policy(p)) {
P(dl.runtime);
P(dl.deadline);
}
#undef PN_SCHEDSTAT
#undef PN
#undef __PN
#undef P_SCHEDSTAT
#undef P
#undef __P
{
/* "clock-delta": difference between two back-to-back cpu_clock() reads. */
unsigned int this_cpu = raw_smp_processor_id();
u64 t0, t1;
t0 = cpu_clock(this_cpu);
t1 = cpu_clock(this_cpu);
SEQ_printf(m, "%-45s:%21Ld\n",
"clock-delta", (long long)(t1-t0));
}
sched_show_numa(p, m);
}
/*
 * Reset the scheduler statistics of task @p by zero-filling
 * p->se.statistics. Does nothing when CONFIG_SCHEDSTATS is not set.
 */
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	void *stats = &p->se.statistics;
	size_t len = sizeof(p->se.statistics);

	memset(stats, 0, len);
#endif
}