123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Performance event support - Processor Activity Instrumentation Extension
- * Facility
- *
- * Copyright IBM Corp. 2022
- * Author(s): Thomas Richter <[email protected]>
- */
- #define KMSG_COMPONENT "pai_ext"
- #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
- #include <linux/kernel.h>
- #include <linux/kernel_stat.h>
- #include <linux/percpu.h>
- #include <linux/notifier.h>
- #include <linux/init.h>
- #include <linux/export.h>
- #include <linux/io.h>
- #include <asm/cpu_mcf.h>
- #include <asm/ctl_reg.h>
- #include <asm/pai.h>
- #include <asm/debug.h>
- #define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
- #define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
- static debug_info_t *paiext_dbg;
- static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
- enum paiext_mode {
- PAI_MODE_NONE,
- PAI_MODE_SAMPLING,
- PAI_MODE_COUNTER,
- };
- struct pai_userdata {
- u16 num;
- u64 value;
- } __packed;
- /* Create the PAI extension 1 control block area.
- * The PAI extension control block 1 is pointed to by lowcore
- * address 0x1508 for each CPU. This control block is 512 bytes in size
- * and requires a 512 byte boundary alignment.
- */
- struct paiext_cb { /* PAI extension 1 control block */
- u64 header; /* Not used */
- u64 reserved1;
- u64 acc; /* Addr to analytics counter control block */
- u8 reserved2[488];
- } __packed;
- struct paiext_map {
- unsigned long *area; /* Area for CPU to store counters */
- struct pai_userdata *save; /* Area to store non-zero counters */
- enum paiext_mode mode; /* Type of event */
- unsigned int active_events; /* # of PAI Extension users */
- unsigned int refcnt;
- struct perf_event *event; /* Perf event for sampling */
- struct paiext_cb *paiext_cb; /* PAI extension control block area */
- };
- struct paiext_mapptr {
- struct paiext_map *mapptr;
- };
- static struct paiext_root { /* Anchor to per CPU data */
- int refcnt; /* Overall active events */
- struct paiext_mapptr __percpu *mapptr;
- } paiext_root;
- /* Free per CPU data when the last event is removed. */
- static void paiext_root_free(void)
- {
- if (!--paiext_root.refcnt) {
- free_percpu(paiext_root.mapptr);
- paiext_root.mapptr = NULL;
- }
- }
- /* On initialization of first event also allocate per CPU data dynamically.
- * Start with an array of pointers, the array size is the maximum number of
- * CPUs possible, which might be larger than the number of CPUs currently
- * online.
- */
- static int paiext_root_alloc(void)
- {
- if (++paiext_root.refcnt == 1) {
- /* The memory is already zeroed. */
- paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
- if (!paiext_root.mapptr) {
- /* Returing without refcnt adjustment is ok. The
- * error code is handled by paiext_alloc() which
- * decrements refcnt when an event can not be
- * created.
- */
- return -ENOMEM;
- }
- }
- return 0;
- }
- /* Protects against concurrent increment of sampler and counter member
- * increments at the same time and prohibits concurrent execution of
- * counting and sampling events.
- * Ensures that analytics counter block is deallocated only when the
- * sampling and counting on that cpu is zero.
- * For details see paiext_alloc().
- */
- static DEFINE_MUTEX(paiext_reserve_mutex);
- /* Free all memory allocated for event counting/sampling setup */
- static void paiext_free(struct paiext_mapptr *mp)
- {
- kfree(mp->mapptr->area);
- kfree(mp->mapptr->paiext_cb);
- kvfree(mp->mapptr->save);
- kfree(mp->mapptr);
- mp->mapptr = NULL;
- }
- /* Release the PMU if event is the last perf event */
- static void paiext_event_destroy(struct perf_event *event)
- {
- struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
- struct paiext_map *cpump = mp->mapptr;
- mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
- if (!--cpump->refcnt) /* Last reference gone */
- paiext_free(mp);
- paiext_root_free();
- mutex_unlock(&paiext_reserve_mutex);
- debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
- event->cpu, mp->mapptr);
- }
- /* Used to avoid races in checking concurrent access of counting and
- * sampling for pai_extension events.
- *
- * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
- * This function is called in process context and it is safe to block.
- * When the event initialization functions fails, no other call back will
- * be invoked.
- *
- * Allocate the memory for the event.
- */
- static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
- {
- struct paiext_mapptr *mp;
- struct paiext_map *cpump;
- int rc;
- mutex_lock(&paiext_reserve_mutex);
- rc = paiext_root_alloc();
- if (rc)
- goto unlock;
- mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
- cpump = mp->mapptr;
- if (!cpump) { /* Paiext_map allocated? */
- rc = -ENOMEM;
- cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
- if (!cpump)
- goto unlock;
- /* Allocate memory for counter area and counter extraction.
- * These are
- * - a 512 byte block and requires 512 byte boundary alignment.
- * - a 1KB byte block and requires 1KB boundary alignment.
- * Only the first counting event has to allocate the area.
- *
- * Note: This works with commit 59bb47985c1d by default.
- * Backporting this to kernels without this commit might
- * need adjustment.
- */
- mp->mapptr = cpump;
- cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
- cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
- cpump->save = kvmalloc_array(paiext_cnt + 1,
- sizeof(struct pai_userdata),
- GFP_KERNEL);
- if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
- paiext_free(mp);
- goto unlock;
- }
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
- : PAI_MODE_COUNTER;
- } else {
- /* Multiple invocation, check whats active.
- * Supported are multiple counter events or only one sampling
- * event concurrently at any one time.
- */
- if (cpump->mode == PAI_MODE_SAMPLING ||
- (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
- rc = -EBUSY;
- goto unlock;
- }
- }
- rc = 0;
- cpump->event = event;
- ++cpump->refcnt;
- unlock:
- if (rc) {
- /* Error in allocation of event, decrement anchor. Since
- * the event in not created, its destroy() function is never
- * invoked. Adjust the reference counter for the anchor.
- */
- paiext_root_free();
- }
- mutex_unlock(&paiext_reserve_mutex);
- /* If rc is non-zero, no increment of counter/sampler was done. */
- return rc;
- }
- /* The PAI extension 1 control block supports up to 128 entries. Return
- * the index within PAIE1_CB given the event number. Also validate event
- * number.
- */
- static int paiext_event_valid(struct perf_event *event)
- {
- u64 cfg = event->attr.config;
- if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
- /* Offset NNPA in paiext_cb */
- event->hw.config_base = offsetof(struct paiext_cb, acc);
- return 0;
- }
- return -EINVAL;
- }
- /* Might be called on different CPU than the one the event is intended for. */
- static int paiext_event_init(struct perf_event *event)
- {
- struct perf_event_attr *a = &event->attr;
- int rc;
- /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
- if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
- return -ENOENT;
- /* PAI extension event must be valid and in supported range */
- rc = paiext_event_valid(event);
- if (rc)
- return rc;
- /* Allow only CPU wide operation, no process context for now. */
- if (event->hw.target || event->cpu == -1)
- return -ENOENT;
- /* Allow only event NNPA_ALL for sampling. */
- if (a->sample_period && a->config != PAI_NNPA_BASE)
- return -EINVAL;
- /* Prohibit exclude_user event selection */
- if (a->exclude_user)
- return -EINVAL;
- rc = paiext_alloc(a, event);
- if (rc)
- return rc;
- event->hw.last_tag = 0;
- event->destroy = paiext_event_destroy;
- if (a->sample_period) {
- a->sample_period = 1;
- a->freq = 0;
- /* Register for paicrypt_sched_task() to be called */
- event->attach_state |= PERF_ATTACH_SCHED_CB;
- /* Add raw data which are the memory mapped counters */
- a->sample_type |= PERF_SAMPLE_RAW;
- /* Turn off inheritance */
- a->inherit = 0;
- }
- return 0;
- }
- static u64 paiext_getctr(struct paiext_map *cpump, int nr)
- {
- return cpump->area[nr];
- }
- /* Read the counter values. Return value from location in buffer. For event
- * NNPA_ALL sum up all events.
- */
- static u64 paiext_getdata(struct perf_event *event)
- {
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- u64 sum = 0;
- int i;
- if (event->attr.config != PAI_NNPA_BASE)
- return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
- for (i = 1; i <= paiext_cnt; i++)
- sum += paiext_getctr(cpump, i);
- return sum;
- }
- static u64 paiext_getall(struct perf_event *event)
- {
- return paiext_getdata(event);
- }
- static void paiext_read(struct perf_event *event)
- {
- u64 prev, new, delta;
- prev = local64_read(&event->hw.prev_count);
- new = paiext_getall(event);
- local64_set(&event->hw.prev_count, new);
- delta = new - prev;
- local64_add(delta, &event->count);
- }
- static void paiext_start(struct perf_event *event, int flags)
- {
- u64 sum;
- if (event->hw.last_tag)
- return;
- event->hw.last_tag = 1;
- sum = paiext_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- local64_set(&event->count, 0);
- }
- static int paiext_add(struct perf_event *event, int flags)
- {
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct paiext_cb *pcb = cpump->paiext_cb;
- if (++cpump->active_events == 1) {
- S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
- pcb->acc = virt_to_phys(cpump->area) | 0x1;
- /* Enable CPU instruction lookup for PAIE1 control block */
- __ctl_set_bit(0, 49);
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
- }
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
- paiext_start(event, PERF_EF_RELOAD);
- }
- event->hw.state = 0;
- if (event->attr.sample_period) {
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
- }
- return 0;
- }
- static void paiext_stop(struct perf_event *event, int flags)
- {
- paiext_read(event);
- event->hw.state = PERF_HES_STOPPED;
- }
- static void paiext_del(struct perf_event *event, int flags)
- {
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct paiext_cb *pcb = cpump->paiext_cb;
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period) {
- /* Only counting needs to read counter */
- paiext_stop(event, PERF_EF_UPDATE);
- }
- if (--cpump->active_events == 0) {
- /* Disable CPU instruction lookup for PAIE1 control block */
- __ctl_clear_bit(0, 49);
- pcb->acc = 0;
- S390_lowcore.aicd = 0;
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
- }
- }
- /* Create raw data and save it in buffer. Returns number of bytes copied.
- * Saves only positive counter entries of the form
- * 2 bytes: Number of counter
- * 8 bytes: Value of counter
- */
- static size_t paiext_copy(struct paiext_map *cpump)
- {
- struct pai_userdata *userdata = cpump->save;
- int i, outidx = 0;
- for (i = 1; i <= paiext_cnt; i++) {
- u64 val = paiext_getctr(cpump, i);
- if (val) {
- userdata[outidx].num = i;
- userdata[outidx].value = val;
- outidx++;
- }
- }
- return outidx * sizeof(*userdata);
- }
- /* Write sample when one or more counters values are nonzero.
- *
- * Note: The function paiext_sched_task() and paiext_push_sample() are not
- * invoked after function paiext_del() has been called because of function
- * perf_sched_cb_dec().
- * The function paiext_sched_task() and paiext_push_sample() are only
- * called when sampling is active. Function perf_sched_cb_inc()
- * has been invoked to install function paiext_sched_task() as call back
- * to run at context switch time (see paiext_add()).
- *
- * This causes function perf_event_context_sched_out() and
- * perf_event_context_sched_in() to check whether the PMU has installed an
- * sched_task() callback. That callback is not active after paiext_del()
- * returns and has deleted the event on that CPU.
- */
- static int paiext_push_sample(void)
- {
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
- struct perf_sample_data data;
- struct perf_raw_record raw;
- struct pt_regs regs;
- size_t rawsize;
- int overflow;
- rawsize = paiext_copy(cpump);
- if (!rawsize) /* No incremented counters */
- return 0;
- /* Setup perf sample */
- memset(®s, 0, sizeof(regs));
- memset(&raw, 0, sizeof(raw));
- memset(&data, 0, sizeof(data));
- perf_sample_data_init(&data, 0, event->hw.last_period);
- if (event->attr.sample_type & PERF_SAMPLE_TID) {
- data.tid_entry.pid = task_tgid_nr(current);
- data.tid_entry.tid = task_pid_nr(current);
- }
- if (event->attr.sample_type & PERF_SAMPLE_TIME)
- data.time = event->clock();
- if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
- data.id = event->id;
- if (event->attr.sample_type & PERF_SAMPLE_CPU)
- data.cpu_entry.cpu = smp_processor_id();
- if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.frag.size = rawsize;
- raw.frag.data = cpump->save;
- raw.size = raw.frag.size;
- data.raw = &raw;
- data.sample_flags |= PERF_SAMPLE_RAW;
- }
- overflow = perf_event_overflow(event, &data, ®s);
- perf_event_update_userpage(event);
- /* Clear lowcore area after read */
- memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
- return overflow;
- }
- /* Called on schedule-in and schedule-out. No access to event structure,
- * but for sampling only event NNPA_ALL is allowed.
- */
- static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
- {
- /* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
- */
- if (!sched_in)
- paiext_push_sample();
- }
- /* Attribute definitions for pai extension1 interface. As with other CPU
- * Measurement Facilities, there is one attribute per mapped counter.
- * The number of mapped counters may vary per machine generation. Use
- * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
- * to determine the number of mapped counters. The instructions returns
- * a positive number, which is the highest number of supported counters.
- * All counters less than this number are also supported, there are no
- * holes. A returned number of zero means no support for mapped counters.
- *
- * The identification of the counter is a unique number. The chosen range
- * is 0x1800 + offset in mapped kernel page.
- * All CPU Measurement Facility counters identifiers must be unique and
- * the numbers from 0 to 496 are already used for the CPU Measurement
- * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
- * counters.
- * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
- * used for the CPU Measurement Sampling facility.
- */
- PMU_FORMAT_ATTR(event, "config:0-63");
- static struct attribute *paiext_format_attr[] = {
- &format_attr_event.attr,
- NULL,
- };
- static struct attribute_group paiext_events_group = {
- .name = "events",
- .attrs = NULL, /* Filled in attr_event_init() */
- };
- static struct attribute_group paiext_format_group = {
- .name = "format",
- .attrs = paiext_format_attr,
- };
- static const struct attribute_group *paiext_attr_groups[] = {
- &paiext_events_group,
- &paiext_format_group,
- NULL,
- };
- /* Performance monitoring unit for mapped counters */
- static struct pmu paiext = {
- .task_ctx_nr = perf_invalid_context,
- .event_init = paiext_event_init,
- .add = paiext_add,
- .del = paiext_del,
- .start = paiext_start,
- .stop = paiext_stop,
- .read = paiext_read,
- .sched_task = paiext_sched_task,
- .attr_groups = paiext_attr_groups,
- };
- /* List of symbolic PAI extension 1 NNPA counter names. */
- static const char * const paiext_ctrnames[] = {
- [0] = "NNPA_ALL",
- [1] = "NNPA_ADD",
- [2] = "NNPA_SUB",
- [3] = "NNPA_MUL",
- [4] = "NNPA_DIV",
- [5] = "NNPA_MIN",
- [6] = "NNPA_MAX",
- [7] = "NNPA_LOG",
- [8] = "NNPA_EXP",
- [9] = "NNPA_IBM_RESERVED_9",
- [10] = "NNPA_RELU",
- [11] = "NNPA_TANH",
- [12] = "NNPA_SIGMOID",
- [13] = "NNPA_SOFTMAX",
- [14] = "NNPA_BATCHNORM",
- [15] = "NNPA_MAXPOOL2D",
- [16] = "NNPA_AVGPOOL2D",
- [17] = "NNPA_LSTMACT",
- [18] = "NNPA_GRUACT",
- [19] = "NNPA_CONVOLUTION",
- [20] = "NNPA_MATMUL_OP",
- [21] = "NNPA_MATMUL_OP_BCAST23",
- [22] = "NNPA_SMALLBATCH",
- [23] = "NNPA_LARGEDIM",
- [24] = "NNPA_SMALLTENSOR",
- [25] = "NNPA_1MFRAME",
- [26] = "NNPA_2GFRAME",
- [27] = "NNPA_ACCESSEXCEPT",
- };
- static void __init attr_event_free(struct attribute **attrs, int num)
- {
- struct perf_pmu_events_attr *pa;
- struct device_attribute *dap;
- int i;
- for (i = 0; i < num; i++) {
- dap = container_of(attrs[i], struct device_attribute, attr);
- pa = container_of(dap, struct perf_pmu_events_attr, attr);
- kfree(pa);
- }
- kfree(attrs);
- }
- static int __init attr_event_init_one(struct attribute **attrs, int num)
- {
- struct perf_pmu_events_attr *pa;
- pa = kzalloc(sizeof(*pa), GFP_KERNEL);
- if (!pa)
- return -ENOMEM;
- sysfs_attr_init(&pa->attr.attr);
- pa->id = PAI_NNPA_BASE + num;
- pa->attr.attr.name = paiext_ctrnames[num];
- pa->attr.attr.mode = 0444;
- pa->attr.show = cpumf_events_sysfs_show;
- pa->attr.store = NULL;
- attrs[num] = &pa->attr.attr;
- return 0;
- }
- /* Create PMU sysfs event attributes on the fly. */
- static int __init attr_event_init(void)
- {
- struct attribute **attrs;
- int ret, i;
- attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
- GFP_KERNEL);
- if (!attrs)
- return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
- ret = attr_event_init_one(attrs, i);
- if (ret) {
- attr_event_free(attrs, i - 1);
- return ret;
- }
- }
- attrs[i] = NULL;
- paiext_events_group.attrs = attrs;
- return 0;
- }
- static int __init paiext_init(void)
- {
- struct qpaci_info_block ib;
- int rc = -ENOMEM;
- if (!test_facility(197))
- return 0;
- qpaci(&ib);
- paiext_cnt = ib.num_nnpa;
- if (paiext_cnt >= PAI_NNPA_MAXCTR)
- paiext_cnt = PAI_NNPA_MAXCTR;
- if (!paiext_cnt)
- return 0;
- rc = attr_event_init();
- if (rc) {
- pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
- return rc;
- }
- /* Setup s390dbf facility */
- paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
- if (!paiext_dbg) {
- pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
- rc = -ENOMEM;
- goto out_init;
- }
- debug_register_view(paiext_dbg, &debug_sprintf_view);
- rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
- if (rc) {
- pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
- "rc=%i\n", rc);
- goto out_pmu;
- }
- return 0;
- out_pmu:
- debug_unregister_view(paiext_dbg, &debug_sprintf_view);
- debug_unregister(paiext_dbg);
- out_init:
- attr_event_free(paiext_events_group.attrs,
- ARRAY_SIZE(paiext_ctrnames) + 1);
- return rc;
- }
- device_initcall(paiext_init);
|