Merge tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: - cgroup improvements for 'perf stat', allowing for compact specification of events and cgroups in the command line. - Support per thread topdown metrics in 'perf stat'. - Support sample-read topdown metric group in 'perf record' - Show start of latency in addition to its end in 'perf sched latency'. - Add min, max to 'perf script' futex-contention output, in addition to avg. - Allow usage of 'perf_event_attr->exclusive' attribute via the new ':e' event modifier. - Add 'snapshot' command to 'perf record --control', using it with Intel PT. - Support FIFO file names as alternative options to 'perf record --control'. - Introduce branch history "streams", to compare 'perf record' runs with 'perf diff' based on branch records and report hot streams. - Support PE executable symbol tables using libbfd, to profile, for instance, wine binaries. - Add filter support for option 'perf ftrace -F/--funcs'. - Allow configuring the 'disassembler_style' 'perf annotate' knob via 'perf config' - Update CascadelakeX and SkylakeX JSON vendor events files. - Add support for parsing perchip/percore JSON vendor events. - Add power9 hv_24x7 core level metric events. - Add L2 prefetch, ITLB instruction fetch hits JSON events for AMD zen1. - Enable Family 19h users by matching Zen2 AMD vendor events. - Use debuginfod in 'perf probe' when required debug files not found locally. - Display negative tid in non-sample events in 'perf script'. - Make GTK2 support opt-in - Add build test with GTK+ - Add missing -lzstd to the fast path feature detection - Add scripts to auto generate 'mmap', 'mremap' string<->id tables for use in 'perf trace'. - Show python test script in verbose mode. - Fix uncore metric expressions - Msan uninitialized use fixes. - Use condition variables in 'perf bench numa' - Autodetect python3 binary in systems without python2. - Support md5 build ids in addition to sha1. - Add build id 'perf test' regression test. 
- Fix printable strings in python3 scripts. - Fix off by ones in 'perf trace' in arches using libaudit. - Fix JSON event code for events referencing std arch events. - Introduce 'perf test' shell script for Arm CoreSight testing. - Add rdtsc() for Arm64 for use in the PERF_RECORD_TIME_CONV metadata event and in 'perf test tsc'. - 'perf c2c' improvements: Add "RMT Load Hit" metric, "Total Stores", fixes and documentation update. - Fix usage of reloc_sym in 'perf probe' when using both kallsyms and debuginfo files. - Do not print 'Metric Groups:' unnecessarily in 'perf list' - Refcounting fixes in the event parsing code. - Add expand cgroup event 'perf test' entry. - Fix out of bounds CPU map access when handling armv8_pmu events in 'perf stat'. - Add build-id injection 'perf bench' benchmark. - Enter namespace when reading build-id in 'perf inject'. - Do not load map/dso when injecting build-id speeding up the 'perf inject' process. - Add --buildid-all option to avoid processing all samples, just the mmap metadata events. - Add feature test to check if libbfd has buildid support - Add 'perf test' entry for PE binary format support. - Fix typos in power8 PMU vendor events JSON files. - Hide libtraceevent non API functions. * tag 'perf-tools-for-v5.10-2020-10-15' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (113 commits) perf c2c: Update documentation for metrics reorganization perf c2c: Add metrics "RMT Load Hit" perf c2c: Correct LLC load hit metrics perf c2c: Change header for LLC local hit perf c2c: Use more explicit headers for HITM perf c2c: Change header from "LLC Load Hitm" to "Load Hitm" perf c2c: Organize metrics based on memory hierarchy perf c2c: Display "Total Stores" as a standalone metrics perf c2c: Display the total numbers continuously perf bench: Use condition variables in numa. 
perf jevents: Fix event code for events referencing std arch events perf diff: Support hot streams comparison perf streams: Report hot streams perf streams: Calculate the sum of total streams hits perf streams: Link stream pair perf streams: Compare two streams perf streams: Get the evsel_streams by evsel_idx perf streams: Introduce branch history "streams" perf intel-pt: Improve PT documentation slightly perf tools: Add support for exclusive groups/events ...
This commit is contained in:
@@ -12,6 +12,7 @@ perf-y += epoll-ctl.o
|
||||
perf-y += synthesize.o
|
||||
perf-y += kallsyms-parse.o
|
||||
perf-y += find-bit-bench.o
|
||||
perf-y += inject-buildid.o
|
||||
|
||||
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
|
||||
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
|
||||
|
@@ -47,6 +47,7 @@ int bench_epoll_wait(int argc, const char **argv);
|
||||
int bench_epoll_ctl(int argc, const char **argv);
|
||||
int bench_synthesize(int argc, const char **argv);
|
||||
int bench_kallsyms_parse(int argc, const char **argv);
|
||||
int bench_inject_build_id(int argc, const char **argv);
|
||||
|
||||
#define BENCH_FORMAT_DEFAULT_STR "default"
|
||||
#define BENCH_FORMAT_DEFAULT 0
|
||||
|
476
tools/perf/bench/inject-buildid.c
Normal file
476
tools/perf/bench/inject-buildid.c
Normal file
@@ -0,0 +1,476 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <ftw.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/time64.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/err.h>
|
||||
#include <internal/lib.h>
|
||||
#include <subcmd/parse-options.h>
|
||||
|
||||
#include "bench.h"
|
||||
#include "util/data.h"
|
||||
#include "util/stat.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/event.h"
|
||||
#include "util/symbol.h"
|
||||
#include "util/session.h"
|
||||
#include "util/build-id.h"
|
||||
#include "util/synthetic-events.h"
|
||||
|
||||
#define MMAP_DEV_MAJOR 8
|
||||
#define DSO_MMAP_RATIO 4
|
||||
|
||||
static unsigned int iterations = 100;
|
||||
static unsigned int nr_mmaps = 100;
|
||||
static unsigned int nr_samples = 100; /* samples per mmap */
|
||||
|
||||
static u64 bench_sample_type;
|
||||
static u16 bench_id_hdr_size;
|
||||
|
||||
/* Per-iteration state shared between the benchmark and its helper thread */
struct bench_data {
	/* pid returned by fork() for the child that runs 'perf inject' */
	int			pid;
	/* events are written to [1]; [0] becomes the child's stdin */
	int			input_pipe[2];
	/* [1] becomes the child's stdout; [0] is drained by data_reader() */
	int			output_pipe[2];
	/* thread running data_reader() */
	pthread_t		th;
};
|
||||
|
||||
struct bench_dso {
|
||||
struct list_head list;
|
||||
char *name;
|
||||
int ino;
|
||||
};
|
||||
|
||||
static int nr_dsos;
|
||||
static struct bench_dso *dsos;
|
||||
|
||||
extern int cmd_inject(int argc, const char *argv[]);
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_UINTEGER('i', "iterations", &iterations,
|
||||
"Number of iterations used to compute average (default: 100)"),
|
||||
OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps,
|
||||
"Number of mmap events for each iteration (default: 100)"),
|
||||
OPT_UINTEGER('n', "nr-samples", &nr_samples,
|
||||
"Number of sample events per mmap event (default: 100)"),
|
||||
OPT_INCR('v', "verbose", &verbose,
|
||||
"be more verbose (show iteration count, DSO name, etc)"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
/* Usage lines handed to parse_options() and usage_with_options() */
static const char *const bench_usage[] = {
	"perf bench internals inject-build-id <options>",
	NULL
};
|
||||
|
||||
/*
|
||||
* Helper for collect_dso that adds the given file as a dso to dso_list
|
||||
* if it contains a build-id. Stops after collecting 4 times more than
|
||||
* we need (for MMAP2 events).
|
||||
*/
|
||||
static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
|
||||
int typeflag, struct FTW *ftwbuf __maybe_unused)
|
||||
{
|
||||
struct bench_dso *dso = &dsos[nr_dsos];
|
||||
struct build_id bid;
|
||||
|
||||
if (typeflag == FTW_D || typeflag == FTW_SL)
|
||||
return 0;
|
||||
|
||||
if (filename__read_build_id(fpath, &bid) < 0)
|
||||
return 0;
|
||||
|
||||
dso->name = realpath(fpath, NULL);
|
||||
if (dso->name == NULL)
|
||||
return -1;
|
||||
|
||||
dso->ino = nr_dsos++;
|
||||
pr_debug2(" Adding DSO: %s\n", fpath);
|
||||
|
||||
/* stop if we collected enough DSOs */
|
||||
if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void collect_dso(void)
|
||||
{
|
||||
dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos));
|
||||
if (dsos == NULL) {
|
||||
printf(" Memory allocation failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0)
|
||||
return;
|
||||
|
||||
pr_debug(" Collected %d DSOs\n", nr_dsos);
|
||||
}
|
||||
|
||||
static void release_dso(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_dsos; i++) {
|
||||
struct bench_dso *dso = &dsos[i];
|
||||
|
||||
free(dso->name);
|
||||
}
|
||||
free(dsos);
|
||||
}
|
||||
|
||||
/* Fake address used by mmap and sample events */
|
||||
static u64 dso_map_addr(struct bench_dso *dso)
|
||||
{
|
||||
return 0x400000ULL + dso->ino * 8192ULL;
|
||||
}
|
||||
|
||||
static u32 synthesize_attr(struct bench_data *data)
|
||||
{
|
||||
union perf_event event;
|
||||
|
||||
memset(&event, 0, sizeof(event.attr) + sizeof(u64));
|
||||
|
||||
event.header.type = PERF_RECORD_HEADER_ATTR;
|
||||
event.header.size = sizeof(event.attr) + sizeof(u64);
|
||||
|
||||
event.attr.attr.type = PERF_TYPE_SOFTWARE;
|
||||
event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK;
|
||||
event.attr.attr.exclude_kernel = 1;
|
||||
event.attr.attr.sample_id_all = 1;
|
||||
event.attr.attr.sample_type = bench_sample_type;
|
||||
|
||||
return writen(data->input_pipe[1], &event, event.header.size);
|
||||
}
|
||||
|
||||
static u32 synthesize_fork(struct bench_data *data)
|
||||
{
|
||||
union perf_event event;
|
||||
|
||||
memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size);
|
||||
|
||||
event.header.type = PERF_RECORD_FORK;
|
||||
event.header.misc = PERF_RECORD_MISC_FORK_EXEC;
|
||||
event.header.size = sizeof(event.fork) + bench_id_hdr_size;
|
||||
|
||||
event.fork.ppid = 1;
|
||||
event.fork.ptid = 1;
|
||||
event.fork.pid = data->pid;
|
||||
event.fork.tid = data->pid;
|
||||
|
||||
return writen(data->input_pipe[1], &event, event.header.size);
|
||||
}
|
||||
|
||||
static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso,
|
||||
u64 timestamp)
|
||||
{
|
||||
union perf_event event;
|
||||
size_t len = offsetof(struct perf_record_mmap2, filename);
|
||||
u64 *id_hdr_ptr = (void *)&event;
|
||||
int ts_idx;
|
||||
|
||||
len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size;
|
||||
|
||||
memset(&event, 0, min(len, sizeof(event.mmap2)));
|
||||
|
||||
event.header.type = PERF_RECORD_MMAP2;
|
||||
event.header.misc = PERF_RECORD_MISC_USER;
|
||||
event.header.size = len;
|
||||
|
||||
event.mmap2.pid = data->pid;
|
||||
event.mmap2.tid = data->pid;
|
||||
event.mmap2.maj = MMAP_DEV_MAJOR;
|
||||
event.mmap2.ino = dso->ino;
|
||||
|
||||
strcpy(event.mmap2.filename, dso->name);
|
||||
|
||||
event.mmap2.start = dso_map_addr(dso);
|
||||
event.mmap2.len = 4096;
|
||||
event.mmap2.prot = PROT_EXEC;
|
||||
|
||||
if (len > sizeof(event.mmap2)) {
|
||||
/* write mmap2 event first */
|
||||
writen(data->input_pipe[1], &event, len - bench_id_hdr_size);
|
||||
/* zero-fill sample id header */
|
||||
memset(id_hdr_ptr, 0, bench_id_hdr_size);
|
||||
/* put timestamp in the right position */
|
||||
ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
|
||||
id_hdr_ptr[ts_idx] = timestamp;
|
||||
writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size);
|
||||
} else {
|
||||
ts_idx = (len / sizeof(u64)) - 2;
|
||||
id_hdr_ptr[ts_idx] = timestamp;
|
||||
writen(data->input_pipe[1], &event, len);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso,
|
||||
u64 timestamp)
|
||||
{
|
||||
union perf_event event;
|
||||
struct perf_sample sample = {
|
||||
.tid = data->pid,
|
||||
.pid = data->pid,
|
||||
.ip = dso_map_addr(dso),
|
||||
.time = timestamp,
|
||||
};
|
||||
|
||||
event.header.type = PERF_RECORD_SAMPLE;
|
||||
event.header.misc = PERF_RECORD_MISC_USER;
|
||||
event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
|
||||
|
||||
perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
|
||||
|
||||
return writen(data->input_pipe[1], &event, event.header.size);
|
||||
}
|
||||
|
||||
static u32 synthesize_flush(struct bench_data *data)
|
||||
{
|
||||
struct perf_event_header header = {
|
||||
.size = sizeof(header),
|
||||
.type = PERF_RECORD_FINISHED_ROUND,
|
||||
};
|
||||
|
||||
return writen(data->input_pipe[1], &header, header.size);
|
||||
}
|
||||
|
||||
static void *data_reader(void *arg)
|
||||
{
|
||||
struct bench_data *data = arg;
|
||||
char buf[8192];
|
||||
int flag;
|
||||
int n;
|
||||
|
||||
flag = fcntl(data->output_pipe[0], F_GETFL);
|
||||
fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK);
|
||||
|
||||
/* read out data from child */
|
||||
while (true) {
|
||||
n = read(data->output_pipe[0], buf, sizeof(buf));
|
||||
if (n > 0)
|
||||
continue;
|
||||
if (n == 0)
|
||||
break;
|
||||
|
||||
if (errno != EINTR && errno != EAGAIN)
|
||||
break;
|
||||
|
||||
usleep(100);
|
||||
}
|
||||
|
||||
close(data->output_pipe[0]);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int setup_injection(struct bench_data *data, bool build_id_all)
|
||||
{
|
||||
int ready_pipe[2];
|
||||
int dev_null_fd;
|
||||
char buf;
|
||||
|
||||
if (pipe(ready_pipe) < 0)
|
||||
return -1;
|
||||
|
||||
if (pipe(data->input_pipe) < 0)
|
||||
return -1;
|
||||
|
||||
if (pipe(data->output_pipe) < 0)
|
||||
return -1;
|
||||
|
||||
data->pid = fork();
|
||||
if (data->pid < 0)
|
||||
return -1;
|
||||
|
||||
if (data->pid == 0) {
|
||||
const char **inject_argv;
|
||||
int inject_argc = 2;
|
||||
|
||||
close(data->input_pipe[1]);
|
||||
close(data->output_pipe[0]);
|
||||
close(ready_pipe[0]);
|
||||
|
||||
dup2(data->input_pipe[0], STDIN_FILENO);
|
||||
close(data->input_pipe[0]);
|
||||
dup2(data->output_pipe[1], STDOUT_FILENO);
|
||||
close(data->output_pipe[1]);
|
||||
|
||||
dev_null_fd = open("/dev/null", O_WRONLY);
|
||||
if (dev_null_fd < 0)
|
||||
exit(1);
|
||||
|
||||
dup2(dev_null_fd, STDERR_FILENO);
|
||||
|
||||
if (build_id_all)
|
||||
inject_argc++;
|
||||
|
||||
inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv));
|
||||
if (inject_argv == NULL)
|
||||
exit(1);
|
||||
|
||||
inject_argv[0] = strdup("inject");
|
||||
inject_argv[1] = strdup("-b");
|
||||
if (build_id_all)
|
||||
inject_argv[2] = strdup("--buildid-all");
|
||||
|
||||
/* signal that we're ready to go */
|
||||
close(ready_pipe[1]);
|
||||
|
||||
cmd_inject(inject_argc, inject_argv);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
pthread_create(&data->th, NULL, data_reader, data);
|
||||
|
||||
close(ready_pipe[1]);
|
||||
close(data->input_pipe[0]);
|
||||
close(data->output_pipe[1]);
|
||||
|
||||
/* wait for child ready */
|
||||
if (read(ready_pipe[0], &buf, 1) < 0)
|
||||
return -1;
|
||||
close(ready_pipe[0]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Feed one iteration's worth of synthesized events to the child: a pipe
 * header, the attr and fork events, then for each of nr_mmaps randomly
 * chosen DSOs one MMAP2 event followed by nr_samples samples, with a
 * FINISHED_ROUND event after every 10th mmap.  Finally close the pipe, wait
 * for the child and report its peak RSS (ru_maxrss) through @max_rss.
 *
 * Returns 0 on success, -1 if the pipe header cannot be written or the
 * child cannot be waited for.
 */
static int inject_build_id(struct bench_data *data, u64 *max_rss)
{
	int status;
	unsigned int i, k;
	struct rusage rusage;
	u64 len = 0;

	/* this makes the child to run */
	if (perf_header__write_pipe(data->input_pipe[1]) < 0)
		return -1;

	len += synthesize_attr(data);
	len += synthesize_fork(data);

	for (i = 0; i < nr_mmaps; i++) {
		/*
		 * Pick among all collected DSOs.  Taking the modulus by
		 * (nr_dsos - 1) would divide by zero with a single DSO and
		 * would never select the last one.
		 */
		int idx = rand() % nr_dsos;
		struct bench_dso *dso = &dsos[idx];
		u64 timestamp = rand() % 1000000;

		pr_debug2(" [%d] injecting: %s\n", i+1, dso->name);
		len += synthesize_mmap(data, dso, timestamp);

		for (k = 0; k < nr_samples; k++)
			len += synthesize_sample(data, dso, timestamp + k * 1000);

		if ((i + 1) % 10 == 0)
			len += synthesize_flush(data);
	}

	/* this makes the child to finish */
	close(data->input_pipe[1]);

	/* rusage and status are only valid if wait4() succeeded */
	while (wait4(data->pid, &status, 0, &rusage) < 0) {
		if (errno != EINTR)
			return -1;
	}
	*max_rss = rusage.ru_maxrss;

	pr_debug(" Child %d exited with %d\n", data->pid, status);

	return 0;
}
|
||||
|
||||
static void do_inject_loop(struct bench_data *data, bool build_id_all)
|
||||
{
|
||||
unsigned int i;
|
||||
struct stats time_stats, mem_stats;
|
||||
double time_average, time_stddev;
|
||||
double mem_average, mem_stddev;
|
||||
|
||||
init_stats(&time_stats);
|
||||
init_stats(&mem_stats);
|
||||
|
||||
pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : "");
|
||||
|
||||
for (i = 0; i < iterations; i++) {
|
||||
struct timeval start, end, diff;
|
||||
u64 runtime_us, max_rss;
|
||||
|
||||
pr_debug(" Iteration #%d\n", i+1);
|
||||
|
||||
if (setup_injection(data, build_id_all) < 0) {
|
||||
printf(" Build-id injection setup failed\n");
|
||||
break;
|
||||
}
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
if (inject_build_id(data, &max_rss) < 0) {
|
||||
printf(" Build-id injection failed\n");
|
||||
break;
|
||||
}
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
timersub(&end, &start, &diff);
|
||||
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
|
||||
update_stats(&time_stats, runtime_us);
|
||||
update_stats(&mem_stats, max_rss);
|
||||
|
||||
pthread_join(data->th, NULL);
|
||||
}
|
||||
|
||||
time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
|
||||
time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
|
||||
printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
|
||||
build_id_all ? "-all" : "", time_average, time_stddev);
|
||||
|
||||
/* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
|
||||
time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
|
||||
time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
|
||||
printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
|
||||
time_average, time_stddev);
|
||||
|
||||
mem_average = avg_stats(&mem_stats);
|
||||
mem_stddev = stddev_stats(&mem_stats);
|
||||
printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
|
||||
mem_average, mem_stddev);
|
||||
}
|
||||
|
||||
static int do_inject_loops(struct bench_data *data)
|
||||
{
|
||||
|
||||
srand(time(NULL));
|
||||
symbol__init(NULL);
|
||||
|
||||
bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
|
||||
bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
|
||||
bench_id_hdr_size = 32;
|
||||
|
||||
collect_dso();
|
||||
if (nr_dsos == 0) {
|
||||
printf(" Cannot collect DSOs for injection\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
do_inject_loop(data, false);
|
||||
do_inject_loop(data, true);
|
||||
|
||||
release_dso();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bench_inject_build_id(int argc, const char **argv)
|
||||
{
|
||||
struct bench_data data;
|
||||
|
||||
argc = parse_options(argc, argv, options, bench_usage, 0);
|
||||
if (argc) {
|
||||
usage_with_options(bench_usage, options);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return do_inject_loops(&data);
|
||||
}
|
||||
|
@@ -137,12 +137,13 @@ struct global_info {
|
||||
u8 *data;
|
||||
|
||||
pthread_mutex_t startup_mutex;
|
||||
pthread_cond_t startup_cond;
|
||||
int nr_tasks_started;
|
||||
|
||||
pthread_mutex_t startup_done_mutex;
|
||||
|
||||
pthread_mutex_t start_work_mutex;
|
||||
pthread_cond_t start_work_cond;
|
||||
int nr_tasks_working;
|
||||
bool start_work;
|
||||
|
||||
pthread_mutex_t stop_work_mutex;
|
||||
u64 bytes_done;
|
||||
@@ -483,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex)
|
||||
pthread_mutex_init(mutex, &attr);
|
||||
}
|
||||
|
||||
/*
 * Initialize @cond as a process-shared (global) condition variable:
 */
static void init_global_cond(pthread_cond_t *cond)
{
	pthread_condattr_t attr;

	pthread_condattr_init(&attr);
	pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_cond_init(cond, &attr);
	/* the attributes object is no longer needed once @cond is initialized */
	pthread_condattr_destroy(&attr);
}
|
||||
|
||||
static int parse_cpu_list(const char *arg)
|
||||
{
|
||||
p0.cpu_list_str = strdup(arg);
|
||||
@@ -1136,15 +1149,18 @@ static void *worker_thread(void *__tdata)
|
||||
if (g->p.serialize_startup) {
|
||||
pthread_mutex_lock(&g->startup_mutex);
|
||||
g->nr_tasks_started++;
|
||||
/* The last thread wakes the main process. */
|
||||
if (g->nr_tasks_started == g->p.nr_tasks)
|
||||
pthread_cond_signal(&g->startup_cond);
|
||||
|
||||
pthread_mutex_unlock(&g->startup_mutex);
|
||||
|
||||
/* Here we will wait for the main process to start us all at once: */
|
||||
pthread_mutex_lock(&g->start_work_mutex);
|
||||
g->start_work = false;
|
||||
g->nr_tasks_working++;
|
||||
|
||||
/* Last one wake the main process: */
|
||||
if (g->nr_tasks_working == g->p.nr_tasks)
|
||||
pthread_mutex_unlock(&g->startup_done_mutex);
|
||||
while (!g->start_work)
|
||||
pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
|
||||
|
||||
pthread_mutex_unlock(&g->start_work_mutex);
|
||||
}
|
||||
@@ -1441,8 +1457,9 @@ static int init(void)
|
||||
|
||||
/* Startup serialization: */
|
||||
init_global_mutex(&g->start_work_mutex);
|
||||
init_global_cond(&g->start_work_cond);
|
||||
init_global_mutex(&g->startup_mutex);
|
||||
init_global_mutex(&g->startup_done_mutex);
|
||||
init_global_cond(&g->startup_cond);
|
||||
init_global_mutex(&g->stop_work_mutex);
|
||||
|
||||
init_thread_data();
|
||||
@@ -1502,9 +1519,6 @@ static int __bench_numa(const char *name)
|
||||
pids = zalloc(g->p.nr_proc * sizeof(*pids));
|
||||
pid = -1;
|
||||
|
||||
/* All threads try to acquire it, this way we can wait for them to start up: */
|
||||
pthread_mutex_lock(&g->start_work_mutex);
|
||||
|
||||
if (g->p.serialize_startup) {
|
||||
tprintf(" #\n");
|
||||
tprintf(" # Startup synchronization: ..."); fflush(stdout);
|
||||
@@ -1526,22 +1540,29 @@ static int __bench_numa(const char *name)
|
||||
pids[i] = pid;
|
||||
|
||||
}
|
||||
/* Wait for all the threads to start up: */
|
||||
while (g->nr_tasks_started != g->p.nr_tasks)
|
||||
usleep(USEC_PER_MSEC);
|
||||
|
||||
BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
|
||||
|
||||
if (g->p.serialize_startup) {
|
||||
bool threads_ready = false;
|
||||
double startup_sec;
|
||||
|
||||
pthread_mutex_lock(&g->startup_done_mutex);
|
||||
/*
|
||||
* Wait for all the threads to start up. The last thread will
|
||||
* signal this process.
|
||||
*/
|
||||
pthread_mutex_lock(&g->startup_mutex);
|
||||
while (g->nr_tasks_started != g->p.nr_tasks)
|
||||
pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
|
||||
|
||||
/* This will start all threads: */
|
||||
pthread_mutex_unlock(&g->start_work_mutex);
|
||||
pthread_mutex_unlock(&g->startup_mutex);
|
||||
|
||||
/* This mutex is locked - the last started thread will wake us: */
|
||||
pthread_mutex_lock(&g->startup_done_mutex);
|
||||
/* Wait for all threads to be at the start_work_cond. */
|
||||
while (!threads_ready) {
|
||||
pthread_mutex_lock(&g->start_work_mutex);
|
||||
threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
|
||||
pthread_mutex_unlock(&g->start_work_mutex);
|
||||
if (!threads_ready)
|
||||
usleep(1);
|
||||
}
|
||||
|
||||
gettimeofday(&stop, NULL);
|
||||
|
||||
@@ -1555,7 +1576,11 @@ static int __bench_numa(const char *name)
|
||||
tprintf(" #\n");
|
||||
|
||||
start = stop;
|
||||
pthread_mutex_unlock(&g->startup_done_mutex);
|
||||
/* Start all threads running. */
|
||||
pthread_mutex_lock(&g->start_work_mutex);
|
||||
g->start_work = true;
|
||||
pthread_mutex_unlock(&g->start_work_mutex);
|
||||
pthread_cond_broadcast(&g->start_work_cond);
|
||||
} else {
|
||||
gettimeofday(&start, NULL);
|
||||
}
|
||||
|
Reference in New Issue
Block a user