Merge branch 'locking/urgent'
@@ -1101,13 +1101,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_log_end(ab);
}

static int audit_set_feature(struct sk_buff *skb)
static int audit_set_feature(struct audit_features *uaf)
{
struct audit_features *uaf;
int i;

BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names));
uaf = nlmsg_data(nlmsg_hdr(skb));

/* if there is ever a version 2 we should handle that here */

@@ -1175,6 +1173,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 seq;
void *data;
int data_len;
int err;
struct audit_buffer *ab;
u16 msg_type = nlh->nlmsg_type;
@@ -1188,6 +1187,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
data_len = nlmsg_len(nlh);

switch (msg_type) {
case AUDIT_GET: {
@@ -1211,7 +1211,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_status s;
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
if (s.mask & AUDIT_STATUS_ENABLED) {
err = audit_set_enabled(s.enabled);
if (err < 0)
@@ -1315,7 +1315,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
break;
case AUDIT_SET_FEATURE:
err = audit_set_feature(skb);
if (data_len < sizeof(struct audit_features))
return -EINVAL;
err = audit_set_feature(data);
if (err)
return err;
break;
@@ -1327,6 +1329,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
char *str = data;

err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
@@ -1334,26 +1338,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
}
audit_log_user_recv_msg(&ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
if (msg_type != AUDIT_USER_TTY) {
/* ensure NULL termination */
str[data_len - 1] = '\0';
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
(char *)data);
else {
int size;

str);
} else {
audit_log_format(ab, " data=");
size = nlmsg_len(nlh);
if (size > 0 &&
((unsigned char *)data)[size - 1] == '\0')
size--;
audit_log_n_untrustedstring(ab, data, size);
if (data_len > 0 && str[data_len - 1] == '\0')
data_len--;
audit_log_n_untrustedstring(ab, str, data_len);
}
audit_log_end(ab);
}
break;
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
if (data_len < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
audit_log_common_recv_msg(audit_context(), &ab,
@@ -1365,7 +1367,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
return -EPERM;
}
err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
err = audit_rule_change(msg_type, seq, data, data_len);
break;
case AUDIT_LIST_RULES:
err = audit_list_rules_send(skb, seq);
@@ -1380,7 +1382,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
size_t msglen = nlmsg_len(nlh);
size_t msglen = data_len;
char *old, *new;

err = -EINVAL;
@@ -1456,7 +1458,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
/* check if new data is valid */
if ((s.enabled != 0 && s.enabled != 1) ||
(s.log_passwd != 0 && s.log_passwd != 1))

@@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
bufp = data->buf;
for (i = 0; i < data->field_count; i++) {
struct audit_field *f = &entry->rule.fields[i];
u32 f_val;

err = -EINVAL;

@@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
goto exit_free;

f->type = data->fields[i];
f->val = data->values[i];
f_val = data->values[i];

/* Support legacy tests for a valid loginuid */
if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
f_val = 0;
entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
}

@@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SUID:
case AUDIT_FSUID:
case AUDIT_OBJ_UID:
f->uid = make_kuid(current_user_ns(), f->val);
f->uid = make_kuid(current_user_ns(), f_val);
if (!uid_valid(f->uid))
goto exit_free;
break;
@@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SGID:
case AUDIT_FSGID:
case AUDIT_OBJ_GID:
f->gid = make_kgid(current_user_ns(), f->val);
f->gid = make_kgid(current_user_ns(), f_val);
if (!gid_valid(f->gid))
goto exit_free;
break;
case AUDIT_ARCH:
f->val = f_val;
entry->rule.arch_f = f;
break;
case AUDIT_SUBJ_USER:
@@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_OBJ_TYPE:
case AUDIT_OBJ_LEV_LOW:
case AUDIT_OBJ_LEV_HIGH:
str = audit_unpack_string(&bufp, &remain, f->val);
if (IS_ERR(str))
str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
entry->rule.buflen += f->val;

}
entry->rule.buflen += f_val;
f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
(void **)&f->lsm_rule);
/* Keep currently invalid fields around in case they
@@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
pr_warn("audit rule for LSM \'%s\' is invalid\n",
str);
err = 0;
}
if (err) {
kfree(str);
} else if (err)
goto exit_free;
} else
f->lsm_str = str;
break;
case AUDIT_WATCH:
str = audit_unpack_string(&bufp, &remain, f->val);
if (IS_ERR(str))
str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
entry->rule.buflen += f->val;

err = audit_to_watch(&entry->rule, str, f->val, f->op);
}
err = audit_to_watch(&entry->rule, str, f_val, f->op);
if (err) {
kfree(str);
goto exit_free;
}
entry->rule.buflen += f_val;
break;
case AUDIT_DIR:
str = audit_unpack_string(&bufp, &remain, f->val);
if (IS_ERR(str))
str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
entry->rule.buflen += f->val;

}
err = audit_make_tree(&entry->rule, str, f->op);
kfree(str);
if (err)
goto exit_free;
entry->rule.buflen += f_val;
break;
case AUDIT_INODE:
f->val = f_val;
err = audit_to_inode(&entry->rule, f);
if (err)
goto exit_free;
break;
case AUDIT_FILTERKEY:
if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN)
goto exit_free;
str = audit_unpack_string(&bufp, &remain, f->val);
if (IS_ERR(str))
goto exit_free;
entry->rule.buflen += f->val;
entry->rule.filterkey = str;
break;
case AUDIT_EXE:
if (entry->rule.exe || f->val > PATH_MAX)
goto exit_free;
str = audit_unpack_string(&bufp, &remain, f->val);
str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
}
entry->rule.buflen += f->val;

audit_mark = audit_alloc_mark(&entry->rule, str, f->val);
entry->rule.buflen += f_val;
entry->rule.filterkey = str;
break;
case AUDIT_EXE:
if (entry->rule.exe || f_val > PATH_MAX)
goto exit_free;
str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
}
audit_mark = audit_alloc_mark(&entry->rule, str, f_val);
if (IS_ERR(audit_mark)) {
kfree(str);
err = PTR_ERR(audit_mark);
goto exit_free;
}
entry->rule.buflen += f_val;
entry->rule.exe = audit_mark;
break;
default:
f->val = f_val;
break;
}
}

@@ -4142,9 +4142,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
* EFAULT - verifier bug
* 0 - 99% match. The last 1% is validated by the verifier.
*/
int btf_check_func_type_match(struct bpf_verifier_log *log,
struct btf *btf1, const struct btf_type *t1,
struct btf *btf2, const struct btf_type *t2)
static int btf_check_func_type_match(struct bpf_verifier_log *log,
struct btf *btf1, const struct btf_type *t1,
struct btf *btf2, const struct btf_type *t2)
{
const struct btf_param *args1, *args2;
const char *fn1, *fn2, *s1, *s2;

@@ -56,6 +56,7 @@ struct htab_elem {
union {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
struct htab_elem *batch_flink;
};
};
};
@@ -126,6 +127,17 @@ free_elems:
bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
* (bucket_lock). If both locks need to be acquired together, the lock
* order is always lru_lock -> bucket_lock and this only happens in
* bpf_lru_list.c logic. For example, certain code path of
* bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
* will acquire lru_lock first followed by acquiring bucket_lock.
*
* In hashtab.c, to avoid deadlock, lock acquisition of
* bucket_lock followed by lru_lock is not allowed. In such cases,
* bucket_lock needs to be released first before acquiring lru_lock.
*/
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
u32 hash)
{
@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags;
unsigned long flags = 0;
bool locked = false;
struct htab_elem *l;
struct bucket *b;
int ret = 0;
@@ -1319,15 +1333,25 @@ again_nocopy:
dst_val = values;
b = &htab->buckets[batch];
head = &b->head;
raw_spin_lock_irqsave(&b->lock, flags);
/* do not grab the lock unless need it (bucket_cnt > 0). */
if (locked)
raw_spin_lock_irqsave(&b->lock, flags);

bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++;

if (bucket_cnt && !locked) {
locked = true;
goto again_nocopy;
}

if (bucket_cnt > (max_count - total)) {
if (total == 0)
ret = -ENOSPC;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1337,6 +1361,9 @@ again_nocopy:

if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
@@ -1346,6 +1373,10 @@ again_nocopy:
goto alloc;
}

/* Next block is only safe to run if you have grabbed the lock */
if (!locked)
goto next_batch;

hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size);

@@ -1370,16 +1401,33 @@ again_nocopy:
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
if (is_lru_map)
bpf_lru_push_free(&htab->lru, &l->lru_node);
else

/* bpf_lru_push_free() will acquire lru_lock, which
* may cause deadlock. See comments in function
* prealloc_lru_pop(). Let us do bpf_lru_push_free()
* after releasing the bucket lock.
*/
if (is_lru_map) {
l->batch_flink = node_to_free;
node_to_free = l;
} else {
free_htab_elem(htab, l);
}
}
dst_key += key_size;
dst_val += value_size;
}

raw_spin_unlock_irqrestore(&b->lock, flags);
locked = false;

while (node_to_free) {
l = node_to_free;
node_to_free = node_to_free->batch_flink;
bpf_lru_push_free(&htab->lru, &l->lru_node);
}

next_batch:
/* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu.
*/

@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,

ulen = info->jited_prog_len;
info->jited_prog_len = aux->offload->jited_len;
if (info->jited_prog_len & ulen) {
if (info->jited_prog_len && ulen) {
uinsns = u64_to_user_ptr(info->jited_prog_insns);
ulen = min_t(u32, info->jited_prog_len, ulen);
if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {

@@ -26,70 +26,6 @@

#include <linux/uaccess.h>

static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
{
return (!access_ok(ctv, sizeof(*ctv)) ||
__get_user(tv->tv_sec, &ctv->tv_sec) ||
__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
}

static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv)
{
return (!access_ok(ctv, sizeof(*ctv)) ||
__put_user(tv->tv_sec, &ctv->tv_sec) ||
__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
}

static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts)
{
return (!access_ok(cts, sizeof(*cts)) ||
__get_user(ts->tv_sec, &cts->tv_sec) ||
__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}

static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts)
{
return (!access_ok(cts, sizeof(*cts)) ||
__put_user(ts->tv_sec, &cts->tv_sec) ||
__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}

int compat_get_timeval(struct timeval *tv, const void __user *utv)
{
if (COMPAT_USE_64BIT_TIME)
return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0;
else
return __compat_get_timeval(tv, utv);
}
EXPORT_SYMBOL_GPL(compat_get_timeval);

int compat_put_timeval(const struct timeval *tv, void __user *utv)
{
if (COMPAT_USE_64BIT_TIME)
return copy_to_user(utv, tv, sizeof(*tv)) ? -EFAULT : 0;
else
return __compat_put_timeval(tv, utv);
}
EXPORT_SYMBOL_GPL(compat_put_timeval);

int compat_get_timespec(struct timespec *ts, const void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
else
return __compat_get_timespec(ts, uts);
}
EXPORT_SYMBOL_GPL(compat_get_timespec);

int compat_put_timespec(const struct timespec *ts, void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
else
return __compat_put_timespec(ts, uts);
}
EXPORT_SYMBOL_GPL(compat_put_timespec);

#ifdef __ARCH_WANT_SYS_SIGPROCMASK

/*

@@ -302,9 +302,16 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
int err;

if (size_cmdline != -1 && default_cma) {
pr_info("Reserved memory: bypass %s node, using cmdline CMA params instead\n",
rmem->name);
return -EBUSY;
}

if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
@@ -322,7 +329,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
/* Architecture specific contiguous memory fixup. */
dma_contiguous_early_fixup(rmem->base, rmem->size);

if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
if (default_cma)
dma_contiguous_set_default(cma);

rmem->ops = &rmem_cma_ops;

@@ -23,18 +23,6 @@
*/
unsigned int zone_dma_bits __ro_after_init = 24;

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
if (!dev->dma_mask) {
dev_err_once(dev, "DMA map on device without dma_mask\n");
} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) {
dev_err_once(dev,
"overflow %pad+%zu of DMA mask %llx bus limit %llx\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
}
WARN_ON_ONCE(1);
}

static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
{
@@ -357,13 +345,6 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
size_t size)
{
return swiotlb_force != SWIOTLB_FORCE &&
dma_capable(dev, dma_addr, size, true);
}

dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -371,9 +352,16 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);

if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
!swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
report_addr(dev, dma_addr, size);
if (unlikely(swiotlb_force == SWIOTLB_FORCE))
return swiotlb_map(dev, phys, size, dir, attrs);

if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
if (swiotlb_force != SWIOTLB_NO_FORCE)
return swiotlb_map(dev, phys, size, dir, attrs);

dev_WARN_ONCE(dev, 1,
"DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
return DMA_MAPPING_ERROR;
}

@@ -411,7 +399,10 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
dma_addr_t dma_addr = paddr;

if (unlikely(!dma_capable(dev, dma_addr, size, false))) {
report_addr(dev, dma_addr, size);
dev_err_once(dev,
"DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
WARN_ON_ONCE(1);
return DMA_MAPPING_ERROR;
}

@@ -472,28 +463,26 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
}
#endif /* CONFIG_MMU */

/*
* Because 32-bit DMA masks are so common we expect every architecture to be
* able to satisfy them - either by not supporting more physical memory, or by
* providing a ZONE_DMA32. If neither is the case, the architecture needs to
* use an IOMMU instead of the direct mapping.
*/
int dma_direct_supported(struct device *dev, u64 mask)
{
u64 min_mask;
u64 min_mask = (max_pfn - 1) << PAGE_SHIFT;

if (IS_ENABLED(CONFIG_ZONE_DMA))
min_mask = DMA_BIT_MASK(zone_dma_bits);
else
min_mask = DMA_BIT_MASK(32);

min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);
/*
* Because 32-bit DMA masks are so common we expect every architecture
* to be able to satisfy them - either by not supporting more physical
* memory, or by providing a ZONE_DMA32. If neither is the case, the
* architecture needs to use an IOMMU instead of the direct mapping.
*/
if (mask >= DMA_BIT_MASK(32))
return 1;

/*
* This check needs to be against the actual bit mask value, so
* use __phys_to_dma() here so that the SME encryption mask isn't
* part of the check.
*/
if (IS_ENABLED(CONFIG_ZONE_DMA))
min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
return mask >= __phys_to_dma(dev, min_mask);
}

@@ -22,6 +22,7 @@

#include <linux/cache.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -656,35 +657,38 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
}

/*
* Create a swiotlb mapping for the buffer at @phys, and in case of DMAing
* Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing
* to the device copy the data into it as well.
*/
bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force);
phys_addr_t swiotlb_addr;
dma_addr_t dma_addr;

if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
dev_warn_ratelimited(dev,
"Cannot do DMA to address %pa\n", phys);
return false;
}
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
swiotlb_force);

/* Oh well, have to allocate and map a bounce buffer. */
*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
*phys, size, size, dir, attrs);
if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
return false;
swiotlb_addr = swiotlb_tbl_map_single(dev,
__phys_to_dma(dev, io_tlb_start),
paddr, size, size, dir, attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;

/* Ensure that the address returned is DMA'ble */
*dma_addr = __phys_to_dma(dev, *phys);
if (unlikely(!dma_capable(dev, *dma_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
dma_addr = __phys_to_dma(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return false;
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
return DMA_MAPPING_ERROR;
}

return true;
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(swiotlb_addr, size, dir);
return dma_addr;
}

size_t swiotlb_max_mapping_size(struct device *dev)

@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)

switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies smp_mb(); (B) */
smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)

switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
iput(key->shared.inode);
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout;
}

/*
* Generate a machine wide unique identifier for this inode.
*
* This relies on u64 not wrapping in the life-time of the machine; which with
* 1ns resolution means almost 585 years.
*
* This further relies on the fact that a well formed program will not unmap
* the file while it has a (shared) futex waiting on it. This mapping will have
* a file reference which pins the mount and inode.
*
* If for some reason an inode gets evicted and read back in again, it will get
* a new sequence number and will _NOT_ match, even though it is the exact same
* file.
*
* It is important that match_futex() will never have a false-positive, esp.
* for PI futexes that can mess up the state. The above argues that false-negatives
* are only possible for malformed programs.
*/
static u64 get_inode_sequence_number(struct inode *inode)
{
static atomic64_t i_seq;
u64 old;

/* Does the inode already have a sequence number? */
old = atomic64_read(&inode->i_sequence);
if (likely(old))
return old;

for (;;) {
u64 new = atomic64_add_return(1, &i_seq);
if (WARN_ON_ONCE(!new))
continue;

old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
if (old)
return old;
return new;
}
}

/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
*
* The key words are stored in @key on success.
*
* For shared mappings, it's (page->index, file_inode(vma->vm_file),
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
* For shared mappings (when @fshared), the key is:
* ( inode->i_sequence, page->index, offset_within_page )
* [ also see get_inode_sequence_number() ]
*
* For private mappings (or when !@fshared), the key is:
* ( current->mm, address, 0 )
*
* This allows (cross process, where applicable) identification of the futex
* without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@@ -659,8 +704,6 @@ again:
key->private.mm = mm;
key->private.address = address;

get_futex_key_refs(key); /* implies smp_mb(); (B) */

} else {
struct inode *inode;

@@ -692,40 +735,14 @@ again:
goto again;
}

/*
* Take a reference unless it is about to be freed. Previously
* this reference was taken by ihold under the page lock
* pinning the inode in place so i_lock was unnecessary. The
* only way for this check to fail is if the inode was
* truncated in parallel which is almost certainly an
* application bug. In such a case, just retry.
*
* We are not calling into get_futex_key_refs() in file-backed
* cases, therefore a successful atomic_inc return below will
* guarantee that get_futex_key() will still imply smp_mb(); (B).
*/
if (!atomic_inc_not_zero(&inode->i_count)) {
rcu_read_unlock();
put_page(page);

goto again;
}

/* Should be impossible but lets be paranoid for now */
if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
err = -EFAULT;
rcu_read_unlock();
iput(inode);

goto out;
}

key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = inode;
key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail);
rcu_read_unlock();
}

get_futex_key_refs(key); /* implies smp_mb(); (B) */

out:
put_page(page);
return err;

@@ -128,8 +128,6 @@ static inline void unregister_handler_proc(unsigned int irq,

extern bool irq_can_set_affinity_usr(unsigned int irq);

extern int irq_select_affinity_usr(unsigned int irq);

extern void irq_set_thread_affinity(struct irq_desc *desc);

extern int irq_do_set_affinity(struct irq_data *data,

@@ -481,23 +481,9 @@ int irq_setup_affinity(struct irq_desc *desc)
{
return irq_select_affinity(irq_desc_get_irq(desc));
}
#endif
#endif /* CONFIG_AUTO_IRQ_AFFINITY */
#endif /* CONFIG_SMP */

/*
* Called when a bogus affinity is set via /proc/irq
*/
int irq_select_affinity_usr(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
int ret;

raw_spin_lock_irqsave(&desc->lock, flags);
ret = irq_setup_affinity(desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
#endif

/**
* irq_set_vcpu_affinity - Set vcpu affinity for the interrupt

@@ -111,6 +111,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
return show_irq_affinity(AFFINITY_LIST, m);
}

#ifndef CONFIG_AUTO_IRQ_AFFINITY
static inline int irq_select_affinity_usr(unsigned int irq)
{
/*
* If the interrupt is started up already then this fails. The
* interrupt is assigned to an online CPU already. There is no
* point to move it around randomly. Tell user space that the
* selected mask is bogus.
*
* If not then any change to the affinity is pointless because the
* startup code invokes irq_setup_affinity() which will select
* a online CPU anyway.
*/
return -EINVAL;
}
#else
/* ALPHA magic affinity auto selector. Keep it for historical reasons. */
static inline int irq_select_affinity_usr(unsigned int irq)
{
return irq_select_affinity(irq);
}
#endif

static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)

@@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
* hibernation for allocations made while saving the image and for device
* drivers, in case they need to allocate memory from their hibernation
* callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
* estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
* estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
* /sys/power/reserved_size, respectively). To make this happen, we compute the
* total number of available page frames and allocate at least
*

@@ -131,11 +131,12 @@ static void s2idle_loop(void)
* to avoid them upfront.
*/
for (;;) {
if (s2idle_ops && s2idle_ops->wake)
s2idle_ops->wake();

if (pm_wakeup_pending())
if (s2idle_ops && s2idle_ops->wake) {
if (s2idle_ops->wake())
break;
} else if (pm_wakeup_pending()) {
break;
}

pm_wakeup_clear(false);

@@ -552,27 +552,32 @@ void resched_cpu(int cpu)
*/
int get_nohz_timer_target(void)
{
int i, cpu = smp_processor_id();
int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;

if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
return cpu;
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
if (!idle_cpu(cpu))
return cpu;
default_cpu = cpu;
}

rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu(i, sched_domain_span(sd)) {
for_each_cpu_and(i, sched_domain_span(sd),
housekeeping_cpumask(HK_FLAG_TIMER)) {
if (cpu == i)
continue;

if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
if (!idle_cpu(i)) {
cpu = i;
goto unlock;
}
}
}

if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
if (default_cpu == -1)
default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
cpu = default_cpu;
unlock:
rcu_read_unlock();
return cpu;
@@ -1442,17 +1447,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)

#ifdef CONFIG_SMP

static inline bool is_per_cpu_kthread(struct task_struct *p)
{
if (!(p->flags & PF_KTHREAD))
return false;

if (p->nr_cpus_allowed != 1)
return false;

return true;
}

/*
* Per-CPU kthreads are allowed to run on !active && online CPUs, see
* __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -3669,28 +3663,32 @@ static void sched_tick_remote(struct work_struct *work)
* statistics and checks timeslices in a time-independent way, regardless
* of when exactly it is running.
*/
if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
if (!tick_nohz_tick_stopped_cpu(cpu))
goto out_requeue;

rq_lock_irq(rq, &rf);
curr = rq->curr;
if (is_idle_task(curr) || cpu_is_offline(cpu))
if (cpu_is_offline(cpu))
goto out_unlock;

curr = rq->curr;
update_rq_clock(rq);
delta = rq_clock_task(rq) - curr->se.exec_start;

/*
* Make sure the next tick runs within a reasonable
* amount of time.
*/
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
if (!is_idle_task(curr)) {
/*
* Make sure the next tick runs within a reasonable
* amount of time.
*/
delta = rq_clock_task(rq) - curr->se.exec_start;
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
}
curr->sched_class->task_tick(rq, curr, 0);

calc_load_nohz_remote(rq);
out_unlock:
rq_unlock_irq(rq, &rf);

out_requeue:

/*
* Run the remote tick once per second (1Hz). This arbitrary
* frequency is large enough to avoid overload but short enough
@@ -7063,8 +7061,15 @@ void sched_move_task(struct task_struct *tsk)

if (queued)
enqueue_task(rq, tsk, queue_flags);
if (running)
if (running) {
set_next_task(rq, tsk);
/*
* After changing group, the running task may have joined a
* throttled one but it's still the running task. Trigger a
* resched to make sure that task can still run.
*/
resched_curr(rq);
}

task_rq_unlock(rq, tsk, &rf);
}
@@ -7260,7 +7265,7 @@ capacity_from_percent(char *buf)
&req.percent);
if (req.ret)
return req;
if (req.percent > UCLAMP_PERCENT_SCALE) {
if ((u64)req.percent > UCLAMP_PERCENT_SCALE) {
req.ret = -ERANGE;
return req;
}

@@ -3516,7 +3516,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
* attach_entity_load_avg - attach this entity to its cfs_rq load avg
* @cfs_rq: cfs_rq to attach to
* @se: sched_entity to attach
* @flags: migration hints
*
* Must call update_cfs_rq_load_avg() before this, since we rely on
* cfs_rq->avg.last_update_time being current.
@@ -5912,6 +5911,20 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
return prev;

/*
* Allow a per-cpu kthread to stack with the wakee if the
* kworker thread and the tasks previous CPUs are the same.
* The assumption is that the wakee queued work for the
* per-cpu kthread that is now complete and the wakeup is
* essentially a sync wakeup. An obvious example of this
* pattern is IO completions.
*/
if (is_per_cpu_kthread(current) &&
prev == smp_processor_id() &&
this_rq()->nr_running <= 1) {
return prev;
}

/* Check a recently used CPU as a potential idle candidate: */
recent_used_cpu = p->recent_used_cpu;
if (recent_used_cpu != prev &&
@@ -8658,10 +8671,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
/*
* Try to use spare capacity of local group without overloading it or
* emptying busiest.
* XXX Spreading tasks across NUMA nodes is not always the best policy
* and special care should be taken for SD_NUMA domain level before
* spreading the tasks. For now, load_balance() fully relies on
* NUMA_BALANCING and fbq_classify_group/rq to override the decision.
*/
if (local->group_type == group_has_spare) {
if (busiest->group_type > group_fully_busy) {
@@ -8701,16 +8710,37 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
env->migration_type = migrate_task;
lsub_positive(&nr_diff, local->sum_nr_running);
env->imbalance = nr_diff >> 1;
return;
} else {

/*
* If there is no overload, we just want to even the number of
* idle cpus.
*/
env->migration_type = migrate_task;
env->imbalance = max_t(long, 0, (local->idle_cpus -
busiest->idle_cpus) >> 1);
}

/* Consider allowing a small imbalance between NUMA groups */
if (env->sd->flags & SD_NUMA) {
unsigned int imbalance_min;

/*
* Compute an allowed imbalance based on a simple
* pair of communicating tasks that should remain
* local and ignore them.
*
* NOTE: Generally this would have been based on
* the domain size and this was evaluated. However,
* the benefit is similar across a range of workloads
* and machines but scaling by the domain size adds
* the risk that lower domains have to be rebalanced.
*/
imbalance_min = 2;
if (busiest->sum_nr_running <= imbalance_min)
env->imbalance = 0;
}

/*
* If there is no overload, we just want to even the number of
* idle cpus.
*/
env->migration_type = migrate_task;
env->imbalance = max_t(long, 0, (local->idle_cpus -
busiest->idle_cpus) >> 1);
return;
}

@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
return calc_load_idx & 1;
}

void calc_load_nohz_start(void)
static void calc_load_nohz_fold(struct rq *rq)
{
struct rq *this_rq = this_rq();
long delta;

/*
* We're going into NO_HZ mode, if there's any pending delta, fold it
* into the pending NO_HZ delta.
*/
delta = calc_load_fold_active(this_rq, 0);
delta = calc_load_fold_active(rq, 0);
if (delta) {
int idx = calc_load_write_idx();

@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
}
}

void calc_load_nohz_start(void)
{
/*
* We're going into NO_HZ mode, if there's any pending delta, fold it
* into the pending NO_HZ delta.
*/
calc_load_nohz_fold(this_rq());
}

/*
* Keep track of the load for NOHZ_FULL, must be called between
* calc_load_nohz_{start,stop}().
*/
void calc_load_nohz_remote(struct rq *rq)
{
calc_load_nohz_fold(rq);
}

void calc_load_nohz_stop(void)
{
struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
this_rq->calc_load_update += LOAD_FREQ;
}

static long calc_load_nohz_fold(void)
static long calc_load_nohz_read(void)
{
int idx = calc_load_read_idx();
long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
}
#else /* !CONFIG_NO_HZ_COMMON */

static inline long calc_load_nohz_fold(void) { return 0; }
static inline long calc_load_nohz_read(void) { return 0; }
static inline void calc_global_nohz(void) { }

#endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
/*
* Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
*/
delta = calc_load_nohz_fold();
delta = calc_load_nohz_read();
if (delta)
atomic_long_add(delta, &calc_load_tasks);

@@ -1199,6 +1199,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
if (static_branch_likely(&psi_disabled))
return -EOPNOTSUPP;

if (!nbytes)
return -EINVAL;

buf_size = min(nbytes, sizeof(buf));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;

@@ -896,7 +896,7 @@ struct rq {
*/
unsigned long nr_uninterruptible;

struct task_struct *curr;
struct task_struct __rcu *curr;
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
@@ -2479,3 +2479,16 @@ static inline void membarrier_switch_mm(struct rq *rq,
{
}
#endif

#ifdef CONFIG_SMP
static inline bool is_per_cpu_kthread(struct task_struct *p)
{
if (!(p->flags & PF_KTHREAD))
return false;

if (p->nr_cpus_allowed != 1)
return false;

return true;
}
#endif

@@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
{
struct sigqueue *q = NULL;
struct user_struct *user;
int sigpending;

/*
* Protect access to @t credentials. This can go away when all
* callers hold rcu read lock.
*
* NOTE! A pending signal will hold on to the user refcount,
* and we get/put the refcount only when the sigpending count
* changes from/to zero.
*/
rcu_read_lock();
user = get_uid(__task_cred(t)->user);
atomic_inc(&user->sigpending);
user = __task_cred(t)->user;
sigpending = atomic_inc_return(&user->sigpending);
if (sigpending == 1)
get_uid(user);
rcu_read_unlock();

if (override_rlimit ||
atomic_read(&user->sigpending) <=
task_rlimit(t, RLIMIT_SIGPENDING)) {
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}

if (unlikely(q == NULL)) {
atomic_dec(&user->sigpending);
free_uid(user);
if (atomic_dec_and_test(&user->sigpending))
free_uid(user);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
@@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
return;
atomic_dec(&q->user->sigpending);
free_uid(q->user);
if (atomic_dec_and_test(&q->user->sigpending))
free_uid(q->user);
kmem_cache_free(sigqueue_cachep, q);
}

@@ -805,15 +805,6 @@ static struct ctl_table kern_table[] = {
.extra2 = &maxolduid,
},
#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
{
.procname = "ieee_emulation_warnings",
.data = &sysctl_ieee_emulation_warnings,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
{
.procname = "userprocess_debug",
.data = &show_unhandled_signals,

@@ -449,49 +449,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
}
EXPORT_SYMBOL(mktime64);

/**
* ns_to_timespec - Convert nanoseconds to timespec
* @nsec: the nanoseconds value to be converted
*
* Returns the timespec representation of the nsec parameter.
*/
struct timespec ns_to_timespec(const s64 nsec)
{
struct timespec ts;
s32 rem;

if (!nsec)
return (struct timespec) {0, 0};

ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem);
if (unlikely(rem < 0)) {
ts.tv_sec--;
rem += NSEC_PER_SEC;
}
ts.tv_nsec = rem;

return ts;
}
EXPORT_SYMBOL(ns_to_timespec);

/**
* ns_to_timeval - Convert nanoseconds to timeval
* @nsec: the nanoseconds value to be converted
*
* Returns the timeval representation of the nsec parameter.
*/
struct timeval ns_to_timeval(const s64 nsec)
{
struct timespec ts = ns_to_timespec(nsec);
struct timeval tv;

tv.tv_sec = ts.tv_sec;
tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000;

return tv;
}
EXPORT_SYMBOL(ns_to_timeval);

struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec)
{
struct timespec64 ts = ns_to_timespec64(nsec);

@@ -143,8 +143,8 @@ if FTRACE

config BOOTTIME_TRACING
bool "Boot-time Tracing support"
depends on BOOT_CONFIG && TRACING
default y
depends on TRACING
select BOOT_CONFIG
help
Enable developer to setup ftrace subsystem via supplemental
kernel cmdline at boot time for debugging (tracing) driver

@@ -335,6 +335,7 @@ static void put_probe_ref(void)
|
||||
|
||||
static void blk_trace_cleanup(struct blk_trace *bt)
|
||||
{
|
||||
synchronize_rcu();
|
||||
blk_trace_free(bt);
|
||||
put_probe_ref();
|
||||
}
|
||||
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
|
||||
static int __blk_trace_startstop(struct request_queue *q, int start)
|
||||
{
|
||||
int ret;
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
bt = rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex));
|
||||
if (bt == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
|
||||
void blk_trace_shutdown(struct request_queue *q)
|
||||
{
|
||||
mutex_lock(&q->blk_trace_mutex);
|
||||
|
||||
if (q->blk_trace) {
|
||||
if (rcu_dereference_protected(q->blk_trace,
|
||||
lockdep_is_held(&q->blk_trace_mutex))) {
|
||||
__blk_trace_startstop(q, 0);
|
||||
__blk_trace_remove(q);
|
||||
}
|
||||
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
/* We don't use the 'bt' value here except as an optimization... */
|
||||
bt = rcu_dereference_protected(q->blk_trace, 1);
|
||||
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
|
||||
return 0;
|
||||
|
||||
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
|
||||
static void blk_add_trace_rq(struct request *rq, int error,
|
||||
unsigned int nr_bytes, u32 what, u64 cgid)
|
||||
{
|
||||
struct blk_trace *bt = rq->q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(rq->q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
if (blk_rq_is_passthrough(rq))
|
||||
what |= BLK_TC_ACT(BLK_TC_PC);
|
||||
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
|
||||
|
||||
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
|
||||
rq->cmd_flags, what, error, 0, NULL, cgid);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_rq_insert(void *ignore,
|
||||
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
|
||||
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
|
||||
u32 what, int error)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
|
||||
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
|
||||
blk_trace_bio_get_cgid(q, bio));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_bio_bounce(void *ignore,
|
||||
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
|
||||
if (bio)
|
||||
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
|
||||
else {
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
|
||||
NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
|
||||
if (bio)
|
||||
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
|
||||
else {
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
|
||||
0, 0, NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt)
|
||||
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
|
||||
unsigned int depth, bool explicit)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt) {
|
||||
__be64 rpdu = cpu_to_be64(depth);
|
||||
u32 what;
|
||||
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
|
||||
|
||||
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blk_add_trace_split(void *ignore,
|
||||
struct request_queue *q, struct bio *bio,
|
||||
unsigned int pdu)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (bt) {
|
||||
__be64 rpdu = cpu_to_be64(pdu);
|
||||
|
||||
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
|
||||
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
|
||||
&rpdu, blk_trace_bio_get_cgid(q, bio));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
|
||||
struct request_queue *q, struct bio *bio,
|
||||
dev_t dev, sector_t from)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
struct blk_io_trace_remap r;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
r.device_from = cpu_to_be32(dev);
|
||||
r.device_to = cpu_to_be32(bio_dev(bio));
|
||||
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
|
||||
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
|
||||
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
|
||||
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
|
||||
struct request *rq, dev_t dev,
|
||||
sector_t from)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
struct blk_io_trace_remap r;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
r.device_from = cpu_to_be32(dev);
|
||||
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
|
||||
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
|
||||
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
|
||||
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
|
||||
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
|
||||
struct request *rq,
|
||||
void *data, size_t len)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (likely(!bt))
|
||||
rcu_read_lock();
|
||||
bt = rcu_dereference(q->blk_trace);
|
||||
if (likely(!bt)) {
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
|
||||
BLK_TA_DRV_DATA, 0, len, data,
|
||||
blk_trace_request_get_cgid(q, rq));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_add_driver_data);

@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
                return -EINVAL;

        put_probe_ref();
        synchronize_rcu();
        blk_trace_free(bt);
        return 0;
}
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
        struct hd_struct *p = dev_to_part(dev);
        struct request_queue *q;
        struct block_device *bdev;
        struct blk_trace *bt;
        ssize_t ret = -ENXIO;

        bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,

        mutex_lock(&q->blk_trace_mutex);

        bt = rcu_dereference_protected(q->blk_trace,
                                       lockdep_is_held(&q->blk_trace_mutex));
        if (attr == &dev_attr_enable) {
                ret = sprintf(buf, "%u\n", !!q->blk_trace);
                ret = sprintf(buf, "%u\n", !!bt);
                goto out_unlock_bdev;
        }

        if (q->blk_trace == NULL)
        if (bt == NULL)
                ret = sprintf(buf, "disabled\n");
        else if (attr == &dev_attr_act_mask)
                ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
                ret = blk_trace_mask2str(buf, bt->act_mask);
        else if (attr == &dev_attr_pid)
                ret = sprintf(buf, "%u\n", q->blk_trace->pid);
                ret = sprintf(buf, "%u\n", bt->pid);
        else if (attr == &dev_attr_start_lba)
                ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
                ret = sprintf(buf, "%llu\n", bt->start_lba);
        else if (attr == &dev_attr_end_lba)
                ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
                ret = sprintf(buf, "%llu\n", bt->end_lba);

out_unlock_bdev:
        mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
        struct block_device *bdev;
        struct request_queue *q;
        struct hd_struct *p;
        struct blk_trace *bt;
        u64 value;
        ssize_t ret = -EINVAL;

@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,

        mutex_lock(&q->blk_trace_mutex);

        bt = rcu_dereference_protected(q->blk_trace,
                                       lockdep_is_held(&q->blk_trace_mutex));
        if (attr == &dev_attr_enable) {
                if (!!value == !!q->blk_trace) {
                if (!!value == !!bt) {
                        ret = 0;
                        goto out_unlock_bdev;
                }
@@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
        }

        ret = 0;
        if (q->blk_trace == NULL)
        if (bt == NULL)
                ret = blk_trace_setup_queue(q, bdev);

        if (ret == 0) {
                if (attr == &dev_attr_act_mask)
                        q->blk_trace->act_mask = value;
                        bt->act_mask = value;
                else if (attr == &dev_attr_pid)
                        q->blk_trace->pid = value;
                        bt->pid = value;
                else if (attr == &dev_attr_start_lba)
                        q->blk_trace->start_lba = value;
                        bt->start_lba = value;
                else if (attr == &dev_attr_end_lba)
                        q->blk_trace->end_lba = value;
                        bt->end_lba = value;
        }

out_unlock_bdev:
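
On the update side, the sysfs show/store handlers above already hold q->blk_trace_mutex, so they use rcu_dereference_protected() with a lockdep_is_held() annotation rather than a full RCU read-side critical section, and blk_trace_remove_queue() now calls synchronize_rcu() before blk_trace_free() so that in-flight readers drain first. A minimal sketch of that locked access, using only fields shown in the hunks above and a hypothetical function name:

static void example_sysfs_peek(struct request_queue *q)
{
        struct blk_trace *bt;

        mutex_lock(&q->blk_trace_mutex);
        bt = rcu_dereference_protected(q->blk_trace,
                                       lockdep_is_held(&q->blk_trace_mutex));
        if (bt)
                pr_info("blktrace pid filter: %u\n", bt->pid); /* safe: mutex held */
        mutex_unlock(&q->blk_trace_mutex);
}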

@@ -111,11 +111,11 @@ static int __init test_gen_synth_cmd(void)
        /* Create some bogus values just for testing */

        vals[0] = 777; /* next_pid_field */
        vals[1] = (u64)"hula hoops"; /* next_comm_field */
        vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */
        vals[2] = 1000000; /* ts_ns */
        vals[3] = 1000; /* ts_ms */
        vals[4] = smp_processor_id(); /* cpu */
        vals[5] = (u64)"thneed"; /* my_string_field */
        vals[4] = raw_smp_processor_id(); /* cpu */
        vals[5] = (u64)(long)"thneed"; /* my_string_field */
        vals[6] = 598; /* my_int_field */

        /* Now generate a gen_synth_test event */
@@ -218,11 +218,11 @@ static int __init test_empty_synth_event(void)
        /* Create some bogus values just for testing */

        vals[0] = 777; /* next_pid_field */
        vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
        vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
        vals[2] = 1000000; /* ts_ns */
        vals[3] = 1000; /* ts_ms */
        vals[4] = smp_processor_id(); /* cpu */
        vals[5] = (u64)"thneed_2.0"; /* my_string_field */
        vals[4] = raw_smp_processor_id(); /* cpu */
        vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */
        vals[6] = 399; /* my_int_field */

        /* Now trace an empty_synth_test event */
@@ -290,11 +290,11 @@ static int __init test_create_synth_event(void)
        /* Create some bogus values just for testing */

        vals[0] = 777; /* next_pid_field */
        vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
        vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
        vals[2] = 1000000; /* ts_ns */
        vals[3] = 1000; /* ts_ms */
        vals[4] = smp_processor_id(); /* cpu */
        vals[5] = (u64)"thneed"; /* my_string_field */
        vals[4] = raw_smp_processor_id(); /* cpu */
        vals[5] = (u64)(long)"thneed"; /* my_string_field */
        vals[6] = 398; /* my_int_field */

        /* Now generate a create_synth_test event */
@@ -330,7 +330,7 @@ static int __init test_add_next_synth_val(void)
                goto out;

        /* next_comm_field */
        ret = synth_event_add_next_val((u64)"slinky", &trace_state);
        ret = synth_event_add_next_val((u64)(long)"slinky", &trace_state);
        if (ret)
                goto out;

@@ -345,12 +345,12 @@ static int __init test_add_next_synth_val(void)
                goto out;

        /* cpu */
        ret = synth_event_add_next_val(smp_processor_id(), &trace_state);
        ret = synth_event_add_next_val(raw_smp_processor_id(), &trace_state);
        if (ret)
                goto out;

        /* my_string_field */
        ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state);
        ret = synth_event_add_next_val((u64)(long)"thneed_2.01", &trace_state);
        if (ret)
                goto out;

@@ -388,7 +388,7 @@ static int __init test_add_synth_val(void)
        if (ret)
                goto out;

        ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state);
        ret = synth_event_add_val("cpu", raw_smp_processor_id(), &trace_state);
        if (ret)
                goto out;

@@ -396,12 +396,12 @@ static int __init test_add_synth_val(void)
        if (ret)
                goto out;

        ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
        ret = synth_event_add_val("next_comm_field", (u64)(long)"silly putty",
                                  &trace_state);
        if (ret)
                goto out;

        ret = synth_event_add_val("my_string_field", (u64)"thneed_9",
        ret = synth_event_add_val("my_string_field", (u64)(long)"thneed_9",
                                  &trace_state);
        if (ret)
                goto out;
@@ -423,13 +423,13 @@ static int __init test_trace_synth_event(void)

        /* Trace some bogus values just for testing */
        ret = synth_event_trace(create_synth_test, 7, /* number of values */
                                444, /* next_pid_field */
                                (u64)"clackers", /* next_comm_field */
                                1000000, /* ts_ns */
                                1000, /* ts_ms */
                                smp_processor_id(), /* cpu */
                                (u64)"Thneed", /* my_string_field */
                                999); /* my_int_field */
                                (u64)444, /* next_pid_field */
                                (u64)(long)"clackers", /* next_comm_field */
                                (u64)1000000, /* ts_ns */
                                (u64)1000, /* ts_ms */
                                (u64)raw_smp_processor_id(), /* cpu */
                                (u64)(long)"Thneed", /* my_string_field */
                                (u64)999); /* my_int_field */
        return ret;
}
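
Two small portability fixes run through all of the selftest hunks above: a pointer is first cast to (long) before being widened to u64, which keeps 32-bit builds from emitting a "cast from pointer to integer of different size" warning, and raw_smp_processor_id() replaces smp_processor_id(), which would trip the "using smp_processor_id() in preemptible" debug check here even though the CPU number is only test data. Illustrative lines only:

        vals[1] = (u64)(long)"hula hoops";      /* pointer-sized cast first, then widen */
        vals[4] = raw_smp_processor_id();       /* no preemption-debug splat */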

@@ -1837,6 +1837,7 @@ static __init int init_trace_selftests(void)

        pr_info("Running postponed tracer tests:\n");

        tracing_selftest_running = true;
        list_for_each_entry_safe(p, n, &postponed_selftests, list) {
                /* This loop can take minutes when sanitizers are enabled, so
                 * lets make sure we allow RCU processing.
@@ -1859,6 +1860,7 @@ static __init int init_trace_selftests(void)
                list_del(&p->list);
                kfree(p);
        }
        tracing_selftest_running = false;

 out:
        mutex_unlock(&trace_types_lock);

@@ -821,6 +821,29 @@ static const char *synth_field_fmt(char *type)
        return fmt;
}

static void print_synth_event_num_val(struct trace_seq *s,
                                      char *print_fmt, char *name,
                                      int size, u64 val, char *space)
{
        switch (size) {
        case 1:
                trace_seq_printf(s, print_fmt, name, (u8)val, space);
                break;

        case 2:
                trace_seq_printf(s, print_fmt, name, (u16)val, space);
                break;

        case 4:
                trace_seq_printf(s, print_fmt, name, (u32)val, space);
                break;

        default:
                trace_seq_printf(s, print_fmt, name, val, space);
                break;
        }
}

static enum print_line_t print_synth_event(struct trace_iterator *iter,
                                           int flags,
                                           struct trace_event *event)
@@ -859,10 +882,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
                } else {
                        struct trace_print_flags __flags[] = {
                                __def_gfpflag_names, {-1, NULL} };
                        char *space = (i == se->n_fields - 1 ? "" : " ");

                        trace_seq_printf(s, print_fmt, se->fields[i]->name,
                                         entry->fields[n_u64],
                                         i == se->n_fields - 1 ? "" : " ");
                        print_synth_event_num_val(s, print_fmt,
                                                  se->fields[i]->name,
                                                  se->fields[i]->size,
                                                  entry->fields[n_u64],
                                                  space);

                        if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
                                trace_seq_puts(s, " (");
@@ -1798,6 +1824,62 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen)
}
EXPORT_SYMBOL_GPL(synth_event_cmd_init);

static inline int
__synth_event_trace_start(struct trace_event_file *file,
                          struct synth_event_trace_state *trace_state)
{
        int entry_size, fields_size = 0;
        int ret = 0;

        memset(trace_state, '\0', sizeof(*trace_state));

        /*
         * Normal event tracing doesn't get called at all unless the
         * ENABLED bit is set (which attaches the probe thus allowing
         * this code to be called, etc). Because this is called
         * directly by the user, we don't have that but we still need
         * to honor not logging when disabled. For the iterated
         * trace case, we save the enabled state upon start and just
         * ignore the following data calls.
         */
        if (!(file->flags & EVENT_FILE_FL_ENABLED) ||
            trace_trigger_soft_disabled(file)) {
                trace_state->disabled = true;
                ret = -ENOENT;
                goto out;
        }

        trace_state->event = file->event_call->data;

        fields_size = trace_state->event->n_u64 * sizeof(u64);

        /*
         * Avoid ring buffer recursion detection, as this event
         * is being performed within another event.
         */
        trace_state->buffer = file->tr->array_buffer.buffer;
        ring_buffer_nest_start(trace_state->buffer);

        entry_size = sizeof(*trace_state->entry) + fields_size;
        trace_state->entry = trace_event_buffer_reserve(&trace_state->fbuffer,
                                                        file,
                                                        entry_size);
        if (!trace_state->entry) {
                ring_buffer_nest_end(trace_state->buffer);
                ret = -EINVAL;
        }
out:
        return ret;
}

static inline void
__synth_event_trace_end(struct synth_event_trace_state *trace_state)
{
        trace_event_buffer_commit(&trace_state->fbuffer);

        ring_buffer_nest_end(trace_state->buffer);
}
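
The two helpers above factor out the open/commit sequence that the synth_event_trace*() variants below now share: __synth_event_trace_start() checks the enabled state, nests the ring buffer and reserves the entry, and __synth_event_trace_end() commits it and drops the nesting. A condensed sketch of a caller, using the hypothetical name example_trace_one() and eliding the field filling:

static int example_trace_one(struct trace_event_file *file)
{
        struct synth_event_trace_state state;
        int ret;

        ret = __synth_event_trace_start(file, &state);
        if (ret) {
                if (ret == -ENOENT)
                        ret = 0;        /* soft-disabled, not an error */
                return ret;
        }

        /* ... fill state.entry->fields[] according to state.event ... */

        __synth_event_trace_end(&state);        /* commit + ring_buffer_nest_end */

        return ret;
}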

/**
 * synth_event_trace - Trace a synthetic event
 * @file: The trace_event_file representing the synthetic event
@@ -1819,71 +1901,61 @@ EXPORT_SYMBOL_GPL(synth_event_cmd_init);
 */
int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
{
        struct trace_event_buffer fbuffer;
        struct synth_trace_event *entry;
        struct trace_buffer *buffer;
        struct synth_event *event;
        struct synth_event_trace_state state;
        unsigned int i, n_u64;
        int fields_size = 0;
        va_list args;
        int ret = 0;
        int ret;

        /*
         * Normal event generation doesn't get called at all unless
         * the ENABLED bit is set (which attaches the probe thus
         * allowing this code to be called, etc). Because this is
         * called directly by the user, we don't have that but we
         * still need to honor not logging when disabled.
         */
        if (!(file->flags & EVENT_FILE_FL_ENABLED))
                return 0;
        ret = __synth_event_trace_start(file, &state);
        if (ret) {
                if (ret == -ENOENT)
                        ret = 0; /* just disabled, not really an error */
                return ret;
        }

        event = file->event_call->data;

        if (n_vals != event->n_fields)
                return -EINVAL;

        if (trace_trigger_soft_disabled(file))
                return -EINVAL;

        fields_size = event->n_u64 * sizeof(u64);

        /*
         * Avoid ring buffer recursion detection, as this event
         * is being performed within another event.
         */
        buffer = file->tr->array_buffer.buffer;
        ring_buffer_nest_start(buffer);

        entry = trace_event_buffer_reserve(&fbuffer, file,
                                           sizeof(*entry) + fields_size);
        if (!entry) {
        if (n_vals != state.event->n_fields) {
                ret = -EINVAL;
                goto out;
        }

        va_start(args, n_vals);
        for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
        for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
                u64 val;

                val = va_arg(args, u64);

                if (event->fields[i]->is_string) {
                if (state.event->fields[i]->is_string) {
                        char *str_val = (char *)(long)val;
                        char *str_field = (char *)&entry->fields[n_u64];
                        char *str_field = (char *)&state.entry->fields[n_u64];

                        strscpy(str_field, str_val, STR_VAR_LEN_MAX);
                        n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
                } else {
                        entry->fields[n_u64] = val;
                        struct synth_field *field = state.event->fields[i];

                        switch (field->size) {
                        case 1:
                                *(u8 *)&state.entry->fields[n_u64] = (u8)val;
                                break;

                        case 2:
                                *(u16 *)&state.entry->fields[n_u64] = (u16)val;
                                break;

                        case 4:
                                *(u32 *)&state.entry->fields[n_u64] = (u32)val;
                                break;

                        default:
                                state.entry->fields[n_u64] = val;
                                break;
                        }
                        n_u64++;
                }
        }
        va_end(args);

        trace_event_buffer_commit(&fbuffer);
out:
        ring_buffer_nest_end(buffer);
        __synth_event_trace_end(&state);

        return ret;
}
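
For reference, the variadic form reads each value back with va_arg(args, u64), so every argument after n_vals has to be passed as a u64, with strings passed as pointers cast through (long), exactly as in the selftest hunk earlier in this commit. A hypothetical three-field sketch, where "file" is assumed to be the synthetic event's trace_event_file (the selftests obtain theirs via trace_get_event_file()):

        ret = synth_event_trace(file, 3,                /* number of values */
                                (u64)444,               /* a pid-style field */
                                (u64)(long)"clackers",  /* a string field */
                                (u64)raw_smp_processor_id()); /* cpu */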

@@ -1910,64 +1982,55 @@ EXPORT_SYMBOL_GPL(synth_event_trace);
int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
                            unsigned int n_vals)
{
        struct trace_event_buffer fbuffer;
        struct synth_trace_event *entry;
        struct trace_buffer *buffer;
        struct synth_event *event;
        struct synth_event_trace_state state;
        unsigned int i, n_u64;
        int fields_size = 0;
        int ret = 0;
        int ret;

        /*
         * Normal event generation doesn't get called at all unless
         * the ENABLED bit is set (which attaches the probe thus
         * allowing this code to be called, etc). Because this is
         * called directly by the user, we don't have that but we
         * still need to honor not logging when disabled.
         */
        if (!(file->flags & EVENT_FILE_FL_ENABLED))
                return 0;
        ret = __synth_event_trace_start(file, &state);
        if (ret) {
                if (ret == -ENOENT)
                        ret = 0; /* just disabled, not really an error */
                return ret;
        }

        event = file->event_call->data;

        if (n_vals != event->n_fields)
                return -EINVAL;

        if (trace_trigger_soft_disabled(file))
                return -EINVAL;

        fields_size = event->n_u64 * sizeof(u64);

        /*
         * Avoid ring buffer recursion detection, as this event
         * is being performed within another event.
         */
        buffer = file->tr->array_buffer.buffer;
        ring_buffer_nest_start(buffer);

        entry = trace_event_buffer_reserve(&fbuffer, file,
                                           sizeof(*entry) + fields_size);
        if (!entry) {
        if (n_vals != state.event->n_fields) {
                ret = -EINVAL;
                goto out;
        }

        for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
                if (event->fields[i]->is_string) {
        for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
                if (state.event->fields[i]->is_string) {
                        char *str_val = (char *)(long)vals[i];
                        char *str_field = (char *)&entry->fields[n_u64];
                        char *str_field = (char *)&state.entry->fields[n_u64];

                        strscpy(str_field, str_val, STR_VAR_LEN_MAX);
                        n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
                } else {
                        entry->fields[n_u64] = vals[i];
                        struct synth_field *field = state.event->fields[i];
                        u64 val = vals[i];

                        switch (field->size) {
                        case 1:
                                *(u8 *)&state.entry->fields[n_u64] = (u8)val;
                                break;

                        case 2:
                                *(u16 *)&state.entry->fields[n_u64] = (u16)val;
                                break;

                        case 4:
                                *(u32 *)&state.entry->fields[n_u64] = (u32)val;
                                break;

                        default:
                                state.entry->fields[n_u64] = val;
                                break;
                        }
                        n_u64++;
                }
        }

        trace_event_buffer_commit(&fbuffer);
out:
        ring_buffer_nest_end(buffer);
        __synth_event_trace_end(&state);

        return ret;
}
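
The array form takes one u64 per field in declaration order; string fields are again passed as pointers cast through (long). A hypothetical sketch modeled on the earlier selftests, with "file" and the field layout assumed:

        u64 vals[3];

        vals[0] = 777;                          /* a pid-style field */
        vals[1] = (u64)(long)"hula hoops";      /* a string field */
        vals[2] = raw_smp_processor_id();       /* cpu */

        ret = synth_event_trace_array(file, vals, ARRAY_SIZE(vals));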

@@ -2004,58 +2067,15 @@ EXPORT_SYMBOL_GPL(synth_event_trace_array);
int synth_event_trace_start(struct trace_event_file *file,
                            struct synth_event_trace_state *trace_state)
{
        struct synth_trace_event *entry;
        int fields_size = 0;
        int ret = 0;
        int ret;

        if (!trace_state) {
                ret = -EINVAL;
                goto out;
        }
        if (!trace_state)
                return -EINVAL;

        memset(trace_state, '\0', sizeof(*trace_state));
        ret = __synth_event_trace_start(file, trace_state);
        if (ret == -ENOENT)
                ret = 0; /* just disabled, not really an error */

        /*
         * Normal event tracing doesn't get called at all unless the
         * ENABLED bit is set (which attaches the probe thus allowing
         * this code to be called, etc). Because this is called
         * directly by the user, we don't have that but we still need
         * to honor not logging when disabled. For the the iterated
         * trace case, we save the enabed state upon start and just
         * ignore the following data calls.
         */
        if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
                trace_state->enabled = false;
                goto out;
        }

        trace_state->enabled = true;

        trace_state->event = file->event_call->data;

        if (trace_trigger_soft_disabled(file)) {
                ret = -EINVAL;
                goto out;
        }

        fields_size = trace_state->event->n_u64 * sizeof(u64);

        /*
         * Avoid ring buffer recursion detection, as this event
         * is being performed within another event.
         */
        trace_state->buffer = file->tr->array_buffer.buffer;
        ring_buffer_nest_start(trace_state->buffer);

        entry = trace_event_buffer_reserve(&trace_state->fbuffer, file,
                                           sizeof(*entry) + fields_size);
        if (!entry) {
                ret = -EINVAL;
                goto out;
        }

        trace_state->entry = entry;
out:
        return ret;
}
EXPORT_SYMBOL_GPL(synth_event_trace_start);
@@ -2088,7 +2108,7 @@ static int __synth_event_add_val(const char *field_name, u64 val,
                trace_state->add_next = true;
        }

        if (!trace_state->enabled)
        if (trace_state->disabled)
                goto out;

        event = trace_state->event;
@@ -2122,8 +2142,25 @@ static int __synth_event_add_val(const char *field_name, u64 val,

                str_field = (char *)&entry->fields[field->offset];
                strscpy(str_field, str_val, STR_VAR_LEN_MAX);
        } else
                entry->fields[field->offset] = val;
        } else {
                switch (field->size) {
                case 1:
                        *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
                        break;

                case 2:
                        *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
                        break;

                case 4:
                        *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
                        break;

                default:
                        trace_state->entry->fields[field->offset] = val;
                        break;
                }
        }
out:
        return ret;
}
@@ -2223,9 +2260,7 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state)
        if (!trace_state)
                return -EINVAL;

        trace_event_buffer_commit(&trace_state->fbuffer);

        ring_buffer_nest_end(trace_state->buffer);
        __synth_event_trace_end(trace_state);

        return 0;
}
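
Put together, the piecewise API mirrors the selftests earlier in this commit: synth_event_trace_start() reserves the entry, values are added either positionally with synth_event_add_next_val() or by name with synth_event_add_val(), and synth_event_trace_end() commits the entry and drops the ring-buffer nesting. A hypothetical sketch, with "file" assumed to be the event's trace_event_file; trace_end() is called even when an add fails so that trace_start() is balanced:

        struct synth_event_trace_state trace_state;
        int ret;

        ret = synth_event_trace_start(file, &trace_state);
        if (ret)
                return ret;

        ret = synth_event_add_next_val(777, &trace_state);      /* first field */
        if (!ret)
                ret = synth_event_add_val("cpu", raw_smp_processor_id(),
                                          &trace_state);

        synth_event_trace_end(&trace_state);    /* always balance trace_start() */
        return ret;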

@@ -1012,7 +1012,7 @@ int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...)
{
        struct dynevent_arg arg;
        va_list args;
        int ret;
        int ret = 0;

        if (cmd->type != DYNEVENT_TYPE_KPROBE)
                return -EINVAL;