Merge remote-tracking branch 'origin' into irqdomain/next
@@ -62,8 +62,10 @@ static async_cookie_t next_cookie = 1;
#define MAX_WORK 32768

static LIST_HEAD(async_pending);
static LIST_HEAD(async_running);
static ASYNC_DOMAIN(async_running);
static LIST_HEAD(async_domains);
static DEFINE_SPINLOCK(async_lock);
static DEFINE_MUTEX(async_register_mutex);

struct async_entry {
struct list_head list;
@@ -71,7 +73,7 @@ struct async_entry {
async_cookie_t cookie;
async_func_ptr *func;
void *data;
struct list_head *running;
struct async_domain *running;
};

static DECLARE_WAIT_QUEUE_HEAD(async_done);
@@ -82,13 +84,12 @@ static atomic_t entry_count;
/*
* MUST be called with the lock held!
*/
static async_cookie_t __lowest_in_progress(struct list_head *running)
static async_cookie_t __lowest_in_progress(struct async_domain *running)
{
struct async_entry *entry;

if (!list_empty(running)) {
entry = list_first_entry(running,
struct async_entry, list);
if (!list_empty(&running->domain)) {
entry = list_first_entry(&running->domain, typeof(*entry), list);
return entry->cookie;
}

@@ -99,7 +100,7 @@ static async_cookie_t __lowest_in_progress(struct list_head *running)
return next_cookie; /* "infinity" value */
}

static async_cookie_t lowest_in_progress(struct list_head *running)
static async_cookie_t lowest_in_progress(struct async_domain *running)
{
unsigned long flags;
async_cookie_t ret;
@@ -119,10 +120,11 @@ static void async_run_entry_fn(struct work_struct *work)
container_of(work, struct async_entry, work);
unsigned long flags;
ktime_t uninitialized_var(calltime), delta, rettime;
struct async_domain *running = entry->running;

/* 1) move self to the running queue */
spin_lock_irqsave(&async_lock, flags);
list_move_tail(&entry->list, entry->running);
list_move_tail(&entry->list, &running->domain);
spin_unlock_irqrestore(&async_lock, flags);

/* 2) run (and print duration) */
@@ -145,6 +147,8 @@ static void async_run_entry_fn(struct work_struct *work)
/* 3) remove self from the running queue */
spin_lock_irqsave(&async_lock, flags);
list_del(&entry->list);
if (running->registered && --running->count == 0)
list_del_init(&running->node);

/* 4) free the entry */
kfree(entry);
@@ -156,7 +160,7 @@ static void async_run_entry_fn(struct work_struct *work)
wake_up(&async_done);
}

static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *running)
{
struct async_entry *entry;
unsigned long flags;
@@ -187,6 +191,8 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
spin_lock_irqsave(&async_lock, flags);
newcookie = entry->cookie = next_cookie++;
list_add_tail(&entry->list, &async_pending);
if (running->registered && running->count++ == 0)
list_add_tail(&running->node, &async_domains);
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);

@@ -223,7 +229,7 @@ EXPORT_SYMBOL_GPL(async_schedule);
* Note: This function may be called from atomic or non-atomic contexts.
*/
async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
struct list_head *running)
struct async_domain *running)
{
return __async_schedule(ptr, data, running);
}
@@ -236,22 +242,52 @@ EXPORT_SYMBOL_GPL(async_schedule_domain);
*/
void async_synchronize_full(void)
{
mutex_lock(&async_register_mutex);
do {
async_synchronize_cookie(next_cookie);
} while (!list_empty(&async_running) || !list_empty(&async_pending));
struct async_domain *domain = NULL;

spin_lock_irq(&async_lock);
if (!list_empty(&async_domains))
domain = list_first_entry(&async_domains, typeof(*domain), node);
spin_unlock_irq(&async_lock);

async_synchronize_cookie_domain(next_cookie, domain);
} while (!list_empty(&async_domains));
mutex_unlock(&async_register_mutex);
}
EXPORT_SYMBOL_GPL(async_synchronize_full);

/**
* async_unregister_domain - ensure no more anonymous waiters on this domain
* @domain: idle domain to flush out of any async_synchronize_full instances
*
* async_synchronize_{cookie|full}_domain() are not flushed since callers
* of these routines should know the lifetime of @domain
*
* Prefer ASYNC_DOMAIN_EXCLUSIVE() declarations over flushing
*/
void async_unregister_domain(struct async_domain *domain)
{
mutex_lock(&async_register_mutex);
spin_lock_irq(&async_lock);
WARN_ON(!domain->registered || !list_empty(&domain->node) ||
!list_empty(&domain->domain));
domain->registered = 0;
spin_unlock_irq(&async_lock);
mutex_unlock(&async_register_mutex);
}
EXPORT_SYMBOL_GPL(async_unregister_domain);

/**
* async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
* @list: running list to synchronize on
* @domain: running list to synchronize on
*
* This function waits until all asynchronous function calls for the
* synchronization domain specified by the running list @list have been done.
* synchronization domain specified by the running list @domain have been done.
*/
void async_synchronize_full_domain(struct list_head *list)
void async_synchronize_full_domain(struct async_domain *domain)
{
async_synchronize_cookie_domain(next_cookie, list);
async_synchronize_cookie_domain(next_cookie, domain);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_domain);

@@ -261,14 +297,16 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
* @running: running list to synchronize on
*
* This function waits until all asynchronous function calls for the
* synchronization domain specified by the running list @list submitted
* synchronization domain specified by running list @running submitted
* prior to @cookie have been done.
*/
void async_synchronize_cookie_domain(async_cookie_t cookie,
struct list_head *running)
void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *running)
{
ktime_t uninitialized_var(starttime), delta, endtime;

if (!running)
return;

if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
starttime = ktime_get();

@@ -384,7 +384,7 @@ static void audit_hold_skb(struct sk_buff *skb)
static void audit_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = NLMSG_DATA(nlh);
char *data = nlmsg_data(nlh);

if (nlh->nlmsg_type != AUDIT_EOE) {
if (printk_ratelimit())
@@ -516,14 +516,15 @@ struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
if (!skb)
return NULL;

nlh = NLMSG_NEW(skb, pid, seq, t, size, flags);
data = NLMSG_DATA(nlh);
nlh = nlmsg_put(skb, pid, seq, t, size, flags);
if (!nlh)
goto out_kfree_skb;
data = nlmsg_data(nlh);
memcpy(data, payload, size);
return skb;

nlmsg_failure: /* Used by NLMSG_NEW */
if (skb)
kfree_skb(skb);
out_kfree_skb:
kfree_skb(skb);
return NULL;
}

@@ -680,7 +681,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sessionid = audit_get_sessionid(current);
security_task_getsecid(current, &sid);
seq = nlh->nlmsg_seq;
data = NLMSG_DATA(nlh);
data = nlmsg_data(nlh);

switch (msg_type) {
case AUDIT_GET:
@@ -961,14 +962,17 @@ static void audit_receive(struct sk_buff *skb)
static int __init audit_init(void)
{
int i;
struct netlink_kernel_cfg cfg = {
.input = audit_receive,
};

if (audit_initialized == AUDIT_DISABLED)
return 0;

printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0,
audit_receive, NULL, THIS_MODULE);
audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT,
THIS_MODULE, &cfg);
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
else
@@ -1060,13 +1064,15 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,

ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask);
if (!ab->skb)
goto nlmsg_failure;
goto err;

nlh = NLMSG_NEW(ab->skb, 0, 0, type, 0, 0);
nlh = nlmsg_put(ab->skb, 0, 0, type, 0, 0);
if (!nlh)
goto out_kfree_skb;

return ab;

nlmsg_failure: /* Used by NLMSG_NEW */
out_kfree_skb:
kfree_skb(ab->skb);
ab->skb = NULL;
err:

@@ -595,7 +595,7 @@ void audit_trim_trees(void)

root_mnt = collect_mounts(&path);
path_put(&path);
if (!root_mnt)
if (IS_ERR(root_mnt))
goto skip_it;

spin_lock(&hash_lock);
@@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule)
goto Err;
mnt = collect_mounts(&path);
path_put(&path);
if (!mnt) {
err = -ENOMEM;
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto Err;
}

@@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new)
return err;
tagged = collect_mounts(&path2);
path_put(&path2);
if (!tagged)
return -ENOMEM;
if (IS_ERR(tagged))
return PTR_ERR(tagged);

err = kern_path(old, 0, &path1);
if (err) {

@@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
/* Get path information necessary for adding watches. */
static int audit_get_nd(struct audit_watch *watch, struct path *parent)
{
struct nameidata nd;
struct dentry *d;
int err;

err = kern_path_parent(watch->path, &nd);
if (err)
return err;

if (nd.last_type != LAST_NORM) {
path_put(&nd.path);
return -EINVAL;
}

mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
if (IS_ERR(d)) {
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
}
mutex_unlock(&parent->dentry->d_inode->i_mutex);
if (d->d_inode) {
/* update watch filter fields */
watch->dev = d->d_inode->i_sb->s_dev;
watch->ino = d->d_inode->i_ino;
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);

*parent = nd.path;
dput(d);
return 0;
}

@@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
*/

static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
@@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
mutex_unlock(&cgroup_mutex);

/*
* We want to drop the active superblock reference from the
* cgroup creation after all the dentry refs are gone -
* kill_sb gets mighty unhappy otherwise. Mark
* dentry->d_fsdata with cgroup_diput() to tell
* cgroup_d_release() to call deactivate_super().
* Drop the active superblock reference that we took when we
* created the cgroup
*/
dentry->d_fsdata = cgroup_diput;
deactivate_super(cgrp->root->sb);

/*
* if we're getting rid of the cgroup, refcount should ensure
@@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
return 1;
}

static void cgroup_d_release(struct dentry *dentry)
{
/* did cgroup_diput() tell me to deactivate super? */
if (dentry->d_fsdata == cgroup_diput)
deactivate_super(dentry->d_sb);
}

static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -964,7 +954,7 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)

dget(d);
d_delete(d);
simple_unlink(d->d_inode, d);
simple_unlink(cgrp->dentry->d_inode, d);
list_del_init(&cfe->node);
dput(d);

@@ -1078,28 +1068,24 @@ static int rebind_subsystems(struct cgroupfs_root *root,
BUG_ON(cgrp->subsys[i]);
BUG_ON(!dummytop->subsys[i]);
BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
mutex_lock(&ss->hierarchy_mutex);
cgrp->subsys[i] = dummytop->subsys[i];
cgrp->subsys[i]->cgroup = cgrp;
list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
if (ss->bind)
ss->bind(cgrp);
mutex_unlock(&ss->hierarchy_mutex);
/* refcount was already taken, and we're keeping it */
} else if (bit & removed_bits) {
/* We're removing this subsystem */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
mutex_lock(&ss->hierarchy_mutex);
if (ss->bind)
ss->bind(dummytop);
dummytop->subsys[i]->cgroup = dummytop;
cgrp->subsys[i] = NULL;
subsys[i]->root = &rootnode;
list_move(&ss->sibling, &rootnode.subsys_list);
mutex_unlock(&ss->hierarchy_mutex);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
} else if (bit & final_bits) {
@@ -1547,7 +1533,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
.d_delete = cgroup_delete,
.d_release = cgroup_d_release,
};

struct inode *inode =
@@ -1598,7 +1583,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
opts.new_root = new_root;

/* Locate an existing or new sb for this hierarchy */
sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
cgroup_drop_root(opts.new_root);
@@ -2581,7 +2566,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.rename = cgroup_rename,
};

static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
@@ -3894,8 +3879,12 @@ static void css_dput_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, dput_work);
struct dentry *dentry = css->cgroup->dentry;
struct super_block *sb = dentry->d_sb;

dput(css->cgroup->dentry);
atomic_inc(&sb->s_active);
dput(dentry);
deactivate_super(sb);
}

static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -3922,37 +3911,6 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
}

static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
{
/* We need to take each hierarchy_mutex in a consistent order */
int i;

/*
* No worry about a race with rebind_subsystems that might mess up the
* locking order, since both parties are under cgroup_mutex.
*/
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss == NULL)
continue;
if (ss->root == root)
mutex_lock(&ss->hierarchy_mutex);
}
}

static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
{
int i;

for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss == NULL)
continue;
if (ss->root == root)
mutex_unlock(&ss->hierarchy_mutex);
}
}

/*
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
@@ -4013,9 +3971,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
ss->post_clone(cgrp);
}

cgroup_lock_hierarchy(root);
list_add(&cgrp->sibling, &cgrp->parent->children);
cgroup_unlock_hierarchy(root);
root->number_of_cgroups++;

err = cgroup_create_dir(cgrp, dentry, mode);
@@ -4042,9 +3998,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,

err_remove:

cgroup_lock_hierarchy(root);
list_del(&cgrp->sibling);
cgroup_unlock_hierarchy(root);
root->number_of_cgroups--;

err_destroy:
@@ -4252,10 +4206,8 @@ again:
list_del_init(&cgrp->release_list);
raw_spin_unlock(&release_list_lock);

cgroup_lock_hierarchy(cgrp->root);
/* delete this cgroup from parent->children */
list_del_init(&cgrp->sibling);
cgroup_unlock_hierarchy(cgrp->root);

list_del_init(&cgrp->allcg_node);

@@ -4329,8 +4281,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));

mutex_init(&ss->hierarchy_mutex);
lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;

/* this function shouldn't be used with modular subsystems, since they
@@ -4457,8 +4407,6 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
}
write_unlock(&css_set_lock);

mutex_init(&ss->hierarchy_mutex);
lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;

/* success! */

@@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/kmsg_dump.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
@@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv)
*/
static int kdb_dmesg(int argc, const char **argv)
{
char *syslog_data[4], *start, *end, c = '\0', *p;
int diag, logging, logsize, lines = 0, adjust = 0, n;
int diag;
int logging;
int lines = 0;
int adjust = 0;
int n = 0;
int skip = 0;
struct kmsg_dumper dumper = { .active = 1 };
size_t len;
char buf[201];

if (argc > 2)
return KDB_ARGCOUNT;
@@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv)
kdb_set(2, setargs);
}

/* syslog_data[0,1] physical start, end+1. syslog_data[2,3]
* logical start, end+1. */
kdb_syslog_data(syslog_data);
if (syslog_data[2] == syslog_data[3])
return 0;
logsize = syslog_data[1] - syslog_data[0];
start = syslog_data[2];
end = syslog_data[3];
#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
for (n = 0, p = start; p < end; ++p) {
c = *KDB_WRAP(p);
if (c == '\n')
++n;
}
if (c != '\n')
++n;
kmsg_dump_rewind_nolock(&dumper);
while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
n++;

if (lines < 0) {
if (adjust >= n)
kdb_printf("buffer only contains %d lines, nothing "
@@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv)
else if (adjust - lines >= n)
kdb_printf("buffer only contains %d lines, last %d "
"lines printed\n", n, n - adjust);
if (adjust) {
for (; start < end && adjust; ++start) {
if (*KDB_WRAP(start) == '\n')
--adjust;
}
if (start < end)
++start;
}
for (p = start; p < end && lines; ++p) {
if (*KDB_WRAP(p) == '\n')
++lines;
}
end = p;
skip = adjust;
lines = abs(lines);
} else if (lines > 0) {
int skip = n - (adjust + lines);
skip = n - lines - adjust;
lines = abs(lines);
if (adjust >= n) {
kdb_printf("buffer only contains %d lines, "
"nothing printed\n", n);
@@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv)
kdb_printf("buffer only contains %d lines, first "
"%d lines printed\n", n, lines);
}
for (; start < end && skip; ++start) {
if (*KDB_WRAP(start) == '\n')
--skip;
}
for (p = start; p < end && lines; ++p) {
if (*KDB_WRAP(p) == '\n')
--lines;
}
end = p;
} else {
lines = n;
}
/* Do a line at a time (max 200 chars) to reduce protocol overhead */
c = '\n';
while (start != end) {
char buf[201];
p = buf;
if (KDB_FLAG(CMD_INTERRUPT))
return 0;
while (start < end && (c = *KDB_WRAP(start)) &&
(p - buf) < sizeof(buf)-1) {
++start;
*p++ = c;
if (c == '\n')
break;

if (skip >= n || skip < 0)
return 0;

kmsg_dump_rewind_nolock(&dumper);
while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
if (skip) {
skip--;
continue;
}
*p = '\0';
kdb_printf("%s", buf);
if (!lines--)
break;

kdb_printf("%.*s\n", (int)len - 1, buf);
}
if (c != '\n')
kdb_printf("\n");

return 0;
}

@@ -205,7 +205,6 @@ extern char kdb_grep_string[];
extern int kdb_grep_leading;
extern int kdb_grep_trailing;
extern char *kdb_cmds[];
extern void kdb_syslog_data(char *syslog_data[]);
extern unsigned long kdb_task_state_string(const char *);
extern char kdb_task_state_char (const struct task_struct *);
extern unsigned long kdb_task_state(const struct task_struct *p,

@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx,
lockdep_assert_held(&ctx->mutex);

event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;

if (!task) {
/*
@@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open,
}
}

get_online_cpus();

event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL);
if (IS_ERR(event)) {
@@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open,
/*
* Get the target context (task or percpu):
*/
ctx = find_get_context(pmu, task, cpu);
ctx = find_get_context(pmu, task, event->cpu);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err_alloc;
@@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_lock(&ctx->mutex);

if (move_group) {
perf_install_in_context(ctx, group_leader, cpu);
synchronize_rcu();
perf_install_in_context(ctx, group_leader, event->cpu);
get_ctx(ctx);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_install_in_context(ctx, sibling, cpu);
perf_install_in_context(ctx, sibling, event->cpu);
get_ctx(ctx);
}
}

perf_install_in_context(ctx, event, cpu);
perf_install_in_context(ctx, event, event->cpu);
++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

put_online_cpus();

event->owner = current;

mutex_lock(&current->perf_event_mutex);
@@ -6419,6 +6426,7 @@ err_context:
err_alloc:
free_event(event);
err_task:
put_online_cpus();
if (task)
put_task_struct(task);
err_group_fd:
@@ -6479,6 +6487,39 @@ err:
}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);

void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
{
struct perf_event_context *src_ctx;
struct perf_event_context *dst_ctx;
struct perf_event *event, *tmp;
LIST_HEAD(events);

src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;

mutex_lock(&src_ctx->mutex);
list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
event_entry) {
perf_remove_from_context(event);
put_ctx(src_ctx);
list_add(&event->event_entry, &events);
}
mutex_unlock(&src_ctx->mutex);

synchronize_rcu();

mutex_lock(&dst_ctx->mutex);
list_for_each_entry_safe(event, tmp, &events, event_entry) {
list_del(&event->event_entry);
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
perf_install_in_context(dst_ctx, event, dst_cpu);
get_ctx(dst_ctx);
}
mutex_unlock(&dst_ctx->mutex);
}
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);

static void sync_child_event(struct perf_event *child_event,
struct task_struct *child)
{

@@ -38,13 +38,29 @@
#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE

static struct srcu_struct uprobes_srcu;
static struct rb_root uprobes_tree = RB_ROOT;

static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */

#define UPROBES_HASH_SZ 13

/*
* We need separate register/unregister and mmap/munmap lock hashes because
* of mmap_sem nesting.
*
* uprobe_register() needs to install probes on (potentially) all processes
* and thus needs to acquire multiple mmap_sems (consequtively, not
* concurrently), whereas uprobe_mmap() is called while holding mmap_sem
* for the particular process doing the mmap.
*
* uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
* because of lock order against i_mmap_mutex. This means there's a hole in
* the register vma iteration where a mmap() can happen.
*
* Thus uprobe_register() can race with uprobe_mmap() and we can try and
* install a probe where one is already installed.
*/

/* serialize (un)register */
static struct mutex uprobes_mutex[UPROBES_HASH_SZ];

@@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
*/
static atomic_t uprobe_events = ATOMIC_INIT(0);

/*
* Maintain a temporary per vma info that can be used to search if a vma
* has already been handled. This structure is introduced since extending
* vm_area_struct wasnt recommended.
*/
struct vma_info {
struct list_head probe_list;
struct mm_struct *mm;
loff_t vaddr;
};

struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
atomic_t ref;
@@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
if (!is_register)
return true;

if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
== (VM_READ|VM_EXEC))
return true;

return false;
@@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep;
spinlock_t *ptl;
unsigned long addr;
int err = -EFAULT;
spinlock_t *ptl;
pte_t *ptep;

addr = page_address_in_vma(page, vma);
if (addr == -EFAULT)
goto out;
return -EFAULT;

pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd))
goto out;

pud = pud_offset(pgd, addr);
if (!pud_present(*pud))
goto out;

pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd))
goto out;

ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
ptep = page_check_address(page, mm, addr, &ptl, 0);
if (!ptep)
goto out;
return -EAGAIN;

get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
@@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
try_to_free_swap(page);
put_page(page);
pte_unmap_unlock(ptep, ptl);
err = 0;

out:
return err;
return 0;
}

/**
@@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
void *vaddr_old, *vaddr_new;
struct vm_area_struct *vma;
struct uprobe *uprobe;
loff_t addr;
int ret;

retry:
/* Read the page with vaddr into memory */
ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
if (ret <= 0)
@@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
if (mapping != vma->vm_file->f_mapping)
goto put_out;

addr = vma_address(vma, uprobe->offset);
if (vaddr != (unsigned long)addr)
goto put_out;

ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page)
@@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
vaddr_new = kmap_atomic(new_page);

memcpy(vaddr_new, vaddr_old, PAGE_SIZE);

/* poke the new insn in, ASSUMES we don't cross page boundary */
vaddr &= ~PAGE_MASK;
BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);

kunmap_atomic(vaddr_new);
kunmap_atomic(vaddr_old);
@@ -291,6 +270,8 @@ unlock_out:
put_out:
put_page(old_page);

if (unlikely(ret == -EAGAIN))
goto retry;
return ret;
}

@@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_
void *vaddr_new;
int ret;

ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
if (ret <= 0)
return ret;

@@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
uprobe_opcode_t opcode;
int result;

if (current->mm == mm) {
pagefault_disable();
result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
sizeof(opcode));
pagefault_enable();

if (likely(result == 0))
goto out;
}

result = read_opcode(mm, vaddr, &opcode);
if (result)
return result;

out:
if (is_swbp_insn(&opcode))
return 1;

@@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
{
int result;

/*
* See the comment near uprobes_hash().
*/
result = is_swbp_at_addr(mm, vaddr);
if (result == 1)
return -EEXIST;
@@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->inode = igrab(inode);
uprobe->offset = offset;
init_rwsem(&uprobe->consumer_rwsem);
INIT_LIST_HEAD(&uprobe->pending_list);

/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
@@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
}

static int
__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
unsigned long nbytes, unsigned long offset)
__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
unsigned long nbytes, loff_t offset)
{
struct file *filp = vma->vm_file;
struct page *page;
void *vaddr;
unsigned long off1;
unsigned long idx;
unsigned long off;
pgoff_t idx;

if (!filp)
return -EINVAL;

idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
off1 = offset &= ~PAGE_MASK;
if (!mapping->a_ops->readpage)
return -EIO;

idx = offset >> PAGE_CACHE_SHIFT;
off = offset & ~PAGE_MASK;

/*
* Ensure that the page that has the original instruction is
@@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins
return PTR_ERR(page);

vaddr = kmap_atomic(page);
memcpy(insn, vaddr + off1, nbytes);
memcpy(insn, vaddr + off, nbytes);
kunmap_atomic(vaddr);
page_cache_release(page);

return 0;
}

static int
copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
static int copy_insn(struct uprobe *uprobe, struct file *filp)
{
struct address_space *mapping;
unsigned long nbytes;
int bytes;

addr &= ~PAGE_MASK;
nbytes = PAGE_SIZE - addr;
nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
mapping = uprobe->inode->i_mapping;

/* Instruction at end of binary; copy only available bytes */
@@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)

/* Instruction at the page-boundary; copy bytes in second page */
if (nbytes < bytes) {
if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
bytes - nbytes, uprobe->offset + nbytes))
return -ENOMEM;

int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
bytes - nbytes, uprobe->offset + nbytes);
if (err)
return err;
bytes = nbytes;
}
return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
}

/*
@@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
*/
static int
install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
struct vm_area_struct *vma, loff_t vaddr)
struct vm_area_struct *vma, unsigned long vaddr)
{
unsigned long addr;
int ret;

/*
@@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
if (!uprobe->consumers)
return -EEXIST;

addr = (unsigned long)vaddr;

if (!(uprobe->flags & UPROBE_COPY_INSN)) {
ret = copy_insn(uprobe, vma, addr);
ret = copy_insn(uprobe, vma->vm_file);
if (ret)
return ret;

if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
return -EEXIST;
return -ENOTSUPP;

ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
if (ret)
return ret;

/* write_opcode() assumes we don't cross page boundary */
BUG_ON((uprobe->offset & ~PAGE_MASK) +
UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);

uprobe->flags |= UPROBE_COPY_INSN;
}

@@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
* Hence increment before and decrement on failure.
*/
atomic_inc(&mm->uprobes_state.count);
ret = set_swbp(&uprobe->arch, mm, addr);
ret = set_swbp(&uprobe->arch, mm, vaddr);
if (ret)
atomic_dec(&mm->uprobes_state.count);

@@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
}

static void
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
{
if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
atomic_dec(&mm->uprobes_state.count);
}

/*
* There could be threads that have hit the breakpoint and are entering the
* notifier code and trying to acquire the uprobes_treelock. The thread
* calling delete_uprobe() that is removing the uprobe from the rb_tree can
* race with these threads and might acquire the uprobes_treelock compared
* to some of the breakpoint hit threads. In such a case, the breakpoint
* hit threads will not find the uprobe. The current unregistering thread
* waits till all other threads have hit a breakpoint, to acquire the
* uprobes_treelock before the uprobe is removed from the rbtree.
* There could be threads that have already hit the breakpoint. They
* will recheck the current insn and restart if find_uprobe() fails.
* See find_active_uprobe().
*/
static void delete_uprobe(struct uprobe *uprobe)
{
unsigned long flags;

synchronize_srcu(&uprobes_srcu);
spin_lock_irqsave(&uprobes_treelock, flags);
rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe)
atomic_dec(&uprobe_events);
}

static struct vma_info *
__find_next_vma_info(struct address_space *mapping, struct list_head *head,
struct vma_info *vi, loff_t offset, bool is_register)
struct map_info {
struct map_info *next;
struct mm_struct *mm;
unsigned long vaddr;
};

static inline struct map_info *free_map_info(struct map_info *info)
{
struct map_info *next = info->next;
kfree(info);
return next;
}

static struct map_info *
build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
{
unsigned long pgoff = offset >> PAGE_SHIFT;
struct prio_tree_iter iter;
struct vm_area_struct *vma;
struct vma_info *tmpvi;
unsigned long pgoff;
int existing_vma;
loff_t vaddr;

pgoff = offset >> PAGE_SHIFT;
struct map_info *curr = NULL;
struct map_info *prev = NULL;
struct map_info *info;
int more = 0;

again:
mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
if (!valid_vma(vma, is_register))
continue;

existing_vma = 0;
vaddr = vma_address(vma, offset);

list_for_each_entry(tmpvi, head, probe_list) {
if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
existing_vma = 1;
break;
}
if (!prev && !more) {
/*
* Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
* reclaim. This is optimistic, no harm done if it fails.
*/
prev = kmalloc(sizeof(struct map_info),
GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
if (prev)
prev->next = NULL;
}
if (!prev) {
more++;
continue;
}

/*
* Another vma needs a probe to be installed. However skip
* installing the probe if the vma is about to be unlinked.
*/
if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
vi->mm = vma->vm_mm;
vi->vaddr = vaddr;
list_add(&vi->probe_list, head);
if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
continue;

return vi;
}
info = prev;
prev = prev->next;
info->next = curr;
curr = info;

info->mm = vma->vm_mm;
info->vaddr = vma_address(vma, offset);
}

return NULL;
}

/*
* Iterate in the rmap prio tree and find a vma where a probe has not
* yet been inserted.
*/
static struct vma_info *
find_next_vma_info(struct address_space *mapping, struct list_head *head,
loff_t offset, bool is_register)
{
struct vma_info *vi, *retvi;

vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
if (!vi)
return ERR_PTR(-ENOMEM);

mutex_lock(&mapping->i_mmap_mutex);
retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
mutex_unlock(&mapping->i_mmap_mutex);

if (!retvi)
kfree(vi);
if (!more)
goto out;

return retvi;
prev = curr;
while (curr) {
mmput(curr->mm);
curr = curr->next;
}

do {
info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
if (!info) {
curr = ERR_PTR(-ENOMEM);
goto out;
}
info->next = prev;
prev = info;
} while (--more);

goto again;
out:
while (prev)
prev = free_map_info(prev);
return curr;
}

static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
{
struct list_head try_list;
struct vm_area_struct *vma;
struct address_space *mapping;
struct vma_info *vi, *tmpvi;
struct mm_struct *mm;
loff_t vaddr;
int ret;
struct map_info *info;
int err = 0;

mapping = uprobe->inode->i_mapping;
INIT_LIST_HEAD(&try_list);
info = build_map_info(uprobe->inode->i_mapping,
uprobe->offset, is_register);
if (IS_ERR(info))
return PTR_ERR(info);

ret = 0;
while (info) {
struct mm_struct *mm = info->mm;
struct vm_area_struct *vma;

for (;;) {
vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
if (!vi)
break;
if (err)
goto free;

if (IS_ERR(vi)) {
ret = PTR_ERR(vi);
break;
}
down_write(&mm->mmap_sem);
vma = find_vma(mm, (unsigned long)info->vaddr);
if (!vma || !valid_vma(vma, is_register))
goto unlock;

mm = vi->mm;
down_read(&mm->mmap_sem);
vma = find_vma(mm, (unsigned long)vi->vaddr);
if (!vma || !valid_vma(vma, is_register)) {
list_del(&vi->probe_list);
kfree(vi);
up_read(&mm->mmap_sem);
mmput(mm);
continue;
}
vaddr = vma_address(vma, uprobe->offset);
if (vma->vm_file->f_mapping->host != uprobe->inode ||
vaddr != vi->vaddr) {
list_del(&vi->probe_list);
kfree(vi);
up_read(&mm->mmap_sem);
mmput(mm);
continue;
}
vma_address(vma, uprobe->offset) != info->vaddr)
goto unlock;

if (is_register)
ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
else
remove_breakpoint(uprobe, mm, vi->vaddr);

up_read(&mm->mmap_sem);
mmput(mm);
if (is_register) {
if (ret && ret == -EEXIST)
ret = 0;
if (ret)
break;
err = install_breakpoint(uprobe, mm, vma, info->vaddr);
/*
* We can race against uprobe_mmap(), see the
* comment near uprobe_hash().
*/
if (err == -EEXIST)
err = 0;
} else {
remove_breakpoint(uprobe, mm, info->vaddr);
}
unlock:
up_write(&mm->mmap_sem);
free:
mmput(mm);
info = free_map_info(info);
}

list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
list_del(&vi->probe_list);
kfree(vi);
}

return ret;
return err;
}

static int __uprobe_register(struct uprobe *uprobe)
@@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
int uprobe_mmap(struct vm_area_struct *vma)
{
struct list_head tmp_list;
struct uprobe *uprobe, *u;
struct uprobe *uprobe;
struct inode *inode;
int ret, count;

@@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
ret = 0;
count = 0;

list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
loff_t vaddr;

list_del(&uprobe->pending_list);
list_for_each_entry(uprobe, &tmp_list, pending_list) {
if (!ret) {
vaddr = vma_address(vma, uprobe->offset);
loff_t vaddr = vma_address(vma, uprobe->offset);

if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
put_uprobe(uprobe);
@@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma)
}

ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);

/* Ignore double add: */
/*
* We can race against uprobe_register(), see the
* comment near uprobe_hash().
*/
if (ret == -EEXIST) {
ret = 0;

@@ -1115,7 +1097,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
struct list_head tmp_list;
struct uprobe *uprobe, *u;
struct uprobe *uprobe;
struct inode *inode;

if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
@@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
mutex_lock(uprobes_mmap_hash(inode));
build_probe_list(inode, &tmp_list);

list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
loff_t vaddr;

list_del(&uprobe->pending_list);
vaddr = vma_address(vma, uprobe->offset);
list_for_each_entry(uprobe, &tmp_list, pending_list) {
loff_t vaddr = vma_address(vma, uprobe->offset);

if (vaddr >= start && vaddr < end) {
/*
@@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t)
{
struct uprobe_task *utask = t->utask;

if (t->uprobe_srcu_id != -1)
srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);

if (!utask)
return;

@@ -1398,7 +1374,6 @@ void uprobe_free_utask(struct task_struct *t)
void uprobe_copy_process(struct task_struct *t)
{
t->utask = NULL;
t->uprobe_srcu_id = -1;
}

/*
@@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void)
if (unlikely(!utask))
return NULL;

utask->active_uprobe = NULL;
current->utask = utask;
return utask;
}
@@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
return false;
}

static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
{
struct mm_struct *mm = current->mm;
struct uprobe *uprobe = NULL;
struct vm_area_struct *vma;

down_read(&mm->mmap_sem);
vma = find_vma(mm, bp_vaddr);
if (vma && vma->vm_start <= bp_vaddr) {
if (valid_vma(vma, false)) {
struct inode *inode;
loff_t offset;

inode = vma->vm_file->f_mapping->host;
offset = bp_vaddr - vma->vm_start;
offset += (vma->vm_pgoff << PAGE_SHIFT);
uprobe = find_uprobe(inode, offset);
}

if (!uprobe)
*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
} else {
*is_swbp = -EFAULT;
}
up_read(&mm->mmap_sem);

return uprobe;
}

/*
* Run handler and ask thread to singlestep.
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
*/
static void handle_swbp(struct pt_regs *regs)
{
struct vm_area_struct *vma;
struct uprobe_task *utask;
struct uprobe *uprobe;
struct mm_struct *mm;
unsigned long bp_vaddr;
int uninitialized_var(is_swbp);

uprobe = NULL;
bp_vaddr = uprobe_get_swbp_addr(regs);
mm = current->mm;
down_read(&mm->mmap_sem);
vma = find_vma(mm, bp_vaddr);

if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
struct inode *inode;
loff_t offset;

inode = vma->vm_file->f_mapping->host;
offset = bp_vaddr - vma->vm_start;
offset += (vma->vm_pgoff << PAGE_SHIFT);
uprobe = find_uprobe(inode, offset);
}

srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
current->uprobe_srcu_id = -1;
up_read(&mm->mmap_sem);
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);

if (!uprobe) {
/* No matching uprobe; signal SIGTRAP. */
send_sig(SIGTRAP, current, 0);
if (is_swbp > 0) {
/* No matching uprobe; signal SIGTRAP. */
send_sig(SIGTRAP, current, 0);
} else {
/*
* Either we raced with uprobe_unregister() or we can't
* access this memory. The latter is only possible if
* another thread plays with our ->mm. In both cases
* we can simply restart. If this vma was unmapped we
* can pretend this insn was not executed yet and get
* the (correct) SIGSEGV after restart.
*/
instruction_pointer_set(regs, bp_vaddr);
}
return;
}

@@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
utask->state = UTASK_BP_HIT;

set_thread_flag(TIF_UPROBE);
current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);

return 1;
}
@@ -1655,7 +1651,6 @@ static int __init init_uprobes(void)
mutex_init(&uprobes_mutex[i]);
mutex_init(&uprobes_mmap_mutex[i]);
}
init_srcu_struct(&uprobes_srcu);

return register_die_notifier(&uprobe_exception_nb);
}

@@ -953,14 +953,11 @@ void do_exit(long code)
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
* an exiting task cleaning up the robust pi futexes, and in
* task_work_add() to avoid the race with exit_task_work().
* an exiting task cleaning up the robust pi futexes.
*/
smp_mb();
raw_spin_unlock_wait(&tsk->pi_lock);

exit_task_work(tsk);

if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
current->comm, task_pid_nr(current),
@@ -995,6 +992,7 @@ void do_exit(long code)
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
exit_task_work(tsk);
check_stack_usage();
exit_thread();

@@ -304,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
}

err = arch_dup_task_struct(tsk, orig);

/*
* We defer looking at err, because we will need this setup
* for the clean up path to work correctly.
*/
tsk->stack = ti;
setup_thread_stack(tsk, orig);

if (err)
goto out;

tsk->stack = ti;

setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
stackend = end_of_stack(tsk);
@@ -1415,7 +1420,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
p->group_leader = p;
INIT_LIST_HEAD(&p->thread_group);
INIT_HLIST_HEAD(&p->task_works);
p->task_works = NULL;

/* Now that the task is set up, run cgroup callbacks if
* necessary. We need to run them before the task is visible

@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
|
||||
{
|
||||
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
|
||||
ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
|
||||
|
||||
return ktime_get_update_offsets(offs_real, offs_boot);
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrigger next event is called after clock was set
|
||||
*
|
||||
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
static void retrigger_next_event(void *arg)
|
||||
{
|
||||
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
|
||||
struct timespec realtime_offset, xtim, wtm, sleep;
|
||||
|
||||
if (!hrtimer_hres_active())
|
||||
return;
|
||||
|
||||
/* Optimized out for !HIGH_RES */
|
||||
get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
|
||||
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
|
||||
|
||||
/* Adjust CLOCK_REALTIME offset */
|
||||
raw_spin_lock(&base->lock);
|
||||
base->clock_base[HRTIMER_BASE_REALTIME].offset =
|
||||
timespec_to_ktime(realtime_offset);
|
||||
base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
|
||||
timespec_to_ktime(sleep);
|
||||
|
||||
hrtimer_update_base(base);
|
||||
hrtimer_force_reprogram(base, 0);
|
||||
raw_spin_unlock(&base->lock);
|
||||
}
|
||||
@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
|
||||
base->clock_base[i].resolution = KTIME_HIGH_RES;
|
||||
|
||||
tick_setup_sched_timer();
|
||||
|
||||
/* "Retrigger" the interrupt to get things going */
|
||||
retrigger_next_event(NULL);
|
||||
local_irq_restore(flags);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from timekeeping code to reprogramm the hrtimer interrupt
|
||||
* device. If called from the timer interrupt context we defer it to
|
||||
* softirq context.
|
||||
*/
|
||||
void clock_was_set_delayed(void)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
|
||||
cpu_base->clock_was_set = 1;
|
||||
__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline int hrtimer_hres_active(void) { return 0; }
|
||||
@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
|
||||
cpu_base->nr_events++;
|
||||
dev->next_event.tv64 = KTIME_MAX;
|
||||
|
||||
entry_time = now = ktime_get();
|
||||
raw_spin_lock(&cpu_base->lock);
|
||||
entry_time = now = hrtimer_update_base(cpu_base);
|
||||
retry:
|
||||
expires_next.tv64 = KTIME_MAX;
|
||||
|
||||
raw_spin_lock(&cpu_base->lock);
|
||||
/*
|
||||
* We set expires_next to KTIME_MAX here with cpu_base->lock
|
||||
* held to prevent that a timer is enqueued in our queue via
|
||||
@@ -1330,8 +1339,12 @@ retry:
|
||||
* We need to prevent that we loop forever in the hrtimer
|
||||
* interrupt routine. We give it 3 attempts to avoid
|
||||
* overreacting on some spurious event.
|
||||
*
|
||||
* Acquire base lock for updating the offsets and retrieving
|
||||
* the current time.
|
||||
*/
|
||||
now = ktime_get();
|
||||
raw_spin_lock(&cpu_base->lock);
|
||||
now = hrtimer_update_base(cpu_base);
|
||||
cpu_base->nr_retries++;
|
||||
if (++retries < 3)
|
||||
goto retry;
|
||||
@@ -1343,6 +1356,7 @@ retry:
|
||||
*/
|
||||
cpu_base->nr_hangs++;
|
||||
cpu_base->hang_detected = 1;
|
||||
raw_spin_unlock(&cpu_base->lock);
|
||||
delta = ktime_sub(now, entry_time);
|
||||
if (delta.tv64 > cpu_base->max_hang_time.tv64)
|
||||
cpu_base->max_hang_time = delta;
|
||||
@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void)
|
||||
|
||||
static void run_hrtimer_softirq(struct softirq_action *h)
|
||||
{
|
||||
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
|
||||
|
||||
if (cpu_base->clock_was_set) {
|
||||
cpu_base->clock_was_set = 0;
|
||||
clock_was_set();
|
||||
}
|
||||
|
||||
hrtimer_peek_ahead_timers();
|
||||
}
@@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc)
wake_up(&desc->wait_for_threads);
}

static void irq_thread_dtor(struct task_work *unused)
static void irq_thread_dtor(struct callback_head *unused)
{
struct task_struct *tsk = current;
struct irq_desc *desc;
@@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused)
*/
static int irq_thread(void *data)
{
struct task_work on_exit_work;
struct callback_head on_exit_work;
static const struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2,
};
@@ -830,7 +830,7 @@ static int irq_thread(void *data)

sched_setscheduler(current, SCHED_FIFO, &param);

init_task_work(&on_exit_work, irq_thread_dtor, NULL);
init_task_work(&on_exit_work, irq_thread_dtor);
task_work_add(current, &on_exit_work, false);

while (!irq_wait_for_interrupt(action)) {
@@ -360,16 +360,12 @@ repeat:
|
||||
struct kthread_work, node);
|
||||
list_del_init(&work->node);
|
||||
}
|
||||
worker->current_work = work;
|
||||
spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (work) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
work->func(work);
|
||||
smp_wmb(); /* wmb worker-b0 paired with flush-b1 */
|
||||
work->done_seq = work->queue_seq;
|
||||
smp_mb(); /* mb worker-b1 paired with flush-b0 */
|
||||
if (atomic_read(&work->flushing))
|
||||
wake_up_all(&work->done);
|
||||
} else if (!freezing(current))
|
||||
schedule();
|
||||
|
||||
@@ -378,6 +374,19 @@ repeat:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kthread_worker_fn);
|
||||
|
||||
/* insert @work before @pos in @worker */
|
||||
static void insert_kthread_work(struct kthread_worker *worker,
|
||||
struct kthread_work *work,
|
||||
struct list_head *pos)
|
||||
{
|
||||
lockdep_assert_held(&worker->lock);
|
||||
|
||||
list_add_tail(&work->node, pos);
|
||||
work->worker = worker;
|
||||
if (likely(worker->task))
|
||||
wake_up_process(worker->task);
|
||||
}
|
||||
|
||||
/**
|
||||
* queue_kthread_work - queue a kthread_work
|
||||
* @worker: target kthread_worker
|
||||
@@ -395,10 +404,7 @@ bool queue_kthread_work(struct kthread_worker *worker,
|
||||
|
||||
spin_lock_irqsave(&worker->lock, flags);
|
||||
if (list_empty(&work->node)) {
|
||||
list_add_tail(&work->node, &worker->work_list);
|
||||
work->queue_seq++;
|
||||
if (likely(worker->task))
|
||||
wake_up_process(worker->task);
|
||||
insert_kthread_work(worker, work, &worker->work_list);
|
||||
ret = true;
|
||||
}
|
||||
spin_unlock_irqrestore(&worker->lock, flags);
|
||||
@@ -406,36 +412,6 @@ bool queue_kthread_work(struct kthread_worker *worker,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_kthread_work);
|
||||
|
||||
/**
|
||||
* flush_kthread_work - flush a kthread_work
|
||||
* @work: work to flush
|
||||
*
|
||||
* If @work is queued or executing, wait for it to finish execution.
|
||||
*/
|
||||
void flush_kthread_work(struct kthread_work *work)
|
||||
{
|
||||
int seq = work->queue_seq;
|
||||
|
||||
atomic_inc(&work->flushing);
|
||||
|
||||
/*
|
||||
* mb flush-b0 paired with worker-b1, to make sure either
|
||||
* worker sees the above increment or we see done_seq update.
|
||||
*/
|
||||
smp_mb__after_atomic_inc();
|
||||
|
||||
/* A - B <= 0 tests whether B is in front of A regardless of overflow */
|
||||
wait_event(work->done, seq - work->done_seq <= 0);
|
||||
atomic_dec(&work->flushing);
|
||||
|
||||
/*
|
||||
* rmb flush-b1 paired with worker-b0, to make sure our caller
|
||||
* sees every change made by work->func().
|
||||
*/
|
||||
smp_mb__after_atomic_dec();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flush_kthread_work);
|
||||
|
||||
struct kthread_flush_work {
|
||||
struct kthread_work work;
|
||||
struct completion done;
|
||||
@@ -448,6 +424,46 @@ static void kthread_flush_work_fn(struct kthread_work *work)
|
||||
complete(&fwork->done);
|
||||
}
|
||||
|
||||
/**
|
||||
* flush_kthread_work - flush a kthread_work
|
||||
* @work: work to flush
|
||||
*
|
||||
* If @work is queued or executing, wait for it to finish execution.
|
||||
*/
|
||||
void flush_kthread_work(struct kthread_work *work)
|
||||
{
|
||||
struct kthread_flush_work fwork = {
|
||||
KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
|
||||
COMPLETION_INITIALIZER_ONSTACK(fwork.done),
|
||||
};
|
||||
struct kthread_worker *worker;
|
||||
bool noop = false;
|
||||
|
||||
retry:
|
||||
worker = work->worker;
|
||||
if (!worker)
|
||||
return;
|
||||
|
||||
spin_lock_irq(&worker->lock);
|
||||
if (work->worker != worker) {
|
||||
spin_unlock_irq(&worker->lock);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (!list_empty(&work->node))
|
||||
insert_kthread_work(worker, &fwork.work, work->node.next);
|
||||
else if (worker->current_work == work)
|
||||
insert_kthread_work(worker, &fwork.work, worker->work_list.next);
|
||||
else
|
||||
noop = true;
|
||||
|
||||
spin_unlock_irq(&worker->lock);
|
||||
|
||||
if (!noop)
|
||||
wait_for_completion(&fwork.done);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flush_kthread_work);
|
||||
|
||||
/**
|
||||
* flush_kthread_worker - flush all current works on a kthread_worker
|
||||
* @worker: worker to flush
@@ -175,7 +175,7 @@ config PM_TEST_SUSPEND
You probably want to have your system's RTC driver statically
linked, ensuring that it's available when this test runs.

config CAN_PM_TRACE
config PM_SLEEP_DEBUG
def_bool y
depends on PM_DEBUG && PM_SLEEP

@@ -196,7 +196,7 @@ config PM_TRACE

config PM_TRACE_RTC
bool "Suspend/resume event tracing"
depends on CAN_PM_TRACE
depends on PM_SLEEP_DEBUG
depends on X86
select PM_TRACE
---help---
@@ -5,6 +5,7 @@
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2004 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
* Copyright (C) 2012 Bojan Smojver <bojan@rexursive.com>
*
* This file is released under the GPLv2.
*/
@@ -27,7 +28,6 @@
#include <linux/syscore_ops.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <scsi/scsi_scan.h>

#include "power.h"

@@ -46,6 +46,9 @@ enum {
HIBERNATION_PLATFORM,
HIBERNATION_SHUTDOWN,
HIBERNATION_REBOOT,
#ifdef CONFIG_SUSPEND
HIBERNATION_SUSPEND,
#endif
/* keep last */
__HIBERNATION_AFTER_LAST
};
@@ -354,6 +357,7 @@ int hibernation_snapshot(int platform_mode)
|
||||
}
|
||||
|
||||
suspend_console();
|
||||
ftrace_stop();
|
||||
pm_restrict_gfp_mask();
|
||||
|
||||
error = dpm_suspend(PMSG_FREEZE);
|
||||
@@ -379,6 +383,7 @@ int hibernation_snapshot(int platform_mode)
|
||||
if (error || !in_suspend)
|
||||
pm_restore_gfp_mask();
|
||||
|
||||
ftrace_start();
|
||||
resume_console();
|
||||
dpm_complete(msg);
|
||||
|
||||
@@ -481,6 +486,7 @@ int hibernation_restore(int platform_mode)
|
||||
|
||||
pm_prepare_console();
|
||||
suspend_console();
|
||||
ftrace_stop();
|
||||
pm_restrict_gfp_mask();
|
||||
error = dpm_suspend_start(PMSG_QUIESCE);
|
||||
if (!error) {
|
||||
@@ -488,6 +494,7 @@ int hibernation_restore(int platform_mode)
|
||||
dpm_resume_end(PMSG_RECOVER);
|
||||
}
|
||||
pm_restore_gfp_mask();
|
||||
ftrace_start();
|
||||
resume_console();
|
||||
pm_restore_console();
|
||||
return error;
|
||||
@@ -514,6 +521,7 @@ int hibernation_platform_enter(void)
|
||||
|
||||
entering_platform_hibernation = true;
|
||||
suspend_console();
|
||||
ftrace_stop();
|
||||
error = dpm_suspend_start(PMSG_HIBERNATE);
|
||||
if (error) {
|
||||
if (hibernation_ops->recover)
|
||||
@@ -557,6 +565,7 @@ int hibernation_platform_enter(void)
|
||||
Resume_devices:
|
||||
entering_platform_hibernation = false;
|
||||
dpm_resume_end(PMSG_RESTORE);
|
||||
ftrace_start();
|
||||
resume_console();
|
||||
|
||||
Close:
|
||||
@@ -574,6 +583,10 @@ int hibernation_platform_enter(void)
|
||||
*/
|
||||
static void power_down(void)
|
||||
{
|
||||
#ifdef CONFIG_SUSPEND
|
||||
int error;
|
||||
#endif
|
||||
|
||||
switch (hibernation_mode) {
|
||||
case HIBERNATION_REBOOT:
|
||||
kernel_restart(NULL);
|
||||
@@ -583,6 +596,25 @@ static void power_down(void)
|
||||
case HIBERNATION_SHUTDOWN:
|
||||
kernel_power_off();
|
||||
break;
|
||||
#ifdef CONFIG_SUSPEND
|
||||
case HIBERNATION_SUSPEND:
|
||||
error = suspend_devices_and_enter(PM_SUSPEND_MEM);
|
||||
if (error) {
|
||||
if (hibernation_ops)
|
||||
hibernation_mode = HIBERNATION_PLATFORM;
|
||||
else
|
||||
hibernation_mode = HIBERNATION_SHUTDOWN;
|
||||
power_down();
|
||||
}
|
||||
/*
|
||||
* Restore swap signature.
|
||||
*/
|
||||
error = swsusp_unmark();
|
||||
if (error)
|
||||
printk(KERN_ERR "PM: Swap will be unusable! "
|
||||
"Try swapon -a.\n");
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
kernel_halt();
|
||||
/*
|
||||
@@ -748,13 +780,6 @@ static int software_resume(void)
|
||||
async_synchronize_full();
|
||||
}
|
||||
|
||||
/*
|
||||
* We can't depend on SCSI devices being available after loading
|
||||
* one of their modules until scsi_complete_async_scans() is
|
||||
* called and the resume device usually is a SCSI one.
|
||||
*/
|
||||
scsi_complete_async_scans();
|
||||
|
||||
swsusp_resume_device = name_to_dev_t(resume_file);
|
||||
if (!swsusp_resume_device) {
|
||||
error = -ENODEV;
|
||||
@@ -827,6 +852,9 @@ static const char * const hibernation_modes[] = {
|
||||
[HIBERNATION_PLATFORM] = "platform",
|
||||
[HIBERNATION_SHUTDOWN] = "shutdown",
|
||||
[HIBERNATION_REBOOT] = "reboot",
|
||||
#ifdef CONFIG_SUSPEND
|
||||
[HIBERNATION_SUSPEND] = "suspend",
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -867,6 +895,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
switch (i) {
|
||||
case HIBERNATION_SHUTDOWN:
|
||||
case HIBERNATION_REBOOT:
|
||||
#ifdef CONFIG_SUSPEND
|
||||
case HIBERNATION_SUSPEND:
|
||||
#endif
|
||||
break;
|
||||
case HIBERNATION_PLATFORM:
|
||||
if (hibernation_ops)
|
||||
@@ -907,6 +938,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
switch (mode) {
|
||||
case HIBERNATION_SHUTDOWN:
|
||||
case HIBERNATION_REBOOT:
|
||||
#ifdef CONFIG_SUSPEND
|
||||
case HIBERNATION_SUSPEND:
|
||||
#endif
|
||||
hibernation_mode = mode;
|
||||
break;
|
||||
case HIBERNATION_PLATFORM:
|
||||
|
@@ -235,6 +235,47 @@ late_initcall(pm_debugfs_init);
|
||||
|
||||
#endif /* CONFIG_PM_SLEEP */
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP_DEBUG
|
||||
/*
|
||||
* pm_print_times: print time taken by devices to suspend and resume.
|
||||
*
|
||||
* show() returns whether printing of suspend and resume times is enabled.
|
||||
* store() accepts 0 or 1. 0 disables printing and 1 enables it.
|
||||
*/
|
||||
bool pm_print_times_enabled;
|
||||
|
||||
static ssize_t pm_print_times_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", pm_print_times_enabled);
|
||||
}
|
||||
|
||||
static ssize_t pm_print_times_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t n)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
if (kstrtoul(buf, 10, &val))
|
||||
return -EINVAL;
|
||||
|
||||
if (val > 1)
|
||||
return -EINVAL;
|
||||
|
||||
pm_print_times_enabled = !!val;
|
||||
return n;
|
||||
}
|
||||
|
||||
power_attr(pm_print_times);
|
||||
|
||||
static inline void pm_print_times_init(void)
|
||||
{
|
||||
pm_print_times_enabled = !!initcall_debug;
|
||||
}
|
||||
#else /* !CONFIG_PP_SLEEP_DEBUG */
|
||||
static inline void pm_print_times_init(void) {}
|
||||
#endif /* CONFIG_PM_SLEEP_DEBUG */
|
||||
|
||||
struct kobject *power_kobj;
|
||||
|
||||
/**
|
||||
@@ -531,6 +572,9 @@ static struct attribute * g[] = {
|
||||
#ifdef CONFIG_PM_DEBUG
|
||||
&pm_test_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_PM_SLEEP_DEBUG
|
||||
&pm_print_times_attr.attr,
|
||||
#endif
|
||||
#endif
|
||||
NULL,
|
||||
};
|
||||
@@ -566,6 +610,7 @@ static int __init pm_init(void)
|
||||
error = sysfs_create_group(power_kobj, &attr_group);
|
||||
if (error)
|
||||
return error;
|
||||
pm_print_times_init();
|
||||
return pm_autosleep_init();
|
||||
}
|
||||
|
||||
|
@@ -156,6 +156,9 @@ extern void swsusp_free(void);
|
||||
extern int swsusp_read(unsigned int *flags_p);
|
||||
extern int swsusp_write(unsigned int flags);
|
||||
extern void swsusp_close(fmode_t);
|
||||
#ifdef CONFIG_SUSPEND
|
||||
extern int swsusp_unmark(void);
|
||||
#endif
|
||||
|
||||
/* kernel/power/block_io.c */
|
||||
extern struct block_device *hib_resume_bdev;
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#include "power.h"
|
||||
@@ -212,6 +213,7 @@ int suspend_devices_and_enter(suspend_state_t state)
|
||||
goto Close;
|
||||
}
|
||||
suspend_console();
|
||||
ftrace_stop();
|
||||
suspend_test_start();
|
||||
error = dpm_suspend_start(PMSG_SUSPEND);
|
||||
if (error) {
|
||||
@@ -231,6 +233,7 @@ int suspend_devices_and_enter(suspend_state_t state)
|
||||
suspend_test_start();
|
||||
dpm_resume_end(PMSG_RESUME);
|
||||
suspend_test_finish("resume devices");
|
||||
ftrace_start();
|
||||
resume_console();
|
||||
Close:
|
||||
if (suspend_ops->end)
|
||||
|
@@ -448,9 +448,9 @@ static int save_image(struct swap_map_handle *handle,
|
||||
struct timeval start;
|
||||
struct timeval stop;
|
||||
|
||||
printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ",
|
||||
printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
|
||||
nr_to_write);
|
||||
m = nr_to_write / 100;
|
||||
m = nr_to_write / 10;
|
||||
if (!m)
|
||||
m = 1;
|
||||
nr_pages = 0;
|
||||
@@ -464,7 +464,8 @@ static int save_image(struct swap_map_handle *handle,
|
||||
if (ret)
|
||||
break;
|
||||
if (!(nr_pages % m))
|
||||
printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
|
||||
printk(KERN_INFO "PM: Image saving progress: %3d%%\n",
|
||||
nr_pages / m * 10);
|
||||
nr_pages++;
|
||||
}
|
||||
err2 = hib_wait_on_bio_chain(&bio);
|
||||
@@ -472,9 +473,7 @@ static int save_image(struct swap_map_handle *handle,
|
||||
if (!ret)
|
||||
ret = err2;
|
||||
if (!ret)
|
||||
printk(KERN_CONT "\b\b\b\bdone\n");
|
||||
else
|
||||
printk(KERN_CONT "\n");
|
||||
printk(KERN_INFO "PM: Image saving done.\n");
|
||||
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
|
||||
return ret;
|
||||
}
|
||||
@@ -668,9 +667,9 @@ static int save_image_lzo(struct swap_map_handle *handle,
|
||||
|
||||
printk(KERN_INFO
|
||||
"PM: Using %u thread(s) for compression.\n"
|
||||
"PM: Compressing and saving image data (%u pages) ... ",
|
||||
"PM: Compressing and saving image data (%u pages)...\n",
|
||||
nr_threads, nr_to_write);
|
||||
m = nr_to_write / 100;
|
||||
m = nr_to_write / 10;
|
||||
if (!m)
|
||||
m = 1;
|
||||
nr_pages = 0;
|
||||
@@ -690,8 +689,10 @@ static int save_image_lzo(struct swap_map_handle *handle,
|
||||
data_of(*snapshot), PAGE_SIZE);
|
||||
|
||||
if (!(nr_pages % m))
|
||||
printk(KERN_CONT "\b\b\b\b%3d%%",
|
||||
nr_pages / m);
|
||||
printk(KERN_INFO
|
||||
"PM: Image saving progress: "
|
||||
"%3d%%\n",
|
||||
nr_pages / m * 10);
|
||||
nr_pages++;
|
||||
}
|
||||
if (!off)
|
||||
@@ -761,11 +762,8 @@ out_finish:
|
||||
do_gettimeofday(&stop);
|
||||
if (!ret)
|
||||
ret = err2;
|
||||
if (!ret) {
|
||||
printk(KERN_CONT "\b\b\b\bdone\n");
|
||||
} else {
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
if (!ret)
|
||||
printk(KERN_INFO "PM: Image saving done.\n");
|
||||
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
|
||||
out_clean:
|
||||
if (crc) {
|
||||
@@ -973,9 +971,9 @@ static int load_image(struct swap_map_handle *handle,
|
||||
int err2;
|
||||
unsigned nr_pages;
|
||||
|
||||
printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ",
|
||||
printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
|
||||
nr_to_read);
|
||||
m = nr_to_read / 100;
|
||||
m = nr_to_read / 10;
|
||||
if (!m)
|
||||
m = 1;
|
||||
nr_pages = 0;
|
||||
@@ -993,7 +991,8 @@ static int load_image(struct swap_map_handle *handle,
|
||||
if (ret)
|
||||
break;
|
||||
if (!(nr_pages % m))
|
||||
printk("\b\b\b\b%3d%%", nr_pages / m);
|
||||
printk(KERN_INFO "PM: Image loading progress: %3d%%\n",
|
||||
nr_pages / m * 10);
|
||||
nr_pages++;
|
||||
}
|
||||
err2 = hib_wait_on_bio_chain(&bio);
|
||||
@@ -1001,12 +1000,11 @@ static int load_image(struct swap_map_handle *handle,
|
||||
if (!ret)
|
||||
ret = err2;
|
||||
if (!ret) {
|
||||
printk("\b\b\b\bdone\n");
|
||||
printk(KERN_INFO "PM: Image loading done.\n");
|
||||
snapshot_write_finalize(snapshot);
|
||||
if (!snapshot_image_loaded(snapshot))
|
||||
ret = -ENODATA;
|
||||
} else
|
||||
printk("\n");
|
||||
}
|
||||
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
|
||||
return ret;
|
||||
}
|
||||
@@ -1185,9 +1183,9 @@ static int load_image_lzo(struct swap_map_handle *handle,
|
||||
|
||||
printk(KERN_INFO
|
||||
"PM: Using %u thread(s) for decompression.\n"
|
||||
"PM: Loading and decompressing image data (%u pages) ... ",
|
||||
"PM: Loading and decompressing image data (%u pages)...\n",
|
||||
nr_threads, nr_to_read);
|
||||
m = nr_to_read / 100;
|
||||
m = nr_to_read / 10;
|
||||
if (!m)
|
||||
m = 1;
|
||||
nr_pages = 0;
|
||||
@@ -1319,7 +1317,10 @@ static int load_image_lzo(struct swap_map_handle *handle,
|
||||
data[thr].unc + off, PAGE_SIZE);
|
||||
|
||||
if (!(nr_pages % m))
|
||||
printk("\b\b\b\b%3d%%", nr_pages / m);
|
||||
printk(KERN_INFO
|
||||
"PM: Image loading progress: "
|
||||
"%3d%%\n",
|
||||
nr_pages / m * 10);
|
||||
nr_pages++;
|
||||
|
||||
ret = snapshot_write_next(snapshot);
|
||||
@@ -1344,7 +1345,7 @@ out_finish:
|
||||
}
|
||||
do_gettimeofday(&stop);
|
||||
if (!ret) {
|
||||
printk("\b\b\b\bdone\n");
|
||||
printk(KERN_INFO "PM: Image loading done.\n");
|
||||
snapshot_write_finalize(snapshot);
|
||||
if (!snapshot_image_loaded(snapshot))
|
||||
ret = -ENODATA;
|
||||
@@ -1357,8 +1358,7 @@ out_finish:
|
||||
}
|
||||
}
|
||||
}
|
||||
} else
|
||||
printk("\n");
|
||||
}
|
||||
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
|
||||
out_clean:
|
||||
for (i = 0; i < ring_size; i++)
|
||||
@@ -1472,6 +1472,34 @@ void swsusp_close(fmode_t mode)
|
||||
blkdev_put(hib_resume_bdev, mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* swsusp_unmark - Unmark swsusp signature in the resume device
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SUSPEND
|
||||
int swsusp_unmark(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
|
||||
if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
|
||||
memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
|
||||
error = hib_bio_write_page(swsusp_resume_block,
|
||||
swsusp_header, NULL);
|
||||
} else {
|
||||
printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
|
||||
error = -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* We just returned from suspend, we don't need the image any more.
|
||||
*/
|
||||
free_all_swap_pages(root_swap);
|
||||
|
||||
return error;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int swsusp_header_init(void)
|
||||
{
|
||||
swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
|
||||
|
@@ -24,7 +24,6 @@
|
||||
#include <linux/console.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <scsi/scsi_scan.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
@@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
|
||||
* appear.
|
||||
*/
|
||||
wait_for_device_probe();
|
||||
scsi_complete_async_scans();
|
||||
|
||||
data->swap = -1;
|
||||
data->mode = O_WRONLY;
|
||||
|
@@ -9,6 +9,7 @@
|
||||
* manipulate wakelocks on Android.
|
||||
*/
|
||||
|
||||
#include <linux/capability.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/err.h>
|
||||
@@ -188,6 +189,9 @@ int pm_wake_lock(const char *buf)
|
||||
size_t len;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_BLOCK_SUSPEND))
|
||||
return -EPERM;
|
||||
|
||||
while (*str && !isspace(*str))
|
||||
str++;
|
||||
|
||||
@@ -231,6 +235,9 @@ int pm_wake_unlock(const char *buf)
|
||||
size_t len;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_BLOCK_SUSPEND))
|
||||
return -EPERM;
|
||||
|
||||
len = strlen(buf);
|
||||
if (!len)
|
||||
return -EINVAL;
kernel/printk.c
@@ -194,8 +194,10 @@ static int console_may_schedule;
*/

enum log_flags {
LOG_DEFAULT = 0,
LOG_NOCONS = 1, /* already flushed, do not print to console */
LOG_NOCONS = 1, /* already flushed, do not print to console */
LOG_NEWLINE = 2, /* text ended with a newline */
LOG_PREFIX = 4, /* text started with a prefix */
LOG_CONT = 8, /* text is a fragment of a continuation line */
};

struct log {
@@ -217,6 +219,8 @@ static DEFINE_RAW_SPINLOCK(logbuf_lock);
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
static enum log_flags syslog_prev;
static size_t syslog_partial;

/* index and sequence number of the first record stored in the buffer */
static u64 log_first_seq;
|
||||
@@ -430,20 +434,20 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
|
||||
ret = mutex_lock_interruptible(&user->lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
raw_spin_lock(&logbuf_lock);
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
while (user->seq == log_next_seq) {
|
||||
if (file->f_flags & O_NONBLOCK) {
|
||||
ret = -EAGAIN;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
ret = wait_event_interruptible(log_wait,
|
||||
user->seq != log_next_seq);
|
||||
if (ret)
|
||||
goto out;
|
||||
raw_spin_lock(&logbuf_lock);
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
}
|
||||
|
||||
if (user->seq < log_first_seq) {
|
||||
@@ -451,7 +455,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
|
||||
user->idx = log_first_idx;
|
||||
user->seq = log_first_seq;
|
||||
ret = -EPIPE;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -465,7 +469,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
|
||||
for (i = 0; i < msg->text_len; i++) {
|
||||
unsigned char c = log_text(msg)[i];
|
||||
|
||||
if (c < ' ' || c >= 128)
|
||||
if (c < ' ' || c >= 127 || c == '\\')
|
||||
len += sprintf(user->buf + len, "\\x%02x", c);
|
||||
else
|
||||
user->buf[len++] = c;
|
||||
@@ -489,7 +493,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c < ' ' || c >= 128) {
|
||||
if (c < ' ' || c >= 127 || c == '\\') {
|
||||
len += sprintf(user->buf + len, "\\x%02x", c);
|
||||
continue;
|
||||
}
|
||||
@@ -501,7 +505,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
|
||||
|
||||
user->idx = log_next(user->idx);
|
||||
user->seq++;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
|
||||
if (len > count) {
|
||||
ret = -EINVAL;
|
||||
@@ -528,7 +532,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
|
||||
if (offset)
|
||||
return -ESPIPE;
|
||||
|
||||
raw_spin_lock(&logbuf_lock);
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
/* the first record */
|
||||
@@ -552,7 +556,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -566,14 +570,14 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
|
||||
|
||||
poll_wait(file, &log_wait, wait);
|
||||
|
||||
raw_spin_lock(&logbuf_lock);
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
if (user->seq < log_next_seq) {
|
||||
/* return error when data has vanished underneath us */
|
||||
if (user->seq < log_first_seq)
|
||||
ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
|
||||
ret = POLLIN|POLLRDNORM;
|
||||
}
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -597,10 +601,10 @@ static int devkmsg_open(struct inode *inode, struct file *file)
|
||||
|
||||
mutex_init(&user->lock);
|
||||
|
||||
raw_spin_lock(&logbuf_lock);
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
user->idx = log_first_idx;
|
||||
user->seq = log_first_seq;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
|
||||
file->private_data = user;
|
||||
return 0;
|
||||
@@ -818,15 +822,18 @@ static size_t print_time(u64 ts, char *buf)
|
||||
static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
|
||||
{
|
||||
size_t len = 0;
|
||||
unsigned int prefix = (msg->facility << 3) | msg->level;
|
||||
|
||||
if (syslog) {
|
||||
if (buf) {
|
||||
len += sprintf(buf, "<%u>", msg->level);
|
||||
len += sprintf(buf, "<%u>", prefix);
|
||||
} else {
|
||||
len += 3;
|
||||
if (msg->level > 9)
|
||||
len++;
|
||||
if (msg->level > 99)
|
||||
if (prefix > 999)
|
||||
len += 3;
|
||||
else if (prefix > 99)
|
||||
len += 2;
|
||||
else if (prefix > 9)
|
||||
len++;
|
||||
}
|
||||
}
|
||||
@@ -835,13 +842,26 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
|
||||
return len;
|
||||
}
|
||||
|
||||
static size_t msg_print_text(const struct log *msg, bool syslog,
|
||||
char *buf, size_t size)
|
||||
static size_t msg_print_text(const struct log *msg, enum log_flags prev,
|
||||
bool syslog, char *buf, size_t size)
|
||||
{
|
||||
const char *text = log_text(msg);
|
||||
size_t text_size = msg->text_len;
|
||||
bool prefix = true;
|
||||
bool newline = true;
|
||||
size_t len = 0;
|
||||
|
||||
if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
|
||||
prefix = false;
|
||||
|
||||
if (msg->flags & LOG_CONT) {
|
||||
if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
|
||||
prefix = false;
|
||||
|
||||
if (!(msg->flags & LOG_NEWLINE))
|
||||
newline = false;
|
||||
}
|
||||
|
||||
do {
|
||||
const char *next = memchr(text, '\n', text_size);
|
||||
size_t text_len;
|
||||
@@ -859,16 +879,22 @@ static size_t msg_print_text(const struct log *msg, bool syslog,
|
||||
text_len + 1>= size - len)
|
||||
break;
|
||||
|
||||
len += print_prefix(msg, syslog, buf + len);
|
||||
if (prefix)
|
||||
len += print_prefix(msg, syslog, buf + len);
|
||||
memcpy(buf + len, text, text_len);
|
||||
len += text_len;
|
||||
buf[len++] = '\n';
|
||||
if (next || newline)
|
||||
buf[len++] = '\n';
|
||||
} else {
|
||||
/* SYSLOG_ACTION_* buffer size only calculation */
|
||||
len += print_prefix(msg, syslog, NULL);
|
||||
len += text_len + 1;
|
||||
if (prefix)
|
||||
len += print_prefix(msg, syslog, NULL);
|
||||
len += text_len;
|
||||
if (next || newline)
|
||||
len++;
|
||||
}
|
||||
|
||||
prefix = true;
|
||||
text = next;
|
||||
} while (text);
|
||||
|
||||
@@ -887,22 +913,35 @@ static int syslog_print(char __user *buf, int size)
|
||||
|
||||
while (size > 0) {
|
||||
size_t n;
|
||||
size_t skip;
|
||||
|
||||
raw_spin_lock_irq(&logbuf_lock);
|
||||
if (syslog_seq < log_first_seq) {
|
||||
/* messages are gone, move to first one */
|
||||
syslog_seq = log_first_seq;
|
||||
syslog_idx = log_first_idx;
|
||||
syslog_prev = 0;
|
||||
syslog_partial = 0;
|
||||
}
|
||||
if (syslog_seq == log_next_seq) {
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
break;
|
||||
}
|
||||
|
||||
skip = syslog_partial;
|
||||
msg = log_from_idx(syslog_idx);
|
||||
n = msg_print_text(msg, true, text, LOG_LINE_MAX);
|
||||
if (n <= size) {
|
||||
n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX);
|
||||
if (n - syslog_partial <= size) {
|
||||
/* message fits into buffer, move forward */
|
||||
syslog_idx = log_next(syslog_idx);
|
||||
syslog_seq++;
|
||||
syslog_prev = msg->flags;
|
||||
n -= syslog_partial;
|
||||
syslog_partial = 0;
|
||||
} else if (!len){
|
||||
/* partial read(), remember position */
|
||||
n = size;
|
||||
syslog_partial += n;
|
||||
} else
|
||||
n = 0;
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
@@ -910,17 +949,15 @@ static int syslog_print(char __user *buf, int size)
|
||||
if (!n)
|
||||
break;
|
||||
|
||||
len += n;
|
||||
size -= n;
|
||||
buf += n;
|
||||
n = copy_to_user(buf - n, text, n);
|
||||
|
||||
if (n) {
|
||||
len -= n;
|
||||
if (copy_to_user(buf, text + skip, n)) {
|
||||
if (!len)
|
||||
len = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
len += n;
|
||||
size -= n;
|
||||
buf += n;
|
||||
}
|
||||
|
||||
kfree(text);
|
||||
@@ -941,6 +978,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
u64 next_seq;
|
||||
u64 seq;
|
||||
u32 idx;
|
||||
enum log_flags prev;
|
||||
|
||||
if (clear_seq < log_first_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
@@ -954,10 +992,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
*/
|
||||
seq = clear_seq;
|
||||
idx = clear_idx;
|
||||
prev = 0;
|
||||
while (seq < log_next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
len += msg_print_text(msg, true, NULL, 0);
|
||||
len += msg_print_text(msg, prev, true, NULL, 0);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
}
|
||||
@@ -965,10 +1004,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
/* move first record forward until length fits into the buffer */
|
||||
seq = clear_seq;
|
||||
idx = clear_idx;
|
||||
prev = 0;
|
||||
while (len > size && seq < log_next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
len -= msg_print_text(msg, true, NULL, 0);
|
||||
len -= msg_print_text(msg, prev, true, NULL, 0);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
}
|
||||
@@ -977,17 +1017,19 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
next_seq = log_next_seq;
|
||||
|
||||
len = 0;
|
||||
prev = 0;
|
||||
while (len >= 0 && seq < next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
int textlen;
|
||||
|
||||
textlen = msg_print_text(msg, true, text, LOG_LINE_MAX);
|
||||
textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX);
|
||||
if (textlen < 0) {
|
||||
len = textlen;
|
||||
break;
|
||||
}
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
prev = msg->flags;
|
||||
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
if (copy_to_user(buf + len, text, textlen))
|
||||
@@ -1000,6 +1042,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
/* messages are gone, move to next one */
|
||||
seq = log_first_seq;
|
||||
idx = log_first_idx;
|
||||
prev = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1018,7 +1061,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
|
||||
{
|
||||
bool clear = false;
|
||||
static int saved_console_loglevel = -1;
|
||||
static DEFINE_MUTEX(syslog_mutex);
|
||||
int error;
|
||||
|
||||
error = check_syslog_permissions(type, from_file);
|
||||
@@ -1045,17 +1087,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
|
||||
error = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
error = mutex_lock_interruptible(&syslog_mutex);
|
||||
if (error)
|
||||
goto out;
|
||||
error = wait_event_interruptible(log_wait,
|
||||
syslog_seq != log_next_seq);
|
||||
if (error) {
|
||||
mutex_unlock(&syslog_mutex);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
error = syslog_print(buf, len);
|
||||
mutex_unlock(&syslog_mutex);
|
||||
break;
|
||||
/* Read/clear last kernel messages */
|
||||
case SYSLOG_ACTION_READ_CLEAR:
|
||||
@@ -1111,6 +1147,8 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
|
||||
/* messages are gone, move to first one */
|
||||
syslog_seq = log_first_seq;
|
||||
syslog_idx = log_first_idx;
|
||||
syslog_prev = 0;
|
||||
syslog_partial = 0;
|
||||
}
|
||||
if (from_file) {
|
||||
/*
|
||||
@@ -1120,19 +1158,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
|
||||
*/
|
||||
error = log_next_idx - syslog_idx;
|
||||
} else {
|
||||
u64 seq;
|
||||
u32 idx;
|
||||
u64 seq = syslog_seq;
|
||||
u32 idx = syslog_idx;
|
||||
enum log_flags prev = syslog_prev;
|
||||
|
||||
error = 0;
|
||||
seq = syslog_seq;
|
||||
idx = syslog_idx;
|
||||
while (seq < log_next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
error += msg_print_text(msg, true, NULL, 0);
|
||||
error += msg_print_text(msg, prev, true, NULL, 0);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
prev = msg->flags;
|
||||
}
|
||||
error -= syslog_partial;
|
||||
}
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
break;
|
||||
@@ -1153,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
|
||||
return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KGDB_KDB
|
||||
/* kdb dmesg command needs access to the syslog buffer. do_syslog()
|
||||
* uses locks so it cannot be used during debugging. Just tell kdb
|
||||
* where the start and end of the physical and logical logs are. This
|
||||
* is equivalent to do_syslog(3).
|
||||
*/
|
||||
void kdb_syslog_data(char *syslog_data[4])
|
||||
{
|
||||
syslog_data[0] = log_buf;
|
||||
syslog_data[1] = log_buf + log_buf_len;
|
||||
syslog_data[2] = log_buf + log_first_idx;
|
||||
syslog_data[3] = log_buf + log_next_idx;
|
||||
}
|
||||
#endif /* CONFIG_KGDB_KDB */
|
||||
|
||||
static bool __read_mostly ignore_loglevel;
|
||||
|
||||
static int __init ignore_loglevel_setup(char *str)
|
||||
@@ -1400,10 +1424,9 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
static char textbuf[LOG_LINE_MAX];
|
||||
char *text = textbuf;
|
||||
size_t text_len;
|
||||
enum log_flags lflags = 0;
|
||||
unsigned long flags;
|
||||
int this_cpu;
|
||||
bool newline = false;
|
||||
bool prefix = false;
|
||||
int printed_len = 0;
|
||||
|
||||
boot_delay_msec();
|
||||
@@ -1442,7 +1465,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
recursion_bug = 0;
|
||||
printed_len += strlen(recursion_msg);
|
||||
/* emit KERN_CRIT message */
|
||||
log_store(0, 2, LOG_DEFAULT, 0,
|
||||
log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
|
||||
NULL, 0, recursion_msg, printed_len);
|
||||
}
|
||||
|
||||
@@ -1455,7 +1478,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
/* mark and strip a trailing newline */
|
||||
if (text_len && text[text_len-1] == '\n') {
|
||||
text_len--;
|
||||
newline = true;
|
||||
lflags |= LOG_NEWLINE;
|
||||
}
|
||||
|
||||
/* strip syslog prefix and extract log level or control flags */
|
||||
@@ -1465,7 +1488,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
if (level == -1)
|
||||
level = text[1] - '0';
|
||||
case 'd': /* KERN_DEFAULT */
|
||||
prefix = true;
|
||||
lflags |= LOG_PREFIX;
|
||||
case 'c': /* KERN_CONT */
|
||||
text += 3;
|
||||
text_len -= 3;
|
||||
@@ -1475,22 +1498,20 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
if (level == -1)
|
||||
level = default_message_loglevel;
|
||||
|
||||
if (dict) {
|
||||
prefix = true;
|
||||
newline = true;
|
||||
}
|
||||
if (dict)
|
||||
lflags |= LOG_PREFIX|LOG_NEWLINE;
|
||||
|
||||
if (!newline) {
|
||||
if (!(lflags & LOG_NEWLINE)) {
|
||||
/*
|
||||
* Flush the conflicting buffer. An earlier newline was missing,
|
||||
* or another task also prints continuation lines.
|
||||
*/
|
||||
if (cont.len && (prefix || cont.owner != current))
|
||||
if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
|
||||
cont_flush();
|
||||
|
||||
/* buffer line if possible, otherwise store it right away */
|
||||
if (!cont_add(facility, level, text, text_len))
|
||||
log_store(facility, level, LOG_DEFAULT, 0,
|
||||
log_store(facility, level, lflags | LOG_CONT, 0,
|
||||
dict, dictlen, text, text_len);
|
||||
} else {
|
||||
bool stored = false;
|
||||
@@ -1502,13 +1523,13 @@ asmlinkage int vprintk_emit(int facility, int level,
|
||||
* flush it out and store this line separately.
|
||||
*/
|
||||
if (cont.len && cont.owner == current) {
|
||||
if (!prefix)
|
||||
if (!(lflags & LOG_PREFIX))
|
||||
stored = cont_add(facility, level, text, text_len);
|
||||
cont_flush();
|
||||
}
|
||||
|
||||
if (!stored)
|
||||
log_store(facility, level, LOG_DEFAULT, 0,
|
||||
log_store(facility, level, lflags, 0,
|
||||
dict, dictlen, text, text_len);
|
||||
}
|
||||
printed_len += text_len;
|
||||
@@ -1607,8 +1628,8 @@ static struct cont {
|
||||
static struct log *log_from_idx(u32 idx) { return NULL; }
|
||||
static u32 log_next(u32 idx) { return 0; }
|
||||
static void call_console_drivers(int level, const char *text, size_t len) {}
|
||||
static size_t msg_print_text(const struct log *msg, bool syslog,
|
||||
char *buf, size_t size) { return 0; }
|
||||
static size_t msg_print_text(const struct log *msg, enum log_flags prev,
|
||||
bool syslog, char *buf, size_t size) { return 0; }
|
||||
static size_t cont_print_text(char *text, size_t size) { return 0; }
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
@@ -1884,6 +1905,7 @@ void wake_up_klogd(void)
|
||||
/* the next printk record to write to the console */
|
||||
static u64 console_seq;
|
||||
static u32 console_idx;
|
||||
static enum log_flags console_prev;
|
||||
|
||||
/**
|
||||
* console_unlock - unlock the console system
|
||||
@@ -1944,6 +1966,7 @@ again:
|
||||
/* messages are gone, move to first one */
|
||||
console_seq = log_first_seq;
|
||||
console_idx = log_first_idx;
|
||||
console_prev = 0;
|
||||
}
|
||||
skip:
|
||||
if (console_seq == log_next_seq)
|
||||
@@ -1957,14 +1980,21 @@ skip:
|
||||
*/
|
||||
console_idx = log_next(console_idx);
|
||||
console_seq++;
|
||||
/*
|
||||
* We will get here again when we register a new
|
||||
* CON_PRINTBUFFER console. Clear the flag so we
|
||||
* will properly dump everything later.
|
||||
*/
|
||||
msg->flags &= ~LOG_NOCONS;
|
||||
goto skip;
|
||||
}
|
||||
|
||||
level = msg->level;
|
||||
len = msg_print_text(msg, false, text, sizeof(text));
|
||||
|
||||
len = msg_print_text(msg, console_prev, false,
|
||||
text, sizeof(text));
|
||||
console_idx = log_next(console_idx);
|
||||
console_seq++;
|
||||
console_prev = msg->flags;
|
||||
raw_spin_unlock(&logbuf_lock);
|
||||
|
||||
stop_critical_timings(); /* don't trace print latency */
|
||||
@@ -2227,6 +2257,7 @@ void register_console(struct console *newcon)
|
||||
raw_spin_lock_irqsave(&logbuf_lock, flags);
|
||||
console_seq = syslog_seq;
|
||||
console_idx = syslog_idx;
|
||||
console_prev = syslog_prev;
|
||||
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
/*
|
||||
* We're about to replay the log buffer. Only do this to the
|
||||
@@ -2478,6 +2509,57 @@ void kmsg_dump(enum kmsg_dump_reason reason)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
|
||||
* @dumper: registered kmsg dumper
|
||||
* @syslog: include the "<4>" prefixes
|
||||
* @line: buffer to copy the line to
|
||||
* @size: maximum size of the buffer
|
||||
* @len: length of line placed into buffer
|
||||
*
|
||||
* Start at the beginning of the kmsg buffer, with the oldest kmsg
|
||||
* record, and copy one record into the provided buffer.
|
||||
*
|
||||
* Consecutive calls will return the next available record moving
|
||||
* towards the end of the buffer with the youngest messages.
|
||||
*
|
||||
* A return value of FALSE indicates that there are no more records to
|
||||
* read.
|
||||
*
|
||||
* The function is similar to kmsg_dump_get_line(), but grabs no locks.
|
||||
*/
|
||||
bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
struct log *msg;
|
||||
size_t l = 0;
|
||||
bool ret = false;
|
||||
|
||||
if (!dumper->active)
|
||||
goto out;
|
||||
|
||||
if (dumper->cur_seq < log_first_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
dumper->cur_seq = log_first_seq;
|
||||
dumper->cur_idx = log_first_idx;
|
||||
}
|
||||
|
||||
/* last entry */
|
||||
if (dumper->cur_seq >= log_next_seq)
|
||||
goto out;
|
||||
|
||||
msg = log_from_idx(dumper->cur_idx);
|
||||
l = msg_print_text(msg, 0, syslog, line, size);
|
||||
|
||||
dumper->cur_idx = log_next(dumper->cur_idx);
|
||||
dumper->cur_seq++;
|
||||
ret = true;
|
||||
out:
|
||||
if (len)
|
||||
*len = l;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* kmsg_dump_get_line - retrieve one kmsg log line
|
||||
* @dumper: registered kmsg dumper
|
||||
@@ -2499,37 +2581,12 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct log *msg;
|
||||
size_t l = 0;
|
||||
bool ret = false;
|
||||
|
||||
if (!dumper->active)
|
||||
goto out;
|
||||
bool ret;
|
||||
|
||||
raw_spin_lock_irqsave(&logbuf_lock, flags);
|
||||
if (dumper->cur_seq < log_first_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
dumper->cur_seq = log_first_seq;
|
||||
dumper->cur_idx = log_first_idx;
|
||||
}
|
||||
|
||||
/* last entry */
|
||||
if (dumper->cur_seq >= log_next_seq) {
|
||||
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
msg = log_from_idx(dumper->cur_idx);
|
||||
l = msg_print_text(msg, syslog,
|
||||
line, size);
|
||||
|
||||
dumper->cur_idx = log_next(dumper->cur_idx);
|
||||
dumper->cur_seq++;
|
||||
ret = true;
|
||||
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
|
||||
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
out:
|
||||
if (len)
|
||||
*len = l;
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
@@ -2561,6 +2618,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
u32 idx;
|
||||
u64 next_seq;
|
||||
u32 next_idx;
|
||||
enum log_flags prev;
|
||||
size_t l = 0;
|
||||
bool ret = false;
|
||||
|
||||
@@ -2583,23 +2641,27 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
/* calculate length of entire buffer */
|
||||
seq = dumper->cur_seq;
|
||||
idx = dumper->cur_idx;
|
||||
prev = 0;
|
||||
while (seq < dumper->next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
l += msg_print_text(msg, true, NULL, 0);
|
||||
l += msg_print_text(msg, prev, true, NULL, 0);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
prev = msg->flags;
|
||||
}
|
||||
|
||||
/* move first record forward until length fits into the buffer */
|
||||
seq = dumper->cur_seq;
|
||||
idx = dumper->cur_idx;
|
||||
prev = 0;
|
||||
while (l > size && seq < dumper->next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
l -= msg_print_text(msg, true, NULL, 0);
|
||||
l -= msg_print_text(msg, prev, true, NULL, 0);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
prev = msg->flags;
|
||||
}
|
||||
|
||||
/* last message in next interation */
|
||||
@@ -2607,14 +2669,14 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
next_idx = idx;
|
||||
|
||||
l = 0;
|
||||
prev = 0;
|
||||
while (seq < dumper->next_seq) {
|
||||
struct log *msg = log_from_idx(idx);
|
||||
|
||||
l += msg_print_text(msg, syslog,
|
||||
buf + l, size - l);
|
||||
|
||||
l += msg_print_text(msg, prev, syslog, buf + l, size - l);
|
||||
idx = log_next(idx);
|
||||
seq++;
|
||||
prev = msg->flags;
|
||||
}
|
||||
|
||||
dumper->next_seq = next_seq;
|
||||
@@ -2628,6 +2690,24 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
|
||||
/**
|
||||
* kmsg_dump_rewind_nolock - reset the interator (unlocked version)
|
||||
* @dumper: registered kmsg dumper
|
||||
*
|
||||
* Reset the dumper's iterator so that kmsg_dump_get_line() and
|
||||
* kmsg_dump_get_buffer() can be called again and used multiple
|
||||
* times within the same dumper.dump() callback.
|
||||
*
|
||||
* The function is similar to kmsg_dump_rewind(), but grabs no locks.
|
||||
*/
|
||||
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
|
||||
{
|
||||
dumper->cur_seq = clear_seq;
|
||||
dumper->cur_idx = clear_idx;
|
||||
dumper->next_seq = log_next_seq;
|
||||
dumper->next_idx = log_next_idx;
|
||||
}
|
||||
|
||||
/**
|
||||
* kmsg_dump_rewind - reset the interator
|
||||
* @dumper: registered kmsg dumper
|
||||
@@ -2641,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&logbuf_lock, flags);
|
||||
dumper->cur_seq = clear_seq;
|
||||
dumper->cur_idx = clear_idx;
|
||||
dumper->next_seq = log_next_seq;
|
||||
dumper->next_idx = log_next_idx;
|
||||
kmsg_dump_rewind_nolock(dumper);
|
||||
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
|
@@ -53,6 +53,50 @@
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
|
||||
/*
|
||||
* Preemptible RCU implementation for rcu_read_lock().
|
||||
* Just increment ->rcu_read_lock_nesting, shared state will be updated
|
||||
* if we block.
|
||||
*/
|
||||
void __rcu_read_lock(void)
|
||||
{
|
||||
current->rcu_read_lock_nesting++;
|
||||
barrier(); /* critical section after entry code. */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_lock);
|
||||
|
||||
/*
|
||||
* Preemptible RCU implementation for rcu_read_unlock().
|
||||
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
|
||||
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
|
||||
* invoke rcu_read_unlock_special() to clean up after a context switch
|
||||
* in an RCU read-side critical section and other special cases.
|
||||
*/
|
||||
void __rcu_read_unlock(void)
|
||||
{
|
||||
struct task_struct *t = current;
|
||||
|
||||
if (t->rcu_read_lock_nesting != 1) {
|
||||
--t->rcu_read_lock_nesting;
|
||||
} else {
|
||||
barrier(); /* critical section before exit code. */
|
||||
t->rcu_read_lock_nesting = INT_MIN;
|
||||
barrier(); /* assign before ->rcu_read_unlock_special load */
|
||||
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
|
||||
rcu_read_unlock_special(t);
|
||||
barrier(); /* ->rcu_read_unlock_special load before assign */
|
||||
t->rcu_read_lock_nesting = 0;
|
||||
}
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
{
|
||||
int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
|
||||
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
|
||||
}
|
||||
#endif /* #ifdef CONFIG_PROVE_LOCKING */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
|
||||
|
||||
/*
|
||||
* Check for a task exiting while in a preemptible-RCU read-side
|
||||
* critical section, clean up if so. No need to issue warnings,
|
||||
|
@@ -172,7 +172,7 @@ void rcu_irq_enter(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
/*
|
||||
* Test whether RCU thinks that the current CPU is idle.
|
||||
@@ -183,7 +183,7 @@ int rcu_is_cpu_idle(void)
|
||||
}
|
||||
EXPORT_SYMBOL(rcu_is_cpu_idle);
|
||||
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU */
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
||||
/*
|
||||
* Test whether the current CPU was interrupted from idle. Nested
|
||||
|
@@ -132,7 +132,6 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
|
||||
RCU_TRACE(.rcb.name = "rcu_preempt")
|
||||
};
|
||||
|
||||
static void rcu_read_unlock_special(struct task_struct *t);
|
||||
static int rcu_preempted_readers_exp(void);
|
||||
static void rcu_report_exp_done(void);
|
||||
|
||||
@@ -351,8 +350,9 @@ static int rcu_initiate_boost(void)
|
||||
rcu_preempt_ctrlblk.boost_tasks =
|
||||
rcu_preempt_ctrlblk.gp_tasks;
|
||||
invoke_rcu_callbacks();
|
||||
} else
|
||||
} else {
|
||||
RCU_TRACE(rcu_initiate_boost_trace());
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -526,24 +526,12 @@ void rcu_preempt_note_context_switch(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tiny-preemptible RCU implementation for rcu_read_lock().
|
||||
* Just increment ->rcu_read_lock_nesting, shared state will be updated
|
||||
* if we block.
|
||||
*/
|
||||
void __rcu_read_lock(void)
|
||||
{
|
||||
current->rcu_read_lock_nesting++;
|
||||
barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_lock);
|
||||
|
||||
/*
|
||||
* Handle special cases during rcu_read_unlock(), such as needing to
|
||||
* notify RCU core processing or task having blocked during the RCU
|
||||
* read-side critical section.
|
||||
*/
|
||||
static noinline void rcu_read_unlock_special(struct task_struct *t)
|
||||
void rcu_read_unlock_special(struct task_struct *t)
|
||||
{
|
||||
int empty;
|
||||
int empty_exp;
|
||||
@@ -626,38 +614,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tiny-preemptible RCU implementation for rcu_read_unlock().
|
||||
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
|
||||
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
|
||||
* invoke rcu_read_unlock_special() to clean up after a context switch
|
||||
* in an RCU read-side critical section and other special cases.
|
||||
*/
|
||||
void __rcu_read_unlock(void)
|
||||
{
|
||||
struct task_struct *t = current;
|
||||
|
||||
barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
|
||||
if (t->rcu_read_lock_nesting != 1)
|
||||
--t->rcu_read_lock_nesting;
|
||||
else {
|
||||
t->rcu_read_lock_nesting = INT_MIN;
|
||||
barrier(); /* assign before ->rcu_read_unlock_special load */
|
||||
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
|
||||
rcu_read_unlock_special(t);
|
||||
barrier(); /* ->rcu_read_unlock_special load before assign */
|
||||
t->rcu_read_lock_nesting = 0;
|
||||
}
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
{
|
||||
int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
|
||||
|
||||
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
|
||||
}
|
||||
#endif /* #ifdef CONFIG_PROVE_LOCKING */
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
|
||||
|
||||
/*
|
||||
* Check for a quiescent state from the current CPU. When a task blocks,
|
||||
* the task is recorded in the rcu_preempt_ctrlblk structure, which is
|
||||
@@ -823,9 +779,9 @@ void synchronize_rcu_expedited(void)
|
||||
rpcp->exp_tasks = NULL;
|
||||
|
||||
/* Wait for tail of ->blkd_tasks list to drain. */
|
||||
if (!rcu_preempted_readers_exp())
|
||||
if (!rcu_preempted_readers_exp()) {
|
||||
local_irq_restore(flags);
|
||||
else {
|
||||
} else {
|
||||
rcu_initiate_boost();
|
||||
local_irq_restore(flags);
|
||||
wait_event(sync_rcu_preempt_exp_wq,
|
||||
@@ -846,8 +802,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
|
||||
*/
|
||||
int rcu_preempt_needs_cpu(void)
|
||||
{
|
||||
if (!rcu_preempt_running_reader())
|
||||
rcu_preempt_cpu_qs();
|
||||
return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
|
||||
}
@@ -49,8 +49,7 @@
#include <asm/byteorder.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
"Josh Triplett <josh@freedesktop.org>");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");

static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
static int nfakewriters = 4; /* # fake writer threads */
@@ -206,6 +205,7 @@ static unsigned long boost_starttime; /* jiffies of next boost test start. */
DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
/* and boost task create/destroy. */
static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
static bool barrier_phase; /* Test phase. */
static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */
static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */
static DECLARE_WAIT_QUEUE_HEAD(barrier_wq);
@@ -407,8 +407,9 @@ rcu_torture_cb(struct rcu_head *p)
|
||||
if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
|
||||
rp->rtort_mbtest = 0;
|
||||
rcu_torture_free(rp);
|
||||
} else
|
||||
} else {
|
||||
cur_ops->deferred_free(rp);
|
||||
}
|
||||
}
|
||||
|
||||
static int rcu_no_completed(void)
|
||||
@@ -635,6 +636,17 @@ static void srcu_torture_synchronize(void)
|
||||
synchronize_srcu(&srcu_ctl);
|
||||
}
|
||||
|
||||
static void srcu_torture_call(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head))
|
||||
{
|
||||
call_srcu(&srcu_ctl, head, func);
|
||||
}
|
||||
|
||||
static void srcu_torture_barrier(void)
|
||||
{
|
||||
srcu_barrier(&srcu_ctl);
|
||||
}
|
||||
|
||||
static int srcu_torture_stats(char *page)
|
||||
{
|
||||
int cnt = 0;
|
||||
@@ -661,8 +673,8 @@ static struct rcu_torture_ops srcu_ops = {
|
||||
.completed = srcu_torture_completed,
|
||||
.deferred_free = srcu_torture_deferred_free,
|
||||
.sync = srcu_torture_synchronize,
|
||||
.call = NULL,
|
||||
.cb_barrier = NULL,
|
||||
.call = srcu_torture_call,
|
||||
.cb_barrier = srcu_torture_barrier,
|
||||
.stats = srcu_torture_stats,
|
||||
.name = "srcu"
|
||||
};
|
||||
@@ -1013,7 +1025,11 @@ rcu_torture_fakewriter(void *arg)
|
||||
do {
|
||||
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
|
||||
udelay(rcu_random(&rand) & 0x3ff);
|
||||
cur_ops->sync();
|
||||
if (cur_ops->cb_barrier != NULL &&
|
||||
rcu_random(&rand) % (nfakewriters * 8) == 0)
|
||||
cur_ops->cb_barrier();
|
||||
else
|
||||
cur_ops->sync();
|
||||
rcu_stutter_wait("rcu_torture_fakewriter");
|
||||
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
|
||||
|
||||
@@ -1183,27 +1199,27 @@ rcu_torture_printk(char *page)
}
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
"rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
"rtmbe: %d rtbke: %ld rtbre: %ld "
"rtbf: %ld rtb: %ld nt: %ld "
"onoff: %ld/%ld:%ld/%ld "
"barrier: %ld/%ld:%ld",
"rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free),
atomic_read(&n_rcu_torture_free));
cnt += sprintf(&page[cnt], "rtmbe: %d rtbke: %ld rtbre: %ld ",
atomic_read(&n_rcu_torture_mberror),
n_rcu_torture_boost_ktrerror,
n_rcu_torture_boost_rterror,
n_rcu_torture_boost_rterror);
cnt += sprintf(&page[cnt], "rtbf: %ld rtb: %ld nt: %ld ",
n_rcu_torture_boost_failure,
n_rcu_torture_boosts,
n_rcu_torture_timers,
n_rcu_torture_timers);
cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ",
n_online_successes,
n_online_attempts,
n_offline_successes,
n_offline_attempts,
n_offline_attempts);
cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld",
n_barrier_successes,
n_barrier_attempts,
n_rcu_torture_barrier_error);
@@ -1445,8 +1461,7 @@ rcu_torture_shutdown(void *arg)
delta = shutdown_time - jiffies_snap;
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_shutdown task: %lu "
"jiffies remaining\n",
"rcu_torture_shutdown task: %lu jiffies remaining\n",
torture_type, delta);
schedule_timeout_interruptible(delta);
jiffies_snap = ACCESS_ONCE(jiffies);
@@ -1498,8 +1513,7 @@ rcu_torture_onoff(void *arg)
if (cpu_down(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: "
"offlined %d\n",
"rcu_torture_onoff task: offlined %d\n",
torture_type, cpu);
n_offline_successes++;
}
@@ -1512,8 +1526,7 @@ rcu_torture_onoff(void *arg)
if (cpu_up(cpu) == 0) {
if (verbose)
printk(KERN_ALERT "%s" TORTURE_FLAG
"rcu_torture_onoff task: "
"onlined %d\n",
"rcu_torture_onoff task: onlined %d\n",
torture_type, cpu);
n_online_successes++;
}
@@ -1631,6 +1644,7 @@ void rcu_torture_barrier_cbf(struct rcu_head *rcu)
static int rcu_torture_barrier_cbs(void *arg)
{
long myid = (long)arg;
bool lastphase = 0;
struct rcu_head rcu;

init_rcu_head_on_stack(&rcu);
@@ -1638,9 +1652,11 @@ static int rcu_torture_barrier_cbs(void *arg)
set_user_nice(current, 19);
do {
wait_event(barrier_cbs_wq[myid],
atomic_read(&barrier_cbs_count) == n_barrier_cbs ||
barrier_phase != lastphase ||
kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP);
lastphase = barrier_phase;
smp_mb(); /* ensure barrier_phase load before ->call(). */
if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP)
break;
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
@@ -1665,7 +1681,8 @@ static int rcu_torture_barrier(void *arg)
do {
atomic_set(&barrier_cbs_invoked, 0);
atomic_set(&barrier_cbs_count, n_barrier_cbs);
/* wake_up() path contains the required barriers. */
smp_mb(); /* Ensure barrier_phase after prior assignments. */
barrier_phase = !barrier_phase;
for (i = 0; i < n_barrier_cbs; i++)
wake_up(&barrier_cbs_wq[i]);
wait_event(barrier_wq,
@@ -1684,7 +1701,7 @@ static int rcu_torture_barrier(void *arg)
schedule_timeout_interruptible(HZ / 10);
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping");
rcutorture_shutdown_absorb("rcu_torture_barrier_cbs");
rcutorture_shutdown_absorb("rcu_torture_barrier");
while (!kthread_should_stop())
schedule_timeout_interruptible(1);
return 0;
@@ -1908,8 +1925,8 @@ rcu_torture_init(void)
static struct rcu_torture_ops *torture_ops[] =
{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
&rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
&srcu_ops, &srcu_sync_ops, &srcu_raw_ops,
&srcu_raw_sync_ops, &srcu_expedited_ops,
&srcu_ops, &srcu_sync_ops, &srcu_expedited_ops,
&srcu_raw_ops, &srcu_raw_sync_ops,
&sched_ops, &sched_sync_ops, &sched_expedited_ops, };

mutex_lock(&fullstop_mutex);
@@ -1931,8 +1948,7 @@ rcu_torture_init(void)
return -EINVAL;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
"fqs_duration, fqs disabled.\n");
printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n");
fqs_duration = 0;
}
if (cur_ops->init)

kernel/rcutree.c
@@ -60,36 +60,44 @@

/* Data structures. */

static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];

#define RCU_STATE_INITIALIZER(structname) { \
.level = { &structname##_state.node[0] }, \
.levelcnt = { \
NUM_RCU_LVL_0, /* root of hierarchy. */ \
NUM_RCU_LVL_1, \
NUM_RCU_LVL_2, \
NUM_RCU_LVL_3, \
NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
}, \
#define RCU_STATE_INITIALIZER(sname, cr) { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
.gpnum = -300, \
.completed = -300, \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
.orphan_nxttail = &structname##_state.orphan_nxtlist, \
.orphan_donetail = &structname##_state.orphan_donelist, \
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
.n_force_qs = 0, \
.n_force_qs_ngp = 0, \
.name = #structname, \
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
.orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \
.name = #sname, \
}

struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
struct rcu_state rcu_sched_state =
RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);

struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);

static struct rcu_state *rcu_state;
LIST_HEAD(rcu_struct_flavors);

/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
NUM_RCU_LVL_0,
NUM_RCU_LVL_1,
NUM_RCU_LVL_2,
NUM_RCU_LVL_3,
NUM_RCU_LVL_4,
};
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */

/*
* The rcu_scheduler_active variable transitions from zero to one just
@@ -147,13 +155,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
unsigned long rcutorture_testseq;
unsigned long rcutorture_vernum;

/* State information for rcu_barrier() and friends. */

static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
static struct completion rcu_barrier_completion;

/*
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
* permit this function to be invoked without holding the root rcu_node
@@ -201,6 +202,7 @@ void rcu_note_context_switch(int cpu)
{
trace_rcu_utilization("Start context switch");
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch(cpu);
trace_rcu_utilization("End context switch");
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -357,7 +359,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
struct task_struct *idle = idle_task(smp_processor_id());

trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
ftrace_dump(DUMP_ALL);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -467,7 +469,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)

trace_rcu_dyntick("Error on exit: not idle task",
oldval, rdtp->dynticks_nesting);
ftrace_dump(DUMP_ALL);
ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -584,8 +586,6 @@ void rcu_nmi_exit(void)
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}

#ifdef CONFIG_PROVE_RCU

/**
* rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
*
@@ -603,7 +603,7 @@ int rcu_is_cpu_idle(void)
}
EXPORT_SYMBOL(rcu_is_cpu_idle);

#ifdef CONFIG_HOTPLUG_CPU
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)

/*
* Is the current CPU online? Disable preemption to avoid false positives
@@ -644,9 +644,7 @@ bool rcu_lockdep_current_cpu_online(void)
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

#endif /* #ifdef CONFIG_PROVE_RCU */
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */

/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
@@ -732,7 +730,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
int cpu;
long delta;
unsigned long flags;
int ndetected;
int ndetected = 0;
struct rcu_node *rnp = rcu_get_root(rsp);

/* Only let one CPU complain about others per time interval. */
@@ -773,7 +771,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
*/
rnp = rcu_get_root(rsp);
raw_spin_lock_irqsave(&rnp->lock, flags);
ndetected = rcu_print_task_stall(rnp);
ndetected += rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);

print_cpu_stall_info_end();
@@ -859,9 +857,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
*/
void rcu_cpu_stall_reset(void)
{
rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
rcu_preempt_stall_reset();
struct rcu_state *rsp;

for_each_rcu_flavor(rsp)
rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
}

static struct notifier_block rcu_panic_block = {
@@ -893,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
if (rnp->qsmask & rdp->grpmask) {
rdp->qs_pending = 1;
rdp->passed_quiesce = 0;
} else
} else {
rdp->qs_pending = 0;
}
zero_cpu_stall_ticks(rdp);
}
}
@@ -935,6 +935,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
return ret;
}

/*
* Initialize the specified rcu_data structure's callback list to empty.
*/
static void init_callback_list(struct rcu_data *rdp)
{
int i;

rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
}

/*
* Advance this CPU's callbacks, but only if the current grace period
* has ended. This may be called only from the CPU to whom the rdp
@@ -1327,8 +1339,6 @@ static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
struct rcu_node *rnp, struct rcu_data *rdp)
{
int i;

/*
* Orphan the callbacks. First adjust the counts. This is safe
* because ->onofflock excludes _rcu_barrier()'s adoption of
@@ -1339,7 +1349,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
rsp->qlen += rdp->qlen;
rdp->n_cbs_orphaned += rdp->qlen;
rdp->qlen_lazy = 0;
rdp->qlen = 0;
ACCESS_ONCE(rdp->qlen) = 0;
}

/*
@@ -1368,9 +1378,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
}

/* Finally, initialize the rcu_data structure's list to empty. */
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
init_callback_list(rdp);
}

/*
@@ -1504,6 +1512,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (need_report & RCU_OFL_TASKS_EXP_GP)
rcu_report_exp_rnp(rsp, rnp, true);
WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
"rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
cpu, rdp->qlen, rdp->nxtlist);
}

#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1591,7 +1602,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
}
smp_mb(); /* List handling before counting for rcu_barrier(). */
rdp->qlen_lazy -= count_lazy;
rdp->qlen -= count;
ACCESS_ONCE(rdp->qlen) -= count;
rdp->n_cbs_invoked += count;

/* Reinstate batch limit if we have worked down the excess. */
@@ -1604,6 +1615,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->n_force_qs_snap = rsp->n_force_qs;
} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
rdp->qlen_last_fqs_check = rdp->qlen;
WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));

local_irq_restore(flags);

@@ -1744,8 +1756,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
break; /* grace period idle or initializing, ignore. */

case RCU_SAVE_DYNTICK:
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
break; /* So gcc recognizes the dead code. */

raw_spin_unlock(&rnp->lock); /* irqs remain disabled */

@@ -1787,9 +1797,10 @@ unlock_fqs_ret:
* whom the rdp belongs.
*/
static void
__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
__rcu_process_callbacks(struct rcu_state *rsp)
{
unsigned long flags;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

WARN_ON_ONCE(rdp->beenonline == 0);

@@ -1825,11 +1836,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static void rcu_process_callbacks(struct softirq_action *unused)
{
struct rcu_state *rsp;

trace_rcu_utilization("Start RCU core");
__rcu_process_callbacks(&rcu_sched_state,
&__get_cpu_var(rcu_sched_data));
__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
rcu_preempt_process_callbacks();
for_each_rcu_flavor(rsp)
__rcu_process_callbacks(rsp);
trace_rcu_utilization("End RCU core");
}

@@ -1856,50 +1867,22 @@ static void invoke_rcu_core(void)
raise_softirq(RCU_SOFTIRQ);
}

static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp, bool lazy)
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
*/
static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
struct rcu_head *head, unsigned long flags)
{
unsigned long flags;
struct rcu_data *rdp;

WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
debug_rcu_head_queue(head);
head->func = func;
head->next = NULL;

smp_mb(); /* Ensure RCU update seen before callback registry. */

/*
* Opportunistically note grace-period endings and beginnings.
* Note that we might see a beginning right after we see an
* end, but never vice versa, since this CPU has to pass through
* a quiescent state betweentimes.
* If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness.
*/
local_irq_save(flags);
rdp = this_cpu_ptr(rsp->rda);
if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
invoke_rcu_core();

/* Add the callback to our list. */
rdp->qlen++;
if (lazy)
rdp->qlen_lazy++;
else
rcu_idle_count_callbacks_posted();
smp_mb(); /* Count before adding callback for rcu_barrier(). */
*rdp->nxttail[RCU_NEXT_TAIL] = head;
rdp->nxttail[RCU_NEXT_TAIL] = &head->next;

if (__is_kfree_rcu_offset((unsigned long)func))
trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
rdp->qlen_lazy, rdp->qlen);
else
trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);

/* If interrupts were disabled, don't dive into RCU core. */
if (irqs_disabled_flags(flags)) {
local_irq_restore(flags);
/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
return;
}

/*
* Force the grace period if too many callbacks or too long waiting.
@@ -1932,6 +1915,49 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
}
} else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
force_quiescent_state(rsp, 1);
}

static void
__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
struct rcu_state *rsp, bool lazy)
{
unsigned long flags;
struct rcu_data *rdp;

WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
debug_rcu_head_queue(head);
head->func = func;
head->next = NULL;

smp_mb(); /* Ensure RCU update seen before callback registry. */

/*
* Opportunistically note grace-period endings and beginnings.
* Note that we might see a beginning right after we see an
* end, but never vice versa, since this CPU has to pass through
* a quiescent state betweentimes.
*/
local_irq_save(flags);
rdp = this_cpu_ptr(rsp->rda);

/* Add the callback to our list. */
ACCESS_ONCE(rdp->qlen)++;
if (lazy)
rdp->qlen_lazy++;
else
rcu_idle_count_callbacks_posted();
smp_mb(); /* Count before adding callback for rcu_barrier(). */
*rdp->nxttail[RCU_NEXT_TAIL] = head;
rdp->nxttail[RCU_NEXT_TAIL] = &head->next;

if (__is_kfree_rcu_offset((unsigned long)func))
trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
rdp->qlen_lazy, rdp->qlen);
else
trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);

/* Go handle any RCU core processing required. */
__call_rcu_core(rsp, rdp, head, flags);
local_irq_restore(flags);
}

@@ -1961,28 +1987,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
* occasionally incorrectly indicate that there are multiple CPUs online
* when there was in fact only one the whole time, as this just adds
* some overhead: RCU still operates correctly.
*
* Of course, sampling num_online_cpus() with preemption enabled can
* give erroneous results if there are concurrent CPU-hotplug operations.
* For example, given a demonic sequence of preemptions in num_online_cpus()
* and CPU-hotplug operations, there could be two or more CPUs online at
* all times, but num_online_cpus() might well return one (or even zero).
*
* However, all such demonic sequences require at least one CPU-offline
* operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer
* is only a problem if there is an RCU read-side critical section executing
* throughout. But RCU-sched and RCU-bh read-side critical sections
* disable either preemption or bh, which prevents a CPU from going offline.
* Therefore, the only way that rcu_blocking_is_gp() can incorrectly return
* that there is only one CPU when in fact there was more than one throughout
* is when there were no RCU readers in the system. If there are no
* RCU readers, the grace period by definition can be of zero length,
* regardless of the number of online CPUs.
*/
static inline int rcu_blocking_is_gp(void)
{
int ret;

might_sleep(); /* Check for RCU read-side critical section. */
return num_online_cpus() <= 1;
preempt_disable();
ret = num_online_cpus() <= 1;
preempt_enable();
return ret;
}

/**
@@ -2117,9 +2131,9 @@ void synchronize_sched_expedited(void)
put_online_cpus();

/* No joy, try again later. Or just synchronize_sched(). */
if (trycount++ < 10)
if (trycount++ < 10) {
udelay(trycount * num_online_cpus());
else {
} else {
synchronize_sched();
return;
}
@@ -2240,9 +2254,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
*/
static int rcu_pending(int cpu)
{
return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
__rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
rcu_preempt_pending(cpu);
struct rcu_state *rsp;

for_each_rcu_flavor(rsp)
if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
return 1;
return 0;
}

/*
@@ -2252,20 +2269,41 @@ static int rcu_pending(int cpu)
*/
static int rcu_cpu_has_callbacks(int cpu)
{
struct rcu_state *rsp;

/* RCU callbacks either ready or pending? */
return per_cpu(rcu_sched_data, cpu).nxtlist ||
per_cpu(rcu_bh_data, cpu).nxtlist ||
rcu_preempt_cpu_has_callbacks(cpu);
for_each_rcu_flavor(rsp)
if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
return 1;
return 0;
}

/*
* Helper function for _rcu_barrier() tracing. If tracing is disabled,
* the compiler is expected to optimize this away.
*/
static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
int cpu, unsigned long done)
{
trace_rcu_barrier(rsp->name, s, cpu,
atomic_read(&rsp->barrier_cpu_count), done);
}

/*
* RCU callback function for _rcu_barrier(). If we are last, wake
* up the task executing _rcu_barrier().
*/
static void rcu_barrier_callback(struct rcu_head *notused)
static void rcu_barrier_callback(struct rcu_head *rhp)
{
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
complete(&rcu_barrier_completion);
struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
struct rcu_state *rsp = rdp->rsp;

if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
_rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
complete(&rsp->barrier_completion);
} else {
_rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
}
}

/*
@@ -2273,35 +2311,63 @@ static void rcu_barrier_callback(struct rcu_head *notused)
*/
static void rcu_barrier_func(void *type)
{
int cpu = smp_processor_id();
struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
void (*call_rcu_func)(struct rcu_head *head,
void (*func)(struct rcu_head *head));
struct rcu_state *rsp = type;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);

atomic_inc(&rcu_barrier_cpu_count);
call_rcu_func = type;
call_rcu_func(head, rcu_barrier_callback);
_rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
atomic_inc(&rsp->barrier_cpu_count);
rsp->call(&rdp->barrier_head, rcu_barrier_callback);
}

/*
* Orchestrate the specified type of RCU barrier, waiting for all
* RCU callbacks of the specified type to complete.
*/
static void _rcu_barrier(struct rcu_state *rsp,
void (*call_rcu_func)(struct rcu_head *head,
void (*func)(struct rcu_head *head)))
static void _rcu_barrier(struct rcu_state *rsp)
{
int cpu;
unsigned long flags;
struct rcu_data *rdp;
struct rcu_head rh;
struct rcu_data rd;
unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
unsigned long snap_done;

init_rcu_head_on_stack(&rh);
init_rcu_head_on_stack(&rd.barrier_head);
_rcu_barrier_trace(rsp, "Begin", -1, snap);

/* Take mutex to serialize concurrent rcu_barrier() requests. */
mutex_lock(&rcu_barrier_mutex);
mutex_lock(&rsp->barrier_mutex);

smp_mb(); /* Prevent any prior operations from leaking in. */
/*
* Ensure that all prior references, including to ->n_barrier_done,
* are ordered before the _rcu_barrier() machinery.
*/
smp_mb(); /* See above block comment. */

/*
* Recheck ->n_barrier_done to see if others did our work for us.
* This means checking ->n_barrier_done for an even-to-odd-to-even
* transition. The "if" expression below therefore rounds the old
* value up to the next even number and adds two before comparing.
*/
snap_done = ACCESS_ONCE(rsp->n_barrier_done);
_rcu_barrier_trace(rsp, "Check", -1, snap_done);
if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
_rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
smp_mb(); /* caller's subsequent code after above check. */
mutex_unlock(&rsp->barrier_mutex);
return;
}

/*
* Increment ->n_barrier_done to avoid duplicate work. Use
* ACCESS_ONCE() to prevent the compiler from speculating
* the increment to precede the early-exit check.
*/
ACCESS_ONCE(rsp->n_barrier_done)++;
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
_rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */

/*
* Initialize the count to one rather than to zero in order to
@@ -2320,8 +2386,8 @@ static void _rcu_barrier(struct rcu_state *rsp,
* 6. Both rcu_barrier_callback() callbacks are invoked, awakening
* us -- but before CPU 1's orphaned callbacks are invoked!!!
*/
init_completion(&rcu_barrier_completion);
atomic_set(&rcu_barrier_cpu_count, 1);
init_completion(&rsp->barrier_completion);
atomic_set(&rsp->barrier_cpu_count, 1);
raw_spin_lock_irqsave(&rsp->onofflock, flags);
rsp->rcu_barrier_in_progress = current;
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
@@ -2337,14 +2403,19 @@ static void _rcu_barrier(struct rcu_state *rsp,
preempt_disable();
rdp = per_cpu_ptr(rsp->rda, cpu);
if (cpu_is_offline(cpu)) {
_rcu_barrier_trace(rsp, "Offline", cpu,
rsp->n_barrier_done);
preempt_enable();
while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen))
schedule_timeout_interruptible(1);
} else if (ACCESS_ONCE(rdp->qlen)) {
smp_call_function_single(cpu, rcu_barrier_func,
(void *)call_rcu_func, 1);
_rcu_barrier_trace(rsp, "OnlineQ", cpu,
rsp->n_barrier_done);
smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
preempt_enable();
} else {
_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
rsp->n_barrier_done);
preempt_enable();
}
}
@@ -2361,24 +2432,32 @@ static void _rcu_barrier(struct rcu_state *rsp,
rcu_adopt_orphan_cbs(rsp);
rsp->rcu_barrier_in_progress = NULL;
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
atomic_inc(&rcu_barrier_cpu_count);
atomic_inc(&rsp->barrier_cpu_count);
smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */
call_rcu_func(&rh, rcu_barrier_callback);
rd.rsp = rsp;
rsp->call(&rd.barrier_head, rcu_barrier_callback);

/*
* Now that we have an rcu_barrier_callback() callback on each
* CPU, and thus each counted, remove the initial count.
*/
if (atomic_dec_and_test(&rcu_barrier_cpu_count))
complete(&rcu_barrier_completion);
if (atomic_dec_and_test(&rsp->barrier_cpu_count))
complete(&rsp->barrier_completion);

/* Increment ->n_barrier_done to prevent duplicate work. */
smp_mb(); /* Keep increment after above mechanism. */
ACCESS_ONCE(rsp->n_barrier_done)++;
WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
_rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
smp_mb(); /* Keep increment before caller's subsequent code. */

/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
wait_for_completion(&rcu_barrier_completion);
wait_for_completion(&rsp->barrier_completion);

/* Other rcu_barrier() invocations can now safely proceed. */
mutex_unlock(&rcu_barrier_mutex);
mutex_unlock(&rsp->barrier_mutex);

destroy_rcu_head_on_stack(&rh);
destroy_rcu_head_on_stack(&rd.barrier_head);
}

/**
@@ -2386,7 +2465,7 @@ static void _rcu_barrier(struct rcu_state *rsp,
*/
void rcu_barrier_bh(void)
{
_rcu_barrier(&rcu_bh_state, call_rcu_bh);
_rcu_barrier(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);

@@ -2395,7 +2474,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh);
*/
void rcu_barrier_sched(void)
{
_rcu_barrier(&rcu_sched_state, call_rcu_sched);
_rcu_barrier(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);

@@ -2406,18 +2485,15 @@ static void __init
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
int i;
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);

/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->nxtlist = NULL;
for (i = 0; i < RCU_NEXT_SIZE; i++)
rdp->nxttail[i] = &rdp->nxtlist;
init_callback_list(rdp);
rdp->qlen_lazy = 0;
rdp->qlen = 0;
ACCESS_ONCE(rdp->qlen) = 0;
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
@@ -2491,9 +2567,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)

static void __cpuinit rcu_prepare_cpu(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
rcu_preempt_init_percpu_data(cpu);
struct rcu_state *rsp;

for_each_rcu_flavor(rsp)
rcu_init_percpu_data(cpu, rsp,
strcmp(rsp->name, "rcu_preempt") == 0);
}

/*
@@ -2505,6 +2583,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
long cpu = (long)hcpu;
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct rcu_state *rsp;

trace_rcu_utilization("Start CPU hotplug");
switch (action) {
@@ -2529,18 +2608,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
* touch any data without introducing corruption. We send the
* dying CPU's callbacks to an arbitrarily chosen online CPU.
*/
rcu_cleanup_dying_cpu(&rcu_bh_state);
rcu_cleanup_dying_cpu(&rcu_sched_state);
rcu_preempt_cleanup_dying_cpu();
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
rcu_preempt_cleanup_dead_cpu(cpu);
for_each_rcu_flavor(rsp)
rcu_cleanup_dead_cpu(cpu, rsp);
break;
default:
break;
@@ -2573,9 +2650,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
{
int i;

for (i = NUM_RCU_LVLS - 1; i > 0; i--)
for (i = rcu_num_lvls - 1; i > 0; i--)
rsp->levelspread[i] = CONFIG_RCU_FANOUT;
rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF;
rsp->levelspread[0] = rcu_fanout_leaf;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -2585,7 +2662,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
int i;

cprv = NR_CPUS;
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
for (i = rcu_num_lvls - 1; i >= 0; i--) {
ccur = rsp->levelcnt[i];
rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
cprv = ccur;
@@ -2612,13 +2689,15 @@ static void __init rcu_init_one(struct rcu_state *rsp,

/* Initialize the level-tracking arrays. */

for (i = 1; i < NUM_RCU_LVLS; i++)
for (i = 0; i < rcu_num_lvls; i++)
rsp->levelcnt[i] = num_rcu_lvl[i];
for (i = 1; i < rcu_num_lvls; i++)
rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
rcu_init_levelspread(rsp);

/* Initialize the elements themselves, starting from the leaves. */

for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
for (i = rcu_num_lvls - 1; i >= 0; i--) {
cpustride *= rsp->levelspread[i];
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
@@ -2648,13 +2727,74 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}

rsp->rda = rda;
rnp = rsp->level[NUM_RCU_LVLS - 1];
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
while (i > rnp->grphi)
rnp++;
per_cpu_ptr(rsp->rda, i)->mynode = rnp;
rcu_boot_init_percpu_data(i, rsp);
}
list_add(&rsp->flavors, &rcu_struct_flavors);
}

/*
* Compute the rcu_node tree geometry from kernel parameters. This cannot
* replace the definitions in rcutree.h because those are needed to size
* the ->node array in the rcu_state structure.
*/
static void __init rcu_init_geometry(void)
{
int i;
int j;
int n = nr_cpu_ids;
int rcu_capacity[MAX_RCU_LVLS + 1];

/* If the compile-time values are accurate, just leave. */
if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF)
return;

/*
* Compute number of nodes that can be handled an rcu_node tree
* with the given number of levels. Setting rcu_capacity[0] makes
* some of the arithmetic easier.
*/
rcu_capacity[0] = 1;
rcu_capacity[1] = rcu_fanout_leaf;
for (i = 2; i <= MAX_RCU_LVLS; i++)
rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;

/*
* The boot-time rcu_fanout_leaf parameter is only permitted
* to increase the leaf-level fanout, not decrease it. Of course,
* the leaf-level fanout cannot exceed the number of bits in
* the rcu_node masks. Finally, the tree must be able to accommodate
* the configured number of CPUs. Complain and fall back to the
* compile-time values if these limits are exceeded.
*/
if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
n > rcu_capacity[MAX_RCU_LVLS]) {
WARN_ON(1);
return;
}

/* Calculate the number of rcu_nodes at each level of the tree. */
for (i = 1; i <= MAX_RCU_LVLS; i++)
if (n <= rcu_capacity[i]) {
for (j = 0; j <= i; j++)
num_rcu_lvl[j] =
DIV_ROUND_UP(n, rcu_capacity[i - j]);
rcu_num_lvls = i;
for (j = i + 1; j <= MAX_RCU_LVLS; j++)
num_rcu_lvl[j] = 0;
break;
}

/* Calculate the total number of rcu_node structures. */
rcu_num_nodes = 0;
for (i = 0; i <= MAX_RCU_LVLS; i++)
rcu_num_nodes += num_rcu_lvl[i];
rcu_num_nodes -= n;
}

void __init rcu_init(void)
@@ -2662,6 +2802,7 @@ void __init rcu_init(void)
int cpu;

rcu_bootup_announce();
rcu_init_geometry();
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
@@ -42,28 +42,28 @@
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)

#if NR_CPUS <= RCU_FANOUT_1
# define NUM_RCU_LVLS 1
# define RCU_NUM_LVLS 1
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_2
# define NUM_RCU_LVLS 2
# define RCU_NUM_LVLS 2
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_3
# define NUM_RCU_LVLS 3
# define RCU_NUM_LVLS 3
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_3 (NR_CPUS)
# define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_4
# define NUM_RCU_LVLS 4
# define RCU_NUM_LVLS 4
# define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
@@ -76,6 +76,9 @@
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)

extern int rcu_num_lvls;
extern int rcu_num_nodes;

/*
* Dynticks per-CPU state.
*/
@@ -97,6 +100,7 @@ struct rcu_dynticks {
/* # times non-lazy CBs posted to CPU. */
unsigned long nonlazy_posted_snap;
/* idle-period nonlazy_posted snapshot. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};

@@ -206,7 +210,7 @@ struct rcu_node {
*/
#define rcu_for_each_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

/*
* Do a breadth-first scan of the non-leaf rcu_node structures for the
@@ -215,7 +219,7 @@ struct rcu_node {
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
(rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)

/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -224,8 +228,8 @@ struct rcu_node {
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
(rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

/* Index values for nxttail array in struct rcu_data. */
#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
@@ -311,6 +315,9 @@ struct rcu_data {
unsigned long n_rp_need_fqs;
unsigned long n_rp_need_nothing;

/* 6) _rcu_barrier() callback. */
struct rcu_head barrier_head;

int cpu;
struct rcu_state *rsp;
};
@@ -357,10 +364,12 @@ do { \
*/
struct rcu_state {
struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
void (*func)(struct rcu_head *head));

/* The following fields are guarded by the root rcu_node's lock. */

@@ -392,6 +401,11 @@ struct rcu_state {
struct task_struct *rcu_barrier_in_progress;
/* Task doing rcu_barrier(), */
/* or NULL if no barrier. */
struct mutex barrier_mutex; /* Guards barrier fields. */
atomic_t barrier_cpu_count; /* # CPUs waiting on. */
struct completion barrier_completion; /* Wake at barrier end. */
unsigned long n_barrier_done; /* ++ at start and end of */
/* _rcu_barrier(). */
raw_spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
@@ -409,8 +423,13 @@ struct rcu_state {
unsigned long gp_max; /* Maximum GP duration in */
/* jiffies. */
char *name; /* Name of structure. */
struct list_head flavors; /* List of RCU flavors. */
};

extern struct list_head rcu_struct_flavors;
#define for_each_rcu_flavor(rsp) \
list_for_each_entry((rsp), &rcu_struct_flavors, flavors)

/* Return values for rcu_preempt_offline_tasks(). */

#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
@@ -444,6 +463,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
long rcu_batches_completed(void);
static void rcu_preempt_note_context_switch(int cpu);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
@@ -452,25 +472,18 @@ static void rcu_stop_cpu_kthread(int cpu);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static int rcu_print_task_stall(struct rcu_node *rnp);
static void rcu_preempt_stall_reset(void);
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
struct rcu_node *rnp,
struct rcu_data *rdp);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
static void rcu_preempt_cleanup_dead_cpu(int cpu);
static void rcu_preempt_check_callbacks(int cpu);
static void rcu_preempt_process_callbacks(void);
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake);
#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
static int rcu_preempt_pending(int cpu);
static int rcu_preempt_cpu_has_callbacks(int cpu);
static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_cleanup_dying_cpu(void);
static void __init __rcu_init_preempt(void);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
@@ -68,17 +68,21 @@ static void __init rcu_bootup_announce_oddness(void)
printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
#endif
#if NUM_RCU_LVL_4 != 0
printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
#endif
if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
if (nr_cpu_ids != NR_CPUS)
printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
}

#ifdef CONFIG_TREE_PREEMPT_RCU

struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
struct rcu_state rcu_preempt_state =
RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
static struct rcu_state *rcu_state = &rcu_preempt_state;

static void rcu_read_unlock_special(struct task_struct *t);
static int rcu_preempted_readers_exp(struct rcu_node *rnp);

/*
@@ -153,7 +157,7 @@ static void rcu_preempt_qs(int cpu)
*
* Caller must disable preemption.
*/
void rcu_preempt_note_context_switch(void)
static void rcu_preempt_note_context_switch(int cpu)
{
struct task_struct *t = current;
unsigned long flags;
@@ -164,7 +168,7 @@ void rcu_preempt_note_context_switch(void)
(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {

/* Possibly blocking in an RCU read-side critical section. */
rdp = __this_cpu_ptr(rcu_preempt_state.rda);
rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,22 +232,10 @@ void rcu_preempt_note_context_switch(void)
* means that we continue to block the current grace period.
*/
local_irq_save(flags);
rcu_preempt_qs(smp_processor_id());
rcu_preempt_qs(cpu);
local_irq_restore(flags);
}

/*
* Tree-preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
* Check for preempted RCU readers blocking the current grace period
* for the specified rcu_node structure. If the caller needs a reliable
@@ -310,7 +302,7 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
*/
static noinline void rcu_read_unlock_special(struct task_struct *t)
void rcu_read_unlock_special(struct task_struct *t)
{
int empty;
int empty_exp;
@@ -398,8 +390,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
rnp->grphi,
!!rnp->gp_tasks);
rcu_report_unblock_qs_rnp(rnp, flags);
} else
} else {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

#ifdef CONFIG_RCU_BOOST
/* Unboost if we were boosted. */
@@ -418,38 +411,6 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
}
}

/*
* Tree-preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;

if (t->rcu_read_lock_nesting != 1)
--t->rcu_read_lock_nesting;
else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);

WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

#ifdef CONFIG_RCU_CPU_STALL_VERBOSE

/*
@@ -539,16 +500,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
return ndetected;
}

/*
* Suppress preemptible RCU's CPU stall warnings by pushing the
* time of the next stall-warning message comfortably far into the
* future.
*/
static void rcu_preempt_stall_reset(void)
{
rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
}

/*
* Check that the list of blocked tasks for the newly completed grace
* period is in fact empty. It is a serious bug to complete a grace
@@ -649,14 +600,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
* Do CPU-offline processing for preemptible RCU.
*/
static void rcu_preempt_cleanup_dead_cpu(int cpu)
{
rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
}

/*
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the corresponding CPU's rcu_node structure,
@@ -677,15 +620,6 @@ static void rcu_preempt_check_callbacks(int cpu)
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}

/*
* Process callbacks for preemptible RCU.
*/
static void rcu_preempt_process_callbacks(void)
{
__rcu_process_callbacks(&rcu_preempt_state,
&__get_cpu_var(rcu_preempt_data));
}

#ifdef CONFIG_RCU_BOOST

static void rcu_preempt_do_callbacks(void)
@@ -824,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
int must_wait = 0;

raw_spin_lock_irqsave(&rnp->lock, flags);
if (list_empty(&rnp->blkd_tasks))
if (list_empty(&rnp->blkd_tasks)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
else {
} else {
rnp->exp_tasks = rnp->blkd_tasks.next;
rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
must_wait = 1;
@@ -870,9 +804,9 @@ void synchronize_rcu_expedited(void)
* expedited grace period for us, just leave.
*/
while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
if (trycount++ < 10)
if (trycount++ < 10) {
udelay(trycount * num_online_cpus());
else {
} else {
synchronize_rcu();
return;
}
@@ -917,50 +851,15 @@ mb_ret:
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

/*
* Check to see if there is any immediate preemptible-RCU-related work
* to be done.
*/
static int rcu_preempt_pending(int cpu)
{
return __rcu_pending(&rcu_preempt_state,
&per_cpu(rcu_preempt_data, cpu));
}

/*
* Does preemptible RCU have callbacks on this CPU?
*/
static int rcu_preempt_cpu_has_callbacks(int cpu)
{
return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
}

/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*/
void rcu_barrier(void)
{
_rcu_barrier(&rcu_preempt_state, call_rcu);
_rcu_barrier(&rcu_preempt_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier);

/*
* Initialize preemptible RCU's per-CPU data.
*/
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
}

/*
* Move preemptible RCU's callbacks from dying CPU to other online CPU
* and record a quiescent state.
*/
static void rcu_preempt_cleanup_dying_cpu(void)
{
rcu_cleanup_dying_cpu(&rcu_preempt_state);
}

/*
* Initialize preemptible RCU's state structures.
*/
@@ -1001,6 +900,14 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

/*
* Because preemptible RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
static void rcu_preempt_note_context_switch(int cpu)
{
}

/*
* Because preemptible RCU does not exist, there are never any preempted
* RCU readers.
@@ -1037,14 +944,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
return 0;
}

/*
* Because preemptible RCU does not exist, there is no need to suppress
* its CPU stall warnings.
*/
static void rcu_preempt_stall_reset(void)
{
}

/*
* Because there is no preemptible RCU, there can be no readers blocked,
* so there is no need to check for blocked tasks. So check only for
@@ -1072,14 +971,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
* Because preemptible RCU does not exist, it never needs CPU-offline
* processing.
*/
static void rcu_preempt_cleanup_dead_cpu(int cpu)
{
}

/*
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
@@ -1088,14 +979,6 @@ static void rcu_preempt_check_callbacks(int cpu)
{
}

/*
* Because preemptible RCU does not exist, it never has any callbacks
* to process.
*/
static void rcu_preempt_process_callbacks(void)
{
}

/*
* Queue an RCU callback for lazy invocation after a grace period.
* This will likely be later named something like "call_rcu_lazy()",
@@ -1136,22 +1019,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
* Because preemptible RCU does not exist, it never has any work to do.
*/
static int rcu_preempt_pending(int cpu)
{
return 0;
}

/*
* Because preemptible RCU does not exist, it never has callbacks
*/
static int rcu_preempt_cpu_has_callbacks(int cpu)
{
return 0;
}

/*
* Because preemptible RCU does not exist, rcu_barrier() is just
* another name for rcu_barrier_sched().
@@ -1162,21 +1029,6 @@ void rcu_barrier(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
/*
|
||||
* Because preemptible RCU does not exist, there is no per-CPU
|
||||
* data to initialize.
|
||||
*/
|
||||
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Because there is no preemptible RCU, there is no cleanup to do.
|
||||
*/
|
||||
static void rcu_preempt_cleanup_dying_cpu(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* Because preemptible RCU does not exist, it need not be initialized.
|
||||
*/
|
||||
@@ -1960,9 +1812,11 @@ static void rcu_idle_count_callbacks_posted(void)
|
||||
*/
|
||||
#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
|
||||
#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
|
||||
#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
|
||||
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
|
||||
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
|
||||
|
||||
extern int tick_nohz_enabled;
|
||||
|
||||
/*
|
||||
* Does the specified flavor of RCU have non-lazy callbacks pending on
|
||||
* the specified CPU? Both RCU flavor and CPU are specified by the
|
||||
@@ -2039,10 +1893,13 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
|
||||
return 1;
|
||||
}
|
||||
/* Set up for the possibility that RCU will post a timer. */
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu))
|
||||
*delta_jiffies = RCU_IDLE_GP_DELAY;
|
||||
else
|
||||
*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
|
||||
*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
|
||||
RCU_IDLE_GP_DELAY) - jiffies;
|
||||
} else {
|
||||
*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
|
||||
*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
|
||||
}
|
||||
return 0;
|
||||
}
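
/*
 * Illustrative sketch, not part of the patch: why the new code rounds the
 * wakeup to a multiple of RCU_IDLE_GP_DELAY.  Idle CPUs whose deadlines fall
 * in the same window end up targeting the same absolute jiffy, so their
 * timers can fire together.  The rounding macro below mirrors the kernel's
 * power-of-two round_up(); the jiffies values are invented.
 */
#include <stdio.h>

#define RCU_IDLE_GP_DELAY	4
#define round_up_pow2(x, y)	((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	unsigned long samples[] = { 1001, 1002, 1003, 1005 };

	for (int i = 0; i < 4; i++) {
		unsigned long j = samples[i];
		unsigned long wake = round_up_pow2(j + RCU_IDLE_GP_DELAY,
						   RCU_IDLE_GP_DELAY);

		/* 1001..1003 all coalesce on jiffy 1008; 1005 lands on 1012 */
		printf("jiffies=%lu -> wake at %lu (delta=%lu)\n",
		       j, wake, wake - j);
	}
	return 0;
}
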
|
||||
|
||||
@@ -2101,6 +1958,7 @@ static void rcu_cleanup_after_idle(int cpu)
|
||||
|
||||
del_timer(&rdtp->idle_gp_timer);
|
||||
trace_rcu_prep_idle("Cleanup after idle");
|
||||
rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2126,6 +1984,18 @@ static void rcu_prepare_for_idle(int cpu)
|
||||
{
|
||||
struct timer_list *tp;
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
int tne;
|
||||
|
||||
/* Handle nohz enablement switches conservatively. */
|
||||
tne = ACCESS_ONCE(tick_nohz_enabled);
|
||||
if (tne != rdtp->tick_nohz_enabled_snap) {
|
||||
if (rcu_cpu_has_callbacks(cpu))
|
||||
invoke_rcu_core(); /* force nohz to see update. */
|
||||
rdtp->tick_nohz_enabled_snap = tne;
|
||||
return;
|
||||
}
|
||||
if (!tne)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If this is an idle re-entry, for example, due to use of
|
||||
@@ -2179,10 +2049,11 @@ static void rcu_prepare_for_idle(int cpu)
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
|
||||
trace_rcu_prep_idle("Dyntick with callbacks");
|
||||
rdtp->idle_gp_timer_expires =
|
||||
jiffies + RCU_IDLE_GP_DELAY;
|
||||
round_up(jiffies + RCU_IDLE_GP_DELAY,
|
||||
RCU_IDLE_GP_DELAY);
|
||||
} else {
|
||||
rdtp->idle_gp_timer_expires =
|
||||
jiffies + RCU_IDLE_LAZY_GP_DELAY;
|
||||
round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
|
||||
trace_rcu_prep_idle("Dyntick with lazy callbacks");
|
||||
}
|
||||
tp = &rdtp->idle_gp_timer;
|
||||
@@ -2223,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu)
|
||||
if (rcu_cpu_has_callbacks(cpu)) {
|
||||
trace_rcu_prep_idle("More callbacks");
|
||||
invoke_rcu_core();
|
||||
} else
|
||||
} else {
|
||||
trace_rcu_prep_idle("Callbacks drained");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2261,6 +2133,7 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
|
||||
|
||||
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
|
||||
{
|
||||
*cp = '\0';
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
|
@@ -46,6 +46,31 @@
|
||||
#define RCU_TREE_NONCORE
|
||||
#include "rcutree.h"
|
||||
|
||||
static int show_rcubarrier(struct seq_file *m, void *unused)
|
||||
{
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp)
|
||||
seq_printf(m, "%s: %c bcc: %d nbd: %lu\n",
|
||||
rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.',
|
||||
atomic_read(&rsp->barrier_cpu_count),
|
||||
rsp->n_barrier_done);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rcubarrier_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, show_rcubarrier, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations rcubarrier_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = rcubarrier_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
|
||||
static char convert_kthread_status(unsigned int kthread_status)
|
||||
@@ -95,24 +120,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
|
||||
rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
|
||||
}
|
||||
|
||||
#define PRINT_RCU_DATA(name, func, m) \
|
||||
do { \
|
||||
int _p_r_d_i; \
|
||||
\
|
||||
for_each_possible_cpu(_p_r_d_i) \
|
||||
func(m, &per_cpu(name, _p_r_d_i)); \
|
||||
} while (0)
|
||||
|
||||
static int show_rcudata(struct seq_file *m, void *unused)
|
||||
{
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
seq_puts(m, "rcu_preempt:\n");
|
||||
PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m);
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
seq_puts(m, "rcu_sched:\n");
|
||||
PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m);
|
||||
seq_puts(m, "rcu_bh:\n");
|
||||
PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
|
||||
int cpu;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp) {
|
||||
seq_printf(m, "%s:\n", rsp->name);
|
||||
for_each_possible_cpu(cpu)
|
||||
print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -166,6 +183,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
|
||||
|
||||
static int show_rcudata_csv(struct seq_file *m, void *unused)
|
||||
{
|
||||
int cpu;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
|
||||
seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
|
||||
seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
|
||||
@@ -173,14 +193,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
|
||||
seq_puts(m, "\"kt\",\"ktl\"");
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
seq_puts(m, "\"rcu_preempt:\"\n");
|
||||
PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m);
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
seq_puts(m, "\"rcu_sched:\"\n");
|
||||
PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m);
|
||||
seq_puts(m, "\"rcu_bh:\"\n");
|
||||
PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
|
||||
for_each_rcu_flavor(rsp) {
|
||||
seq_printf(m, "\"%s:\"\n", rsp->name);
|
||||
for_each_possible_cpu(cpu)
|
||||
print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -201,8 +218,7 @@ static const struct file_operations rcudata_csv_fops = {
|
||||
|
||||
static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
|
||||
{
|
||||
seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu "
|
||||
"j=%04x bt=%04x\n",
|
||||
seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
|
||||
rnp->grplo, rnp->grphi,
|
||||
"T."[list_empty(&rnp->blkd_tasks)],
|
||||
"N."[!rnp->gp_tasks],
|
||||
@@ -210,11 +226,11 @@ static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
|
||||
"B."[!rnp->boost_tasks],
|
||||
convert_kthread_status(rnp->boost_kthread_status),
|
||||
rnp->n_tasks_boosted, rnp->n_exp_boosts,
|
||||
rnp->n_normal_boosts,
|
||||
rnp->n_normal_boosts);
|
||||
seq_printf(m, "j=%04x bt=%04x\n",
|
||||
(int)(jiffies & 0xffff),
|
||||
(int)(rnp->boost_time & 0xffff));
|
||||
seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
|
||||
" balk",
|
||||
seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
|
||||
rnp->n_balk_blkd_tasks,
|
||||
rnp->n_balk_exp_gp_tasks,
|
||||
rnp->n_balk_boost_tasks,
|
||||
@@ -270,15 +286,15 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
|
||||
struct rcu_node *rnp;
|
||||
|
||||
gpnum = rsp->gpnum;
|
||||
seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
|
||||
"nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
|
||||
rsp->completed, gpnum, rsp->fqs_state,
|
||||
seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ",
|
||||
rsp->name, rsp->completed, gpnum, rsp->fqs_state,
|
||||
(long)(rsp->jiffies_force_qs - jiffies),
|
||||
(int)(jiffies & 0xffff),
|
||||
(int)(jiffies & 0xffff));
|
||||
seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
|
||||
rsp->n_force_qs, rsp->n_force_qs_ngp,
|
||||
rsp->n_force_qs - rsp->n_force_qs_ngp,
|
||||
rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen);
|
||||
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
|
||||
for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
|
||||
if (rnp->level != level) {
|
||||
seq_puts(m, "\n");
|
||||
level = rnp->level;
|
||||
@@ -295,14 +311,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
|
||||
|
||||
static int show_rcuhier(struct seq_file *m, void *unused)
|
||||
{
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
seq_puts(m, "rcu_preempt:\n");
|
||||
print_one_rcu_state(m, &rcu_preempt_state);
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
seq_puts(m, "rcu_sched:\n");
|
||||
print_one_rcu_state(m, &rcu_sched_state);
|
||||
seq_puts(m, "rcu_bh:\n");
|
||||
print_one_rcu_state(m, &rcu_bh_state);
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp)
|
||||
print_one_rcu_state(m, rsp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -343,11 +355,10 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
|
||||
|
||||
static int show_rcugp(struct seq_file *m, void *unused)
|
||||
{
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
show_one_rcugp(m, &rcu_preempt_state);
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
show_one_rcugp(m, &rcu_sched_state);
|
||||
show_one_rcugp(m, &rcu_bh_state);
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_rcu_flavor(rsp)
|
||||
show_one_rcugp(m, rsp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -366,44 +377,36 @@ static const struct file_operations rcugp_fops = {
|
||||
|
||||
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
|
||||
{
|
||||
seq_printf(m, "%3d%cnp=%ld "
|
||||
"qsp=%ld rpq=%ld cbr=%ld cng=%ld "
|
||||
"gpc=%ld gps=%ld nf=%ld nn=%ld\n",
|
||||
seq_printf(m, "%3d%cnp=%ld ",
|
||||
rdp->cpu,
|
||||
cpu_is_offline(rdp->cpu) ? '!' : ' ',
|
||||
rdp->n_rcu_pending,
|
||||
rdp->n_rcu_pending);
|
||||
seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
|
||||
rdp->n_rp_qs_pending,
|
||||
rdp->n_rp_report_qs,
|
||||
rdp->n_rp_cb_ready,
|
||||
rdp->n_rp_cpu_needs_gp,
|
||||
rdp->n_rp_cpu_needs_gp);
|
||||
seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n",
|
||||
rdp->n_rp_gp_completed,
|
||||
rdp->n_rp_gp_started,
|
||||
rdp->n_rp_need_fqs,
|
||||
rdp->n_rp_need_nothing);
|
||||
}
|
||||
|
||||
static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
|
||||
static int show_rcu_pending(struct seq_file *m, void *unused)
|
||||
{
|
||||
int cpu;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (rdp->beenonline)
|
||||
print_one_rcu_pending(m, rdp);
|
||||
for_each_rcu_flavor(rsp) {
|
||||
seq_printf(m, "%s:\n", rsp->name);
|
||||
for_each_possible_cpu(cpu) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (rdp->beenonline)
|
||||
print_one_rcu_pending(m, rdp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int show_rcu_pending(struct seq_file *m, void *unused)
|
||||
{
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
seq_puts(m, "rcu_preempt:\n");
|
||||
print_rcu_pendings(m, &rcu_preempt_state);
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
seq_puts(m, "rcu_sched:\n");
|
||||
print_rcu_pendings(m, &rcu_sched_state);
|
||||
seq_puts(m, "rcu_bh:\n");
|
||||
print_rcu_pendings(m, &rcu_bh_state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -453,6 +456,11 @@ static int __init rcutree_trace_init(void)
|
||||
if (!rcudir)
|
||||
goto free_out;
|
||||
|
||||
retval = debugfs_create_file("rcubarrier", 0444, rcudir,
|
||||
NULL, &rcubarrier_fops);
|
||||
if (!retval)
|
||||
goto free_out;
|
||||
|
||||
retval = debugfs_create_file("rcudata", 0444, rcudir,
|
||||
NULL, &rcudata_fops);
|
||||
if (!retval)
|
||||
|
@@ -722,14 +722,12 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
|
||||
|
||||
write_lock(&resource_lock);
|
||||
|
||||
if (!parent)
|
||||
goto skip;
|
||||
|
||||
if ((start < parent->start) || (end > parent->end))
|
||||
goto out;
|
||||
|
||||
for (tmp = res->child; tmp; tmp = tmp->sibling) {
|
||||
if ((tmp->start < start) || (tmp->end > end))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (res->sibling && (res->sibling->start <= end))
|
||||
goto out;
|
||||
|
||||
@@ -741,6 +739,11 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
|
||||
goto out;
|
||||
}
|
||||
|
||||
skip:
|
||||
for (tmp = res->child; tmp; tmp = tmp->sibling)
|
||||
if ((tmp->start < start) || (tmp->end > end))
|
||||
goto out;
|
||||
|
||||
res->start = start;
|
||||
res->end = end;
|
||||
result = 0;
|
||||
|
@@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||
#endif
|
||||
|
||||
/* Here we just switch the register state and the stack. */
|
||||
rcu_switch_from(prev);
|
||||
switch_to(prev, next, prev);
|
||||
|
||||
barrier();
|
||||
@@ -2161,11 +2160,73 @@ unsigned long this_cpu_load(void)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Global load-average calculations
|
||||
*
|
||||
* We take a distributed and async approach to calculating the global load-avg
|
||||
* in order to minimize overhead.
|
||||
*
|
||||
* The global load average is an exponentially decaying average of nr_running +
|
||||
* nr_uninterruptible.
|
||||
*
|
||||
* Once every LOAD_FREQ:
|
||||
*
|
||||
* nr_active = 0;
|
||||
* for_each_possible_cpu(cpu)
|
||||
* nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
|
||||
*
|
||||
* avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
|
||||
*
|
||||
 * Due to a number of reasons the above turns into the mess below:
|
||||
*
|
||||
* - for_each_possible_cpu() is prohibitively expensive on machines with
|
||||
* serious number of cpus, therefore we need to take a distributed approach
|
||||
* to calculating nr_active.
|
||||
*
|
||||
* \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
|
||||
* = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
|
||||
*
|
||||
* So assuming nr_active := 0 when we start out -- true per definition, we
|
||||
* can simply take per-cpu deltas and fold those into a global accumulate
|
||||
* to obtain the same result. See calc_load_fold_active().
|
||||
*
|
||||
* Furthermore, in order to avoid synchronizing all per-cpu delta folding
|
||||
* across the machine, we assume 10 ticks is sufficient time for every
|
||||
* cpu to have completed this task.
|
||||
*
|
||||
* This places an upper-bound on the IRQ-off latency of the machine. Then
|
||||
 * again, being late doesn't lose the delta, just wrecks the sample.
|
||||
*
|
||||
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
|
||||
* this would add another cross-cpu cacheline miss and atomic operation
|
||||
* to the wakeup path. Instead we increment on whatever cpu the task ran
|
||||
* when it went into uninterruptible state and decrement on whatever cpu
|
||||
* did the wakeup. This means that only the sum of nr_uninterruptible over
|
||||
* all cpus yields the correct result.
|
||||
*
|
||||
* This covers the NO_HZ=n code, for extra head-aches, see the comment below.
|
||||
*/
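
/*
 * Illustrative sketch, not part of the patch: the fixed-point exponential
 * average described above, using the kernel's FSHIFT/EXP_* constants.  The
 * per-cpu delta folding is modelled with a plain array instead of atomics
 * and the sample values are invented.
 */
#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884	/* 1/exp(5sec/1min) as fixed point */
#define EXP_5	2014
#define EXP_15	2037

static unsigned long
calc_load_sketch(unsigned long load, unsigned long exp, unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avenrun[3] = { 0, 0, 0 };
	long per_cpu_delta[4] = { 2, 1, 0, 3 };	/* invented nr_active deltas */
	long nr_active = 0;
	unsigned long active;

	/* fold the per-cpu deltas into one global count */
	for (int cpu = 0; cpu < 4; cpu++)
		nr_active += per_cpu_delta[cpu];

	active = nr_active > 0 ? nr_active * FIXED_1 : 0;

	avenrun[0] = calc_load_sketch(avenrun[0], EXP_1, active);
	avenrun[1] = calc_load_sketch(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load_sketch(avenrun[2], EXP_15, active);

	printf("avenrun[0] = %lu (fixed point, FIXED_1 = %d)\n",
	       avenrun[0], FIXED_1);
	return 0;
}
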
|
||||
|
||||
/* Variables and functions for calc_load */
|
||||
static atomic_long_t calc_load_tasks;
|
||||
static unsigned long calc_load_update;
|
||||
unsigned long avenrun[3];
|
||||
EXPORT_SYMBOL(avenrun);
|
||||
EXPORT_SYMBOL(avenrun); /* should be removed */
|
||||
|
||||
/**
|
||||
* get_avenrun - get the load average array
|
||||
* @loads: pointer to dest load array
|
||||
* @offset: offset to add
|
||||
* @shift: shift count to shift the result left
|
||||
*
|
||||
* These values are estimates at best, so no need for locking.
|
||||
*/
|
||||
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
{
|
||||
loads[0] = (avenrun[0] + offset) << shift;
|
||||
loads[1] = (avenrun[1] + offset) << shift;
|
||||
loads[2] = (avenrun[2] + offset) << shift;
|
||||
}
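
/*
 * Illustrative usage sketch, not part of the patch: how a reader such as
 * /proc/loadavg consumes get_avenrun().  The FIXED_1/200 offset rounds the
 * 11-bit fixed-point value to two decimals before printing; the avenrun
 * contents below are invented.
 */
#include <stdio.h>

#define FSHIFT		11
#define FIXED_1		(1 << FSHIFT)
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned long avenrun[3] = { 1034, 820, 512 };	/* invented samples */
	unsigned long loads[3];

	/* what get_avenrun(loads, FIXED_1/200, 0) would hand back */
	for (int i = 0; i < 3; i++)
		loads[i] = avenrun[i] + FIXED_1 / 200;

	printf("%lu.%02lu %lu.%02lu %lu.%02lu\n",
	       LOAD_INT(loads[0]), LOAD_FRAC(loads[0]),
	       LOAD_INT(loads[1]), LOAD_FRAC(loads[1]),
	       LOAD_INT(loads[2]), LOAD_FRAC(loads[2]));
	return 0;
}
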
|
||||
|
||||
static long calc_load_fold_active(struct rq *this_rq)
|
||||
{
|
||||
@@ -2182,6 +2243,9 @@ static long calc_load_fold_active(struct rq *this_rq)
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* a1 = a0 * e + a * (1 - e)
|
||||
*/
|
||||
static unsigned long
|
||||
calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
{
|
||||
@@ -2193,30 +2257,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
/*
|
||||
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
|
||||
* Handle NO_HZ for the global load-average.
|
||||
*
|
||||
* Since the above described distributed algorithm to compute the global
|
||||
* load-average relies on per-cpu sampling from the tick, it is affected by
|
||||
* NO_HZ.
|
||||
*
|
||||
* The basic idea is to fold the nr_active delta into a global idle-delta upon
|
||||
* entering NO_HZ state such that we can include this as an 'extra' cpu delta
|
||||
* when we read the global state.
|
||||
*
|
||||
* Obviously reality has to ruin such a delightfully simple scheme:
|
||||
*
|
||||
* - When we go NO_HZ idle during the window, we can negate our sample
|
||||
* contribution, causing under-accounting.
|
||||
*
|
||||
* We avoid this by keeping two idle-delta counters and flipping them
|
||||
* when the window starts, thus separating old and new NO_HZ load.
|
||||
*
|
||||
* The only trick is the slight shift in index flip for read vs write.
|
||||
*
|
||||
* 0s 5s 10s 15s
|
||||
* +10 +10 +10 +10
|
||||
* |-|-----------|-|-----------|-|-----------|-|
|
||||
* r:0 0 1 1 0 0 1 1 0
|
||||
* w:0 1 1 0 0 1 1 0 0
|
||||
*
|
||||
* This ensures we'll fold the old idle contribution in this window while
|
||||
 * accumulating the new one.
|
||||
*
|
||||
* - When we wake up from NO_HZ idle during the window, we push up our
|
||||
* contribution, since we effectively move our sample point to a known
|
||||
* busy state.
|
||||
*
|
||||
* This is solved by pushing the window forward, and thus skipping the
|
||||
* sample, for this cpu (effectively using the idle-delta for this cpu which
|
||||
* was in effect at the time the window opened). This also solves the issue
|
||||
* of having to deal with a cpu having been in NOHZ idle for multiple
|
||||
* LOAD_FREQ intervals.
|
||||
*
|
||||
* When making the ILB scale, we should try to pull this in as well.
|
||||
*/
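
/*
 * Illustrative sketch, not part of the patch: the two idle-delta buffers and
 * the read/write index skew described above, modelled single-threaded with
 * plain variables (no atomics), invented jiffies values, and a made-up
 * LOAD_FREQ.
 */
#include <stdio.h>

#define LOAD_FREQ	5000	/* pretend: one fold window per 5000 jiffies */

static long calc_load_idle[2];
static int calc_load_idx;
static unsigned long calc_load_update = 5000;
static unsigned long jiffies;

static int write_idx(void)
{
	int idx = calc_load_idx;

	/* once the fold window has opened, new idle deltas go to the next slot */
	if (jiffies >= calc_load_update)
		idx++;
	return idx & 1;
}

static int read_idx(void)
{
	return calc_load_idx & 1;
}

int main(void)
{
	long folded;

	jiffies = 4990;
	calc_load_idle[write_idx()] += 3;	/* idle entry before the window */

	jiffies = 5002;				/* inside the fold window */
	calc_load_idle[write_idx()] += 2;	/* lands in the other slot */

	folded = calc_load_idle[read_idx()];	/* what calc_global_load() folds */
	calc_load_idle[read_idx()] = 0;
	calc_load_idx++;			/* flip, as calc_global_nohz() does */
	calc_load_update += LOAD_FREQ;

	printf("folded=%ld, pending for next window=%ld\n",
	       folded, calc_load_idle[read_idx()]);
	return 0;
}
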
|
||||
static atomic_long_t calc_load_tasks_idle;
|
||||
static atomic_long_t calc_load_idle[2];
|
||||
static int calc_load_idx;
|
||||
|
||||
void calc_load_account_idle(struct rq *this_rq)
|
||||
static inline int calc_load_write_idx(void)
|
||||
{
|
||||
int idx = calc_load_idx;
|
||||
|
||||
/*
|
||||
* See calc_global_nohz(), if we observe the new index, we also
|
||||
* need to observe the new update time.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
/*
|
||||
* If the folding window started, make sure we start writing in the
|
||||
* next idle-delta.
|
||||
*/
|
||||
if (!time_before(jiffies, calc_load_update))
|
||||
idx++;
|
||||
|
||||
return idx & 1;
|
||||
}
|
||||
|
||||
static inline int calc_load_read_idx(void)
|
||||
{
|
||||
return calc_load_idx & 1;
|
||||
}
|
||||
|
||||
void calc_load_enter_idle(void)
|
||||
{
|
||||
struct rq *this_rq = this_rq();
|
||||
long delta;
|
||||
|
||||
/*
|
||||
* We're going into NOHZ mode, if there's any pending delta, fold it
|
||||
* into the pending idle delta.
|
||||
*/
|
||||
delta = calc_load_fold_active(this_rq);
|
||||
if (delta)
|
||||
atomic_long_add(delta, &calc_load_tasks_idle);
|
||||
if (delta) {
|
||||
int idx = calc_load_write_idx();
|
||||
atomic_long_add(delta, &calc_load_idle[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
void calc_load_exit_idle(void)
|
||||
{
|
||||
struct rq *this_rq = this_rq();
|
||||
|
||||
/*
|
||||
* If we're still before the sample window, we're done.
|
||||
*/
|
||||
if (time_before(jiffies, this_rq->calc_load_update))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We woke inside or after the sample window, this means we're already
|
||||
* accounted through the nohz accounting, so skip the entire deal and
|
||||
* sync up for the next window.
|
||||
*/
|
||||
this_rq->calc_load_update = calc_load_update;
|
||||
if (time_before(jiffies, this_rq->calc_load_update + 10))
|
||||
this_rq->calc_load_update += LOAD_FREQ;
|
||||
}
|
||||
|
||||
static long calc_load_fold_idle(void)
|
||||
{
|
||||
int idx = calc_load_read_idx();
|
||||
long delta = 0;
|
||||
|
||||
/*
|
||||
 * It's got a race, we don't care...
|
||||
*/
|
||||
if (atomic_long_read(&calc_load_tasks_idle))
|
||||
delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
|
||||
if (atomic_long_read(&calc_load_idle[idx]))
|
||||
delta = atomic_long_xchg(&calc_load_idle[idx], 0);
|
||||
|
||||
return delta;
|
||||
}
|
||||
@@ -2302,66 +2454,39 @@ static void calc_global_nohz(void)
|
||||
{
|
||||
long delta, active, n;
|
||||
|
||||
/*
|
||||
* If we crossed a calc_load_update boundary, make sure to fold
|
||||
* any pending idle changes, the respective CPUs might have
|
||||
* missed the tick driven calc_load_account_active() update
|
||||
* due to NO_HZ.
|
||||
*/
|
||||
delta = calc_load_fold_idle();
|
||||
if (delta)
|
||||
atomic_long_add(delta, &calc_load_tasks);
|
||||
if (!time_before(jiffies, calc_load_update + 10)) {
|
||||
/*
|
||||
* Catch-up, fold however many we are behind still
|
||||
*/
|
||||
delta = jiffies - calc_load_update - 10;
|
||||
n = 1 + (delta / LOAD_FREQ);
|
||||
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
active = active > 0 ? active * FIXED_1 : 0;
|
||||
|
||||
avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
||||
avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
|
||||
calc_load_update += n * LOAD_FREQ;
|
||||
}
|
||||
|
||||
/*
|
||||
* It could be the one fold was all it took, we done!
|
||||
* Flip the idle index...
|
||||
*
|
||||
* Make sure we first write the new time then flip the index, so that
|
||||
* calc_load_write_idx() will see the new time when it reads the new
|
||||
* index, this avoids a double flip messing things up.
|
||||
*/
|
||||
if (time_before(jiffies, calc_load_update + 10))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Catch-up, fold however many we are behind still
|
||||
*/
|
||||
delta = jiffies - calc_load_update - 10;
|
||||
n = 1 + (delta / LOAD_FREQ);
|
||||
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
active = active > 0 ? active * FIXED_1 : 0;
|
||||
|
||||
avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
||||
avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
|
||||
calc_load_update += n * LOAD_FREQ;
|
||||
}
|
||||
#else
|
||||
void calc_load_account_idle(struct rq *this_rq)
|
||||
{
|
||||
smp_wmb();
|
||||
calc_load_idx++;
|
||||
}
|
||||
#else /* !CONFIG_NO_HZ */
|
||||
|
||||
static inline long calc_load_fold_idle(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline long calc_load_fold_idle(void) { return 0; }
|
||||
static inline void calc_global_nohz(void) { }
|
||||
|
||||
static void calc_global_nohz(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* get_avenrun - get the load average array
|
||||
* @loads: pointer to dest load array
|
||||
* @offset: offset to add
|
||||
* @shift: shift count to shift the result left
|
||||
*
|
||||
* These values are estimates at best, so no need for locking.
|
||||
*/
|
||||
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
{
|
||||
loads[0] = (avenrun[0] + offset) << shift;
|
||||
loads[1] = (avenrun[1] + offset) << shift;
|
||||
loads[2] = (avenrun[2] + offset) << shift;
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
|
||||
/*
|
||||
* calc_load - update the avenrun load estimates 10 ticks after the
|
||||
@@ -2369,11 +2494,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
*/
|
||||
void calc_global_load(unsigned long ticks)
|
||||
{
|
||||
long active;
|
||||
long active, delta;
|
||||
|
||||
if (time_before(jiffies, calc_load_update + 10))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Fold the 'old' idle-delta to include all NO_HZ cpus.
|
||||
*/
|
||||
delta = calc_load_fold_idle();
|
||||
if (delta)
|
||||
atomic_long_add(delta, &calc_load_tasks);
|
||||
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
active = active > 0 ? active * FIXED_1 : 0;
|
||||
|
||||
@@ -2384,12 +2516,7 @@ void calc_global_load(unsigned long ticks)
|
||||
calc_load_update += LOAD_FREQ;
|
||||
|
||||
/*
|
||||
* Account one period with whatever state we found before
|
||||
* folding in the nohz state and ageing the entire idle period.
|
||||
*
|
||||
* This avoids loosing a sample when we go idle between
|
||||
* calc_load_account_active() (10 ticks ago) and now and thus
|
||||
* under-accounting.
|
||||
* In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
|
||||
*/
|
||||
calc_global_nohz();
|
||||
}
|
||||
@@ -2406,13 +2533,16 @@ static void calc_load_account_active(struct rq *this_rq)
|
||||
return;
|
||||
|
||||
delta = calc_load_fold_active(this_rq);
|
||||
delta += calc_load_fold_idle();
|
||||
if (delta)
|
||||
atomic_long_add(delta, &calc_load_tasks);
|
||||
|
||||
this_rq->calc_load_update += LOAD_FREQ;
|
||||
}
|
||||
|
||||
/*
|
||||
* End of global load-average stuff
|
||||
*/
|
||||
|
||||
/*
|
||||
* The exact cpuload at various idx values, calculated at every tick would be
|
||||
* load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
|
||||
|
@@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
|
||||
static struct task_struct *pick_next_task_idle(struct rq *rq)
|
||||
{
|
||||
schedstat_inc(rq, sched_goidle);
|
||||
calc_load_account_idle(rq);
|
||||
return rq->idle;
|
||||
}
|
||||
|
||||
|
@@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void)
|
||||
return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
|
||||
}
|
||||
|
||||
void calc_load_account_idle(struct rq *this_rq);
|
||||
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
|
||||
/*
|
||||
|
@@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why)
|
||||
void ptrace_notify(int exit_code)
|
||||
{
|
||||
BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP);
|
||||
if (unlikely(current->task_works)) {
|
||||
if (test_and_clear_ti_thread_flag(current_thread_info(),
|
||||
TIF_NOTIFY_RESUME)) {
|
||||
smp_mb__after_clear_bit();
|
||||
task_work_run();
|
||||
}
|
||||
}
|
||||
|
||||
	spin_lock_irq(&current->sighand->siglock);
|
||||
ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED);
|
||||
@@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
|
||||
struct signal_struct *signal = current->signal;
|
||||
int signr;
|
||||
|
||||
if (unlikely(current->task_works)) {
|
||||
if (test_and_clear_ti_thread_flag(current_thread_info(),
|
||||
TIF_NOTIFY_RESUME)) {
|
||||
smp_mb__after_clear_bit();
|
||||
task_work_run();
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(uprobe_deny_signal()))
|
||||
return 0;
|
||||
|
||||
|
kernel/smp.c
@@ -581,26 +581,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait)
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(smp_call_function);
|
||||
|
||||
void ipi_call_lock(void)
|
||||
{
|
||||
raw_spin_lock(&call_function.lock);
|
||||
}
|
||||
|
||||
void ipi_call_unlock(void)
|
||||
{
|
||||
raw_spin_unlock(&call_function.lock);
|
||||
}
|
||||
|
||||
void ipi_call_lock_irq(void)
|
||||
{
|
||||
raw_spin_lock_irq(&call_function.lock);
|
||||
}
|
||||
|
||||
void ipi_call_unlock_irq(void)
|
||||
{
|
||||
raw_spin_unlock_irq(&call_function.lock);
|
||||
}
|
||||
#endif /* USE_GENERIC_SMP_HELPERS */
|
||||
|
||||
/* Setup configured maximum number of CPUs to activate */
|
||||
|
@@ -3,8 +3,6 @@
|
||||
|
||||
struct task_struct;
|
||||
|
||||
int smpboot_prepare(unsigned int cpu);
|
||||
|
||||
#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
|
||||
struct task_struct *idle_thread_get(unsigned int cpu);
|
||||
void idle_thread_set_boot_cpu(void);
|
||||
|
kernel/sys.c
@@ -1788,7 +1788,6 @@ SYSCALL_DEFINE1(umask, int, mask)
|
||||
#ifdef CONFIG_CHECKPOINT_RESTORE
|
||||
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct file *exe_file;
|
||||
struct dentry *dentry;
|
||||
int err;
|
||||
@@ -1816,13 +1815,17 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
|
||||
down_write(&mm->mmap_sem);
|
||||
|
||||
/*
|
||||
* Forbid mm->exe_file change if there are mapped other files.
|
||||
* Forbid mm->exe_file change if old file still mapped.
|
||||
*/
|
||||
err = -EBUSY;
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
|
||||
&exe_file->f_path))
|
||||
goto exit_unlock;
|
||||
if (mm->exe_file) {
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next)
|
||||
if (vma->vm_file &&
|
||||
path_equal(&vma->vm_file->f_path,
|
||||
&mm->exe_file->f_path))
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1835,6 +1838,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
|
||||
if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
|
||||
goto exit_unlock;
|
||||
|
||||
err = 0;
|
||||
set_mm_exe_file(mm, exe_file);
|
||||
exit_unlock:
|
||||
up_write(&mm->mmap_sem);
|
||||
|
@@ -3,82 +3,78 @@
|
||||
#include <linux/tracehook.h>
|
||||
|
||||
int
|
||||
task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
|
||||
task_work_add(struct task_struct *task, struct callback_head *twork, bool notify)
|
||||
{
|
||||
struct callback_head *last, *first;
|
||||
unsigned long flags;
|
||||
int err = -ESRCH;
|
||||
|
||||
#ifndef TIF_NOTIFY_RESUME
|
||||
if (notify)
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
/*
|
||||
* We must not insert the new work if the task has already passed
|
||||
* exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
|
||||
* and check PF_EXITING under pi_lock.
|
||||
* Not inserting the new work if the task has already passed
|
||||
 * exit_task_work() is the responsibility of callers.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
if (likely(!(task->flags & PF_EXITING))) {
|
||||
hlist_add_head(&twork->hlist, &task->task_works);
|
||||
err = 0;
|
||||
}
|
||||
last = task->task_works;
|
||||
first = last ? last->next : twork;
|
||||
twork->next = first;
|
||||
if (last)
|
||||
last->next = twork;
|
||||
task->task_works = twork;
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
|
||||
if (likely(!err) && notify)
|
||||
if (notify)
|
||||
set_notify_resume(task);
|
||||
return err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct task_work *
|
||||
struct callback_head *
|
||||
task_work_cancel(struct task_struct *task, task_work_func_t func)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_work *twork;
|
||||
struct hlist_node *pos;
|
||||
struct callback_head *last, *res = NULL;
|
||||
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
|
||||
if (twork->func == func) {
|
||||
hlist_del(&twork->hlist);
|
||||
goto found;
|
||||
last = task->task_works;
|
||||
if (last) {
|
||||
struct callback_head *q = last, *p = q->next;
|
||||
while (1) {
|
||||
if (p->func == func) {
|
||||
q->next = p->next;
|
||||
if (p == last)
|
||||
task->task_works = q == p ? NULL : q;
|
||||
res = p;
|
||||
break;
|
||||
}
|
||||
if (p == last)
|
||||
break;
|
||||
q = p;
|
||||
p = q->next;
|
||||
}
|
||||
}
|
||||
twork = NULL;
|
||||
found:
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
return twork;
|
||||
return res;
|
||||
}
|
||||
|
||||
void task_work_run(void)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
struct hlist_head task_works;
|
||||
struct hlist_node *pos;
|
||||
struct callback_head *p, *q;
|
||||
|
||||
raw_spin_lock_irq(&task->pi_lock);
|
||||
hlist_move_list(&task->task_works, &task_works);
|
||||
raw_spin_unlock_irq(&task->pi_lock);
|
||||
while (1) {
|
||||
raw_spin_lock_irq(&task->pi_lock);
|
||||
p = task->task_works;
|
||||
task->task_works = NULL;
|
||||
raw_spin_unlock_irq(&task->pi_lock);
|
||||
|
||||
if (unlikely(hlist_empty(&task_works)))
|
||||
return;
|
||||
/*
|
||||
* We use hlist to save the space in task_struct, but we want fifo.
|
||||
* Find the last entry, the list should be short, then process them
|
||||
* in reverse order.
|
||||
*/
|
||||
for (pos = task_works.first; pos->next; pos = pos->next)
|
||||
;
|
||||
if (unlikely(!p))
|
||||
return;
|
||||
|
||||
for (;;) {
|
||||
struct hlist_node **pprev = pos->pprev;
|
||||
struct task_work *twork = container_of(pos, struct task_work,
|
||||
hlist);
|
||||
twork->func(twork);
|
||||
|
||||
if (pprev == &task_works.first)
|
||||
break;
|
||||
pos = container_of(pprev, struct hlist_node, next);
|
||||
q = p->next; /* head */
|
||||
p->next = NULL; /* cut it */
|
||||
while (q) {
|
||||
p = q->next;
|
||||
q->func(q);
|
||||
q = p;
|
||||
}
|
||||
}
|
||||
}
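
/*
 * Illustrative sketch, not part of the patch: the callback_head queue used
 * above.  The task pointer holds the newest entry and newest->next points
 * back at the oldest, so adds are O(1) and the runner can cut the ring once
 * and drain it in FIFO order.  Locking is omitted and all names are local
 * to the example.
 */
#include <stdio.h>
#include <stddef.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
	const char *name;
};

static struct cb *works;	/* newest entry, or NULL */

static void work_add(struct cb *new)
{
	struct cb *last = works;

	new->next = last ? last->next : new;	/* point at the oldest */
	if (last)
		last->next = new;
	works = new;
}

static void work_run(void)
{
	struct cb *last = works, *q;

	works = NULL;
	if (!last)
		return;

	q = last->next;		/* oldest entry */
	last->next = NULL;	/* cut the ring so the walk terminates */
	while (q) {
		struct cb *next = q->next;

		q->func(q);
		q = next;
	}
}

static void say(struct cb *cb)
{
	printf("%s\n", cb->name);
}

int main(void)
{
	struct cb a = { .func = say, .name = "first" };
	struct cb b = { .func = say, .name = "second" };

	work_add(&a);
	work_add(&b);
	work_run();	/* prints "first" then "second" */
	return 0;
}
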
|
||||
|
@@ -409,7 +409,9 @@ int second_overflow(unsigned long secs)
|
||||
time_state = TIME_DEL;
|
||||
break;
|
||||
case TIME_INS:
|
||||
if (secs % 86400 == 0) {
|
||||
if (!(time_status & STA_INS))
|
||||
time_state = TIME_OK;
|
||||
else if (secs % 86400 == 0) {
|
||||
leap = -1;
|
||||
time_state = TIME_OOP;
|
||||
time_tai++;
|
||||
@@ -418,7 +420,9 @@ int second_overflow(unsigned long secs)
|
||||
}
|
||||
break;
|
||||
case TIME_DEL:
|
||||
if ((secs + 1) % 86400 == 0) {
|
||||
if (!(time_status & STA_DEL))
|
||||
time_state = TIME_OK;
|
||||
else if ((secs + 1) % 86400 == 0) {
|
||||
leap = 1;
|
||||
time_tai--;
|
||||
time_state = TIME_WAIT;
|
||||
|
@@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void)
|
||||
/*
|
||||
* NO HZ enabled ?
|
||||
*/
|
||||
static int tick_nohz_enabled __read_mostly = 1;
|
||||
int tick_nohz_enabled __read_mostly = 1;
|
||||
|
||||
/*
|
||||
* Enable / Disable tickless mode
|
||||
@@ -271,50 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
|
||||
|
||||
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
|
||||
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
|
||||
ktime_t now, int cpu)
|
||||
{
|
||||
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
|
||||
ktime_t last_update, expires, ret = { .tv64 = 0 };
|
||||
unsigned long rcu_delta_jiffies;
|
||||
ktime_t last_update, expires, now;
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
u64 time_delta;
|
||||
int cpu;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
|
||||
now = tick_nohz_start_idle(cpu, ts);
|
||||
|
||||
/*
|
||||
* If this cpu is offline and it is the one which updates
|
||||
* jiffies, then give up the assignment and let it be taken by
|
||||
* the cpu which runs the tick timer next. If we don't drop
|
||||
* this here the jiffies might be stale and do_timer() never
|
||||
* invoked.
|
||||
*/
|
||||
if (unlikely(!cpu_online(cpu))) {
|
||||
if (cpu == tick_do_timer_cpu)
|
||||
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
||||
}
|
||||
|
||||
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
|
||||
return;
|
||||
|
||||
if (need_resched())
|
||||
return;
|
||||
|
||||
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
|
||||
static int ratelimit;
|
||||
|
||||
if (ratelimit < 10) {
|
||||
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
|
||||
(unsigned int) local_softirq_pending());
|
||||
ratelimit++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
ts->idle_calls++;
|
||||
/* Read jiffies and the time when jiffies were updated last */
|
||||
do {
|
||||
seq = read_seqbegin(&xtime_lock);
|
||||
@@ -397,6 +362,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
|
||||
if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
|
||||
goto out;
|
||||
|
||||
ret = expires;
|
||||
|
||||
/*
|
||||
* nohz_stop_sched_tick can be called several times before
|
||||
* the nohz_restart_sched_tick is called. This happens when
|
||||
@@ -406,17 +373,12 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
|
||||
*/
|
||||
if (!ts->tick_stopped) {
|
||||
select_nohz_load_balancer(1);
|
||||
calc_load_enter_idle();
|
||||
|
||||
ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
ts->tick_stopped = 1;
|
||||
ts->idle_jiffies = last_jiffies;
|
||||
}
|
||||
|
||||
ts->idle_sleeps++;
|
||||
|
||||
/* Mark expires */
|
||||
ts->idle_expires = expires;
|
||||
|
||||
/*
|
||||
* If the expiration time == KTIME_MAX, then
|
||||
* in this case we simply stop the tick timer.
|
||||
@@ -447,6 +409,65 @@ out:
|
||||
ts->next_jiffies = next_jiffies;
|
||||
ts->last_jiffies = last_jiffies;
|
||||
ts->sleep_length = ktime_sub(dev->next_event, now);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
|
||||
{
|
||||
/*
|
||||
* If this cpu is offline and it is the one which updates
|
||||
* jiffies, then give up the assignment and let it be taken by
|
||||
* the cpu which runs the tick timer next. If we don't drop
|
||||
* this here the jiffies might be stale and do_timer() never
|
||||
* invoked.
|
||||
*/
|
||||
if (unlikely(!cpu_online(cpu))) {
|
||||
if (cpu == tick_do_timer_cpu)
|
||||
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
||||
}
|
||||
|
||||
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
|
||||
return false;
|
||||
|
||||
if (need_resched())
|
||||
return false;
|
||||
|
||||
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
|
||||
static int ratelimit;
|
||||
|
||||
if (ratelimit < 10) {
|
||||
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
|
||||
(unsigned int) local_softirq_pending());
|
||||
ratelimit++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __tick_nohz_idle_enter(struct tick_sched *ts)
|
||||
{
|
||||
ktime_t now, expires;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
now = tick_nohz_start_idle(cpu, ts);
|
||||
|
||||
if (can_stop_idle_tick(cpu, ts)) {
|
||||
int was_stopped = ts->tick_stopped;
|
||||
|
||||
ts->idle_calls++;
|
||||
|
||||
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
|
||||
if (expires.tv64 > 0LL) {
|
||||
ts->idle_sleeps++;
|
||||
ts->idle_expires = expires;
|
||||
}
|
||||
|
||||
if (!was_stopped && ts->tick_stopped)
|
||||
ts->idle_jiffies = ts->last_jiffies;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -484,7 +505,7 @@ void tick_nohz_idle_enter(void)
|
||||
* update of the idle time accounting in tick_nohz_start_idle().
|
||||
*/
|
||||
ts->inidle = 1;
|
||||
tick_nohz_stop_sched_tick(ts);
|
||||
__tick_nohz_idle_enter(ts);
|
||||
|
||||
local_irq_enable();
|
||||
}
|
||||
@@ -504,7 +525,7 @@ void tick_nohz_irq_exit(void)
|
||||
if (!ts->inidle)
|
||||
return;
|
||||
|
||||
tick_nohz_stop_sched_tick(ts);
|
||||
__tick_nohz_idle_enter(ts);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -522,7 +543,7 @@ ktime_t tick_nohz_get_sleep_length(void)
|
||||
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
|
||||
{
|
||||
hrtimer_cancel(&ts->sched_timer);
|
||||
hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
|
||||
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
|
||||
|
||||
while (1) {
|
||||
/* Forward the time to expire in the future */
|
||||
@@ -545,6 +566,41 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
|
||||
}
|
||||
}
|
||||
|
||||
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
|
||||
{
|
||||
/* Update jiffies first */
|
||||
select_nohz_load_balancer(0);
|
||||
tick_do_update_jiffies64(now);
|
||||
update_cpu_load_nohz();
|
||||
|
||||
touch_softlockup_watchdog();
|
||||
/*
|
||||
* Cancel the scheduled timer and restore the tick
|
||||
*/
|
||||
ts->tick_stopped = 0;
|
||||
ts->idle_exittime = now;
|
||||
|
||||
tick_nohz_restart(ts, now);
|
||||
}
|
||||
|
||||
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
|
||||
{
|
||||
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
unsigned long ticks;
|
||||
/*
|
||||
* We stopped the tick in idle. Update process times would miss the
|
||||
* time we slept as update_process_times does only a 1 tick
|
||||
* accounting. Enforce that this is accounted to idle !
|
||||
*/
|
||||
ticks = jiffies - ts->idle_jiffies;
|
||||
/*
|
||||
* We might be one off. Do not randomly account a huge number of ticks!
|
||||
*/
|
||||
if (ticks && ticks < LONG_MAX)
|
||||
account_idle_ticks(ticks);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* tick_nohz_idle_exit - restart the idle tick from the idle task
|
||||
*
|
||||
@@ -556,9 +612,6 @@ void tick_nohz_idle_exit(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
unsigned long ticks;
|
||||
#endif
|
||||
ktime_t now;
|
||||
|
||||
local_irq_disable();
|
||||
@@ -573,39 +626,11 @@ void tick_nohz_idle_exit(void)
|
||||
if (ts->idle_active)
|
||||
tick_nohz_stop_idle(cpu, now);
|
||||
|
||||
if (!ts->tick_stopped) {
|
||||
local_irq_enable();
|
||||
return;
|
||||
if (ts->tick_stopped) {
|
||||
tick_nohz_restart_sched_tick(ts, now);
|
||||
tick_nohz_account_idle_ticks(ts);
|
||||
}
|
||||
|
||||
/* Update jiffies first */
|
||||
select_nohz_load_balancer(0);
|
||||
tick_do_update_jiffies64(now);
|
||||
update_cpu_load_nohz();
|
||||
|
||||
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
/*
|
||||
* We stopped the tick in idle. Update process times would miss the
|
||||
* time we slept as update_process_times does only a 1 tick
|
||||
* accounting. Enforce that this is accounted to idle !
|
||||
*/
|
||||
ticks = jiffies - ts->idle_jiffies;
|
||||
/*
|
||||
* We might be one off. Do not randomly account a huge number of ticks!
|
||||
*/
|
||||
if (ticks && ticks < LONG_MAX)
|
||||
account_idle_ticks(ticks);
|
||||
#endif
|
||||
|
||||
touch_softlockup_watchdog();
|
||||
/*
|
||||
* Cancel the scheduled timer and restore the tick
|
||||
*/
|
||||
ts->tick_stopped = 0;
|
||||
ts->idle_exittime = now;
|
||||
|
||||
tick_nohz_restart(ts, now);
|
||||
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
@@ -809,7 +834,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
*/
|
||||
if (ts->tick_stopped) {
|
||||
touch_softlockup_watchdog();
|
||||
ts->idle_jiffies++;
|
||||
if (idle_cpu(cpu))
|
||||
ts->idle_jiffies++;
|
||||
}
|
||||
update_process_times(user_mode(regs));
|
||||
profile_tick(CPU_PROFILING);
|
||||
|
@@ -24,32 +24,32 @@
|
||||
/* Structure holding internal timekeeping values. */
|
||||
struct timekeeper {
|
||||
/* Current clocksource used for timekeeping. */
|
||||
struct clocksource *clock;
|
||||
struct clocksource *clock;
|
||||
/* NTP adjusted clock multiplier */
|
||||
u32 mult;
|
||||
u32 mult;
|
||||
/* The shift value of the current clocksource. */
|
||||
int shift;
|
||||
|
||||
u32 shift;
|
||||
/* Number of clock cycles in one NTP interval. */
|
||||
cycle_t cycle_interval;
|
||||
cycle_t cycle_interval;
|
||||
/* Number of clock shifted nano seconds in one NTP interval. */
|
||||
u64 xtime_interval;
|
||||
u64 xtime_interval;
|
||||
/* shifted nano seconds left over when rounding cycle_interval */
|
||||
s64 xtime_remainder;
|
||||
s64 xtime_remainder;
|
||||
/* Raw nano seconds accumulated per NTP interval. */
|
||||
u32 raw_interval;
|
||||
u32 raw_interval;
|
||||
|
||||
/* Current CLOCK_REALTIME time in seconds */
|
||||
u64 xtime_sec;
|
||||
/* Clock shifted nano seconds */
|
||||
u64 xtime_nsec;
|
||||
|
||||
/* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
|
||||
u64 xtime_nsec;
|
||||
/* Difference between accumulated time and NTP time in ntp
|
||||
* shifted nano seconds. */
|
||||
s64 ntp_error;
|
||||
s64 ntp_error;
|
||||
/* Shift conversion between clock shifted nano seconds and
|
||||
* ntp shifted nano seconds. */
|
||||
int ntp_error_shift;
|
||||
u32 ntp_error_shift;
|
||||
|
||||
/* The current time */
|
||||
struct timespec xtime;
|
||||
/*
|
||||
* wall_to_monotonic is what we need to add to xtime (or xtime corrected
|
||||
* for sub jiffie times) to get to monotonic time. Monotonic is pegged
|
||||
@@ -64,14 +64,17 @@ struct timekeeper {
|
||||
* - wall_to_monotonic is no longer the boot time, getboottime must be
|
||||
* used instead.
|
||||
*/
|
||||
struct timespec wall_to_monotonic;
|
||||
struct timespec wall_to_monotonic;
|
||||
/* time spent in suspend */
|
||||
struct timespec total_sleep_time;
|
||||
struct timespec total_sleep_time;
|
||||
/* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
|
||||
struct timespec raw_time;
|
||||
|
||||
struct timespec raw_time;
|
||||
/* Offset clock monotonic -> clock realtime */
|
||||
ktime_t offs_real;
|
||||
/* Offset clock monotonic -> clock boottime */
|
||||
ktime_t offs_boot;
|
||||
/* Seqlock for all timekeeper values */
|
||||
seqlock_t lock;
|
||||
seqlock_t lock;
|
||||
};
|
||||
|
||||
static struct timekeeper timekeeper;
|
||||
@@ -82,11 +85,37 @@ static struct timekeeper timekeeper;
|
||||
*/
|
||||
__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
|
||||
|
||||
|
||||
/* flag for if timekeeping is suspended */
|
||||
int __read_mostly timekeeping_suspended;
|
||||
|
||||
static inline void tk_normalize_xtime(struct timekeeper *tk)
|
||||
{
|
||||
while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
|
||||
tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
|
||||
tk->xtime_sec++;
|
||||
}
|
||||
}
|
||||
|
||||
static struct timespec tk_xtime(struct timekeeper *tk)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
ts.tv_sec = tk->xtime_sec;
|
||||
ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
|
||||
return ts;
|
||||
}
|
||||
|
||||
static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
|
||||
{
|
||||
tk->xtime_sec = ts->tv_sec;
|
||||
tk->xtime_nsec = ts->tv_nsec << tk->shift;
|
||||
}
|
||||
|
||||
static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
|
||||
{
|
||||
tk->xtime_sec += ts->tv_sec;
|
||||
tk->xtime_nsec += ts->tv_nsec << tk->shift;
|
||||
}
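
/*
 * Illustrative sketch, not part of the patch: keeping the sub-second
 * remainder in "shifted nanoseconds" as the helpers above do, so that
 * cycle_delta * mult can be accumulated before the final >> shift.  The
 * mult/shift/cycle_delta values are invented (mult = 1 << shift gives
 * exactly one nanosecond per cycle).
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL

struct tk_sketch {
	uint64_t xtime_sec;
	uint64_t xtime_nsec;	/* nanoseconds << shift */
	uint32_t mult;
	uint32_t shift;
};

static void tk_normalize(struct tk_sketch *tk)
{
	while (tk->xtime_nsec >= (NSEC_PER_SEC << tk->shift)) {
		tk->xtime_nsec -= NSEC_PER_SEC << tk->shift;
		tk->xtime_sec++;
	}
}

int main(void)
{
	struct tk_sketch tk = { .mult = 1 << 22, .shift = 22 };
	uint64_t cycle_delta = 1500000123;	/* invented cycle count */

	tk.xtime_nsec += (uint64_t)cycle_delta * tk.mult;	/* no early rounding */
	tk_normalize(&tk);

	printf("%llu.%09llu\n",
	       (unsigned long long)tk.xtime_sec,
	       (unsigned long long)(tk.xtime_nsec >> tk.shift));
	return 0;
}
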
|
||||
|
||||
/**
|
||||
* timekeeper_setup_internals - Set up internals to use clocksource clock.
|
||||
@@ -98,12 +127,14 @@ int __read_mostly timekeeping_suspended;
|
||||
*
|
||||
* Unless you're the timekeeping code, you should not be using this!
|
||||
*/
|
||||
static void timekeeper_setup_internals(struct clocksource *clock)
|
||||
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
|
||||
{
|
||||
cycle_t interval;
|
||||
u64 tmp, ntpinterval;
|
||||
struct clocksource *old_clock;
|
||||
|
||||
timekeeper.clock = clock;
|
||||
old_clock = tk->clock;
|
||||
tk->clock = clock;
|
||||
clock->cycle_last = clock->read(clock);
|
||||
|
||||
/* Do the ns -> cycle conversion first, using original mult */
|
||||
@@ -116,71 +147,96 @@ static void timekeeper_setup_internals(struct clocksource *clock)
|
||||
tmp = 1;
|
||||
|
||||
interval = (cycle_t) tmp;
|
||||
timekeeper.cycle_interval = interval;
|
||||
tk->cycle_interval = interval;
|
||||
|
||||
/* Go back from cycles -> shifted ns */
|
||||
timekeeper.xtime_interval = (u64) interval * clock->mult;
|
||||
timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval;
|
||||
timekeeper.raw_interval =
|
||||
tk->xtime_interval = (u64) interval * clock->mult;
|
||||
tk->xtime_remainder = ntpinterval - tk->xtime_interval;
|
||||
tk->raw_interval =
|
||||
((u64) interval * clock->mult) >> clock->shift;
|
||||
|
||||
timekeeper.xtime_nsec = 0;
|
||||
timekeeper.shift = clock->shift;
|
||||
/* if changing clocks, convert xtime_nsec shift units */
|
||||
if (old_clock) {
|
||||
int shift_change = clock->shift - old_clock->shift;
|
||||
if (shift_change < 0)
|
||||
tk->xtime_nsec >>= -shift_change;
|
||||
else
|
||||
tk->xtime_nsec <<= shift_change;
|
||||
}
|
||||
tk->shift = clock->shift;
|
||||
|
||||
timekeeper.ntp_error = 0;
|
||||
timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
|
||||
tk->ntp_error = 0;
|
||||
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
|
||||
|
||||
/*
|
||||
* The timekeeper keeps its own mult values for the currently
|
||||
* active clocksource. These value will be adjusted via NTP
|
||||
* to counteract clock drifting.
|
||||
*/
|
||||
timekeeper.mult = clock->mult;
|
||||
tk->mult = clock->mult;
|
||||
}
|
||||
|
||||
/* Timekeeper helper functions. */
|
||||
static inline s64 timekeeping_get_ns(void)
|
||||
static inline s64 timekeeping_get_ns(struct timekeeper *tk)
|
||||
{
|
||||
cycle_t cycle_now, cycle_delta;
|
||||
struct clocksource *clock;
|
||||
s64 nsec;
|
||||
|
||||
/* read clocksource: */
|
||||
clock = timekeeper.clock;
|
||||
clock = tk->clock;
|
||||
cycle_now = clock->read(clock);
|
||||
|
||||
/* calculate the delta since the last update_wall_time: */
|
||||
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
|
||||
|
||||
/* return delta convert to nanoseconds using ntp adjusted mult. */
|
||||
return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
|
||||
timekeeper.shift);
|
||||
nsec = cycle_delta * tk->mult + tk->xtime_nsec;
|
||||
nsec >>= tk->shift;
|
||||
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
return nsec + arch_gettimeoffset();
|
||||
}
|
||||
|
||||
static inline s64 timekeeping_get_ns_raw(void)
|
||||
static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
|
||||
{
|
||||
cycle_t cycle_now, cycle_delta;
|
||||
struct clocksource *clock;
|
||||
s64 nsec;
|
||||
|
||||
/* read clocksource: */
|
||||
clock = timekeeper.clock;
|
||||
clock = tk->clock;
|
||||
cycle_now = clock->read(clock);
|
||||
|
||||
/* calculate the delta since the last update_wall_time: */
|
||||
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
|
||||
|
||||
/* return delta convert to nanoseconds. */
|
||||
return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
|
||||
/* convert delta to nanoseconds. */
|
||||
nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
|
||||
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
return nsec + arch_gettimeoffset();
|
||||
}
|
||||
|
||||
static void update_rt_offset(struct timekeeper *tk)
|
||||
{
|
||||
struct timespec tmp, *wtm = &tk->wall_to_monotonic;
|
||||
|
||||
set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
|
||||
tk->offs_real = timespec_to_ktime(tmp);
|
||||
}
|
||||
|
||||
/* must hold write on timekeeper.lock */
|
||||
static void timekeeping_update(bool clearntp)
|
||||
static void timekeeping_update(struct timekeeper *tk, bool clearntp)
|
||||
{
|
||||
struct timespec xt;
|
||||
|
||||
if (clearntp) {
|
||||
timekeeper.ntp_error = 0;
|
||||
tk->ntp_error = 0;
|
||||
ntp_clear();
|
||||
}
|
||||
update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic,
|
||||
timekeeper.clock, timekeeper.mult);
|
||||
update_rt_offset(tk);
|
||||
xt = tk_xtime(tk);
|
||||
update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
|
||||
}
|
||||
|
||||
|
||||
@@ -191,27 +247,26 @@ static void timekeeping_update(bool clearntp)
|
||||
* update_wall_time(). This is useful before significant clock changes,
|
||||
* as it avoids having to deal with this time offset explicitly.
|
||||
*/
|
||||
static void timekeeping_forward_now(void)
|
||||
static void timekeeping_forward_now(struct timekeeper *tk)
|
||||
{
|
||||
cycle_t cycle_now, cycle_delta;
|
||||
struct clocksource *clock;
|
||||
s64 nsec;
|
||||
|
||||
clock = timekeeper.clock;
|
||||
clock = tk->clock;
|
||||
cycle_now = clock->read(clock);
|
||||
cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
|
||||
clock->cycle_last = cycle_now;
|
||||
|
||||
nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult,
|
||||
timekeeper.shift);
|
||||
tk->xtime_nsec += cycle_delta * tk->mult;
|
||||
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
nsec += arch_gettimeoffset();
|
||||
tk->xtime_nsec += arch_gettimeoffset() << tk->shift;
|
||||
|
||||
timespec_add_ns(&timekeeper.xtime, nsec);
|
||||
tk_normalize_xtime(tk);
|
||||
|
||||
nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
|
||||
timespec_add_ns(&timekeeper.raw_time, nsec);
|
||||
timespec_add_ns(&tk->raw_time, nsec);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -223,18 +278,15 @@ static void timekeeping_forward_now(void)
|
||||
void getnstimeofday(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
s64 nsecs;
|
||||
s64 nsecs = 0;
|
||||
|
||||
WARN_ON(timekeeping_suspended);
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
|
||||
*ts = timekeeper.xtime;
|
||||
nsecs = timekeeping_get_ns();
|
||||
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
nsecs += arch_gettimeoffset();
|
||||
ts->tv_sec = timekeeper.xtime_sec;
|
||||
ts->tv_nsec = timekeeping_get_ns(&timekeeper);
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
@@ -251,13 +303,10 @@ ktime_t ktime_get(void)
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
secs = timekeeper.xtime.tv_sec +
|
||||
secs = timekeeper.xtime_sec +
|
||||
timekeeper.wall_to_monotonic.tv_sec;
|
||||
nsecs = timekeeper.xtime.tv_nsec +
|
||||
nsecs = timekeeping_get_ns(&timekeeper) +
|
||||
timekeeper.wall_to_monotonic.tv_nsec;
|
||||
nsecs += timekeeping_get_ns();
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
nsecs += arch_gettimeoffset();
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
/*
|
||||
@@ -280,22 +329,19 @@ void ktime_get_ts(struct timespec *ts)
|
||||
{
|
||||
struct timespec tomono;
|
||||
unsigned int seq;
|
||||
s64 nsecs;
|
||||
|
||||
WARN_ON(timekeeping_suspended);
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
*ts = timekeeper.xtime;
|
||||
ts->tv_sec = timekeeper.xtime_sec;
|
||||
ts->tv_nsec = timekeeping_get_ns(&timekeeper);
|
||||
tomono = timekeeper.wall_to_monotonic;
|
||||
nsecs = timekeeping_get_ns();
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
nsecs += arch_gettimeoffset();
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
|
||||
ts->tv_nsec + tomono.tv_nsec + nsecs);
|
||||
ts->tv_nsec + tomono.tv_nsec);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ktime_get_ts);
|
||||
|
||||
@@ -318,20 +364,14 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
|
||||
WARN_ON_ONCE(timekeeping_suspended);
|
||||
|
||||
do {
|
||||
u32 arch_offset;
|
||||
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
|
||||
*ts_raw = timekeeper.raw_time;
|
||||
*ts_real = timekeeper.xtime;
|
||||
ts_real->tv_sec = timekeeper.xtime_sec;
|
||||
ts_real->tv_nsec = 0;
|
||||
|
||||
nsecs_raw = timekeeping_get_ns_raw();
|
||||
nsecs_real = timekeeping_get_ns();
|
||||
|
||||
/* If arch requires, add in gettimeoffset() */
|
||||
arch_offset = arch_gettimeoffset();
|
||||
nsecs_raw += arch_offset;
|
||||
nsecs_real += arch_offset;
|
||||
nsecs_raw = timekeeping_get_ns_raw(&timekeeper);
|
||||
nsecs_real = timekeeping_get_ns(&timekeeper);
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
@@ -366,7 +406,7 @@ EXPORT_SYMBOL(do_gettimeofday);
|
||||
*/
|
||||
int do_settimeofday(const struct timespec *tv)
|
||||
{
|
||||
struct timespec ts_delta;
|
||||
struct timespec ts_delta, xt;
|
||||
unsigned long flags;
|
||||
|
||||
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
|
||||
@@ -374,15 +414,18 @@ int do_settimeofday(const struct timespec *tv)
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
|
||||
timekeeping_forward_now();
|
||||
timekeeping_forward_now(&timekeeper);
|
||||
|
||||
xt = tk_xtime(&timekeeper);
|
||||
ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
|
||||
ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
|
||||
|
||||
ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec;
|
||||
ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec;
|
||||
timekeeper.wall_to_monotonic =
|
||||
timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
|
||||
|
||||
timekeeper.xtime = *tv;
|
||||
timekeeping_update(true);
|
||||
tk_set_xtime(&timekeeper, tv);
|
||||
|
||||
timekeeping_update(&timekeeper, true);
|
||||
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
|
||||
@@ -409,13 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts)
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
|
||||
timekeeping_forward_now();
|
||||
timekeeping_forward_now(&timekeeper);
|
||||
|
||||
timekeeper.xtime = timespec_add(timekeeper.xtime, *ts);
|
||||
|
||||
tk_xtime_add(&timekeeper, ts);
|
||||
timekeeper.wall_to_monotonic =
|
||||
timespec_sub(timekeeper.wall_to_monotonic, *ts);
|
||||
|
||||
timekeeping_update(true);
|
||||
timekeeping_update(&timekeeper, true);
|
||||
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
|
||||
@@ -440,14 +484,14 @@ static int change_clocksource(void *data)
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
|
||||
timekeeping_forward_now();
|
||||
timekeeping_forward_now(&timekeeper);
|
||||
if (!new->enable || new->enable(new) == 0) {
|
||||
old = timekeeper.clock;
|
||||
timekeeper_setup_internals(new);
|
||||
tk_setup_internals(&timekeeper, new);
|
||||
if (old->disable)
|
||||
old->disable(old);
|
||||
}
|
||||
timekeeping_update(true);
|
||||
timekeeping_update(&timekeeper, true);
|
||||
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
|
||||
@@ -497,7 +541,7 @@ void getrawmonotonic(struct timespec *ts)
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
nsecs = timekeeping_get_ns_raw();
|
||||
nsecs = timekeeping_get_ns_raw(&timekeeper);
|
||||
*ts = timekeeper.raw_time;
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
@@ -532,6 +576,7 @@ u64 timekeeping_max_deferment(void)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ret;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
|
||||
@@ -592,18 +637,17 @@ void __init timekeeping_init(void)
|
||||
clock = clocksource_default_clock();
|
||||
if (clock->enable)
|
||||
clock->enable(clock);
|
||||
timekeeper_setup_internals(clock);
|
||||
tk_setup_internals(&timekeeper, clock);
|
||||
|
||||
timekeeper.xtime.tv_sec = now.tv_sec;
|
||||
timekeeper.xtime.tv_nsec = now.tv_nsec;
|
||||
tk_set_xtime(&timekeeper, &now);
|
||||
timekeeper.raw_time.tv_sec = 0;
|
||||
timekeeper.raw_time.tv_nsec = 0;
|
||||
if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
|
||||
boot.tv_sec = timekeeper.xtime.tv_sec;
|
||||
boot.tv_nsec = timekeeper.xtime.tv_nsec;
|
||||
}
|
||||
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
|
||||
boot = tk_xtime(&timekeeper);
|
||||
|
||||
set_normalized_timespec(&timekeeper.wall_to_monotonic,
|
||||
-boot.tv_sec, -boot.tv_nsec);
|
||||
update_rt_offset(&timekeeper);
|
||||
timekeeper.total_sleep_time.tv_sec = 0;
|
||||
timekeeper.total_sleep_time.tv_nsec = 0;
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
@@ -612,6 +656,12 @@ void __init timekeeping_init(void)
|
||||
/* time in seconds when suspend began */
|
||||
static struct timespec timekeeping_suspend_time;
|
||||
|
||||
static void update_sleep_time(struct timespec t)
|
||||
{
|
||||
timekeeper.total_sleep_time = t;
|
||||
timekeeper.offs_boot = timespec_to_ktime(t);
|
||||
}
|
||||
|
||||
/**
|
||||
* __timekeeping_inject_sleeptime - Internal function to add sleep interval
|
||||
* @delta: pointer to a timespec delta value
|
||||
@@ -619,7 +669,8 @@ static struct timespec timekeeping_suspend_time;
|
||||
* Takes a timespec offset measuring a suspend interval and properly
|
||||
* adds the sleep offset to the timekeeping variables.
|
||||
*/
|
||||
static void __timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
|
||||
struct timespec *delta)
|
||||
{
|
||||
if (!timespec_valid(delta)) {
|
||||
printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
|
||||
@@ -627,11 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
return;
|
||||
}
|
||||
|
||||
timekeeper.xtime = timespec_add(timekeeper.xtime, *delta);
|
||||
timekeeper.wall_to_monotonic =
|
||||
timespec_sub(timekeeper.wall_to_monotonic, *delta);
|
||||
timekeeper.total_sleep_time = timespec_add(
|
||||
timekeeper.total_sleep_time, *delta);
|
||||
tk_xtime_add(tk, delta);
|
||||
tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta);
|
||||
update_sleep_time(timespec_add(tk->total_sleep_time, *delta));
|
||||
}
|
||||
|
||||
|
||||
@@ -657,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
|
||||
timekeeping_forward_now();
|
||||
timekeeping_forward_now(&timekeeper);
|
||||
|
||||
__timekeeping_inject_sleeptime(delta);
|
||||
__timekeeping_inject_sleeptime(&timekeeper, delta);
|
||||
|
||||
timekeeping_update(true);
|
||||
timekeeping_update(&timekeeper, true);
|
||||
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
|
||||
@@ -690,12 +739,13 @@ static void timekeeping_resume(void)
|
||||
|
||||
if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
|
||||
ts = timespec_sub(ts, timekeeping_suspend_time);
|
||||
__timekeeping_inject_sleeptime(&ts);
|
||||
__timekeeping_inject_sleeptime(&timekeeper, &ts);
|
||||
}
|
||||
/* re-base the last cycle value */
|
||||
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
|
||||
timekeeper.ntp_error = 0;
|
||||
timekeeping_suspended = 0;
|
||||
timekeeping_update(&timekeeper, false);
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
|
||||
touch_softlockup_watchdog();
|
||||
@@ -715,7 +765,7 @@ static int timekeeping_suspend(void)
|
||||
read_persistent_clock(&timekeeping_suspend_time);
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
timekeeping_forward_now();
|
||||
timekeeping_forward_now(&timekeeper);
|
||||
timekeeping_suspended = 1;
|
||||
|
||||
/*
|
||||
@@ -724,7 +774,7 @@ static int timekeeping_suspend(void)
|
||||
* try to compensate so the difference in system time
|
||||
* and persistent_clock time stays close to constant.
|
||||
*/
|
||||
delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time);
|
||||
delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time);
|
||||
delta_delta = timespec_sub(delta, old_delta);
|
||||
if (abs(delta_delta.tv_sec) >= 2) {
|
||||
/*
|
||||
@@ -763,7 +813,8 @@ device_initcall(timekeeping_init_ops);
|
||||
* If the error is already larger, we look ahead even further
|
||||
* to compensate for late or lost adjustments.
|
||||
*/
|
||||
static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
|
||||
static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
|
||||
s64 error, s64 *interval,
|
||||
s64 *offset)
|
||||
{
|
||||
s64 tick_error, i;
|
||||
@@ -779,7 +830,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
|
||||
* here. This is tuned so that an error of about 1 msec is adjusted
|
||||
* within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
|
||||
*/
|
||||
error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
|
||||
error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
|
||||
error2 = abs(error2);
|
||||
for (look_ahead = 0; error2 > 0; look_ahead++)
|
||||
error2 >>= 2;
|
||||
@@ -788,8 +839,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
|
||||
* Now calculate the error in (1 << look_ahead) ticks, but first
|
||||
* remove the single look ahead already included in the error.
|
||||
*/
|
||||
tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1);
|
||||
tick_error -= timekeeper.xtime_interval >> 1;
|
||||
tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
|
||||
tick_error -= tk->xtime_interval >> 1;
|
||||
error = ((error - tick_error) >> look_ahead) + tick_error;
|
||||
|
||||
/* Finally calculate the adjustment shift value. */
|
||||
@@ -814,9 +865,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
|
||||
* this is optimized for the most common adjustments of -1,0,1,
|
||||
* for other values we can do a bit more work.
|
||||
*/
|
||||
static void timekeeping_adjust(s64 offset)
|
||||
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
|
||||
{
|
||||
s64 error, interval = timekeeper.cycle_interval;
|
||||
s64 error, interval = tk->cycle_interval;
|
||||
int adj;
|
||||
|
||||
/*
|
||||
@@ -832,7 +883,7 @@ static void timekeeping_adjust(s64 offset)
|
||||
*
|
||||
* Note: It does not "save" on aggravation when reading the code.
|
||||
*/
|
||||
error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
|
||||
error = tk->ntp_error >> (tk->ntp_error_shift - 1);
|
||||
if (error > interval) {
|
||||
/*
|
||||
* We now divide error by 4(via shift), which checks if
|
||||
@@ -854,7 +905,8 @@ static void timekeeping_adjust(s64 offset)
|
||||
if (likely(error <= interval))
|
||||
adj = 1;
|
||||
else
|
||||
adj = timekeeping_bigadjust(error, &interval, &offset);
|
||||
adj = timekeeping_bigadjust(tk, error, &interval,
|
||||
&offset);
|
||||
} else if (error < -interval) {
|
||||
/* See comment above, this is just switched for the negative */
|
||||
error >>= 2;
|
||||
@@ -863,18 +915,17 @@ static void timekeeping_adjust(s64 offset)
|
||||
interval = -interval;
|
||||
offset = -offset;
|
||||
} else
|
||||
adj = timekeeping_bigadjust(error, &interval, &offset);
|
||||
} else /* No adjustment needed */
|
||||
adj = timekeeping_bigadjust(tk, error, &interval,
|
||||
&offset);
|
||||
} else
|
||||
return;
|
||||
|
||||
if (unlikely(timekeeper.clock->maxadj &&
|
||||
(timekeeper.mult + adj >
|
||||
timekeeper.clock->mult + timekeeper.clock->maxadj))) {
|
||||
if (unlikely(tk->clock->maxadj &&
|
||||
(tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
|
||||
printk_once(KERN_WARNING
|
||||
"Adjusting %s more than 11%% (%ld vs %ld)\n",
|
||||
timekeeper.clock->name, (long)timekeeper.mult + adj,
|
||||
(long)timekeeper.clock->mult +
|
||||
timekeeper.clock->maxadj);
|
||||
tk->clock->name, (long)tk->mult + adj,
|
||||
(long)tk->clock->mult + tk->clock->maxadj);
|
||||
}
|
||||
/*
|
||||
* So the following can be confusing.
|
||||
@@ -925,11 +976,60 @@ static void timekeeping_adjust(s64 offset)
|
||||
*
|
||||
* XXX - TODO: Doc ntp_error calculation.
|
||||
*/
|
||||
timekeeper.mult += adj;
|
||||
timekeeper.xtime_interval += interval;
|
||||
timekeeper.xtime_nsec -= offset;
|
||||
timekeeper.ntp_error -= (interval - offset) <<
|
||||
timekeeper.ntp_error_shift;
|
||||
tk->mult += adj;
|
||||
tk->xtime_interval += interval;
|
||||
tk->xtime_nsec -= offset;
|
||||
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
|
||||
|
||||
/*
* It may be possible that when we entered this function, xtime_nsec
* was very small. Further, if we're slightly speeding the clocksource
* in the code above, it's possible the required corrective factor to
* xtime_nsec could cause it to underflow.
*
* Now, since we already accumulated the second, we cannot simply roll
* the accumulated second back, since the NTP subsystem has been
* notified via second_overflow. So instead we push xtime_nsec forward
* by the amount we underflowed, and add that amount into the error.
*
* We'll correct this error next time through this function, when
* xtime_nsec is not as small.
*/
if (unlikely((s64)tk->xtime_nsec < 0)) {
s64 neg = -(s64)tk->xtime_nsec;
tk->xtime_nsec = 0;
tk->ntp_error += neg << tk->ntp_error_shift;
}

}


/**
* accumulate_nsecs_to_secs - Accumulates nsecs into secs
*
* Helper function that accumulates the nsecs greater than a second
* from the xtime_nsec field to the xtime_secs field.
* It also calls into the NTP code to handle leapsecond processing.
*
*/
static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;

while (tk->xtime_nsec >= nsecps) {
int leap;

tk->xtime_nsec -= nsecps;
tk->xtime_sec++;

/* Figure out if it's a leap sec and apply if needed */
leap = second_overflow(tk->xtime_sec);
tk->xtime_sec += leap;
tk->wall_to_monotonic.tv_sec -= leap;
if (leap)
clock_was_set_delayed();

}
}
|
||||
|
||||
|
||||
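Aside on the new shifted-nanosecond bookkeeping: xtime_nsec now holds nanoseconds pre-shifted by the clocksource shift, and whole seconds are carried out of it in a loop like the one above. The fragment below is a minimal userspace sketch of that carry, not kernel code; the struct name, the fixed SHIFT value and the omitted leap-second handling are assumptions made purely for illustration.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define SHIFT 8                         /* assumed clocksource shift */

struct toy_tk {                         /* hypothetical stand-in for struct timekeeper */
    uint64_t xtime_sec;
    uint64_t xtime_nsec;                /* shifted nanoseconds: ns << SHIFT */
};

/* Carry whole seconds out of the shifted-nanosecond field. */
static void toy_accumulate_nsecs_to_secs(struct toy_tk *tk)
{
    uint64_t nsecps = NSEC_PER_SEC << SHIFT;

    while (tk->xtime_nsec >= nsecps) {
        tk->xtime_nsec -= nsecps;
        tk->xtime_sec++;
        /* the kernel additionally asks the NTP code about leap seconds here */
    }
}

int main(void)
{
    struct toy_tk tk = { .xtime_sec = 10,
                         .xtime_nsec = (2 * NSEC_PER_SEC + 5) << SHIFT };

    toy_accumulate_nsecs_to_secs(&tk);
    printf("%llu sec + %llu shifted ns\n",
           (unsigned long long)tk.xtime_sec,
           (unsigned long long)tk.xtime_nsec);   /* 12 sec + (5 << SHIFT) */
    return 0;
}

Compiled standalone, it prints 12 seconds with 5 (shifted) nanoseconds left over, which is the same carry the kernel loop performs.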
@@ -942,44 +1042,36 @@ static void timekeeping_adjust(s64 offset)
|
||||
*
|
||||
* Returns the unconsumed cycles.
|
||||
*/
|
||||
static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
|
||||
static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
|
||||
u32 shift)
|
||||
{
|
||||
u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
|
||||
u64 raw_nsecs;
|
||||
|
||||
/* If the offset is smaller than a shifted interval, do nothing */
|
||||
if (offset < timekeeper.cycle_interval<<shift)
|
||||
/* If the offset is smaller than a shifted interval, do nothing */
|
||||
if (offset < tk->cycle_interval<<shift)
|
||||
return offset;
|
||||
|
||||
/* Accumulate one shifted interval */
|
||||
offset -= timekeeper.cycle_interval << shift;
|
||||
timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
|
||||
offset -= tk->cycle_interval << shift;
|
||||
tk->clock->cycle_last += tk->cycle_interval << shift;
|
||||
|
||||
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
|
||||
while (timekeeper.xtime_nsec >= nsecps) {
|
||||
int leap;
|
||||
timekeeper.xtime_nsec -= nsecps;
|
||||
timekeeper.xtime.tv_sec++;
|
||||
leap = second_overflow(timekeeper.xtime.tv_sec);
|
||||
timekeeper.xtime.tv_sec += leap;
|
||||
timekeeper.wall_to_monotonic.tv_sec -= leap;
|
||||
}
|
||||
tk->xtime_nsec += tk->xtime_interval << shift;
|
||||
accumulate_nsecs_to_secs(tk);
|
||||
|
||||
/* Accumulate raw time */
|
||||
raw_nsecs = timekeeper.raw_interval << shift;
|
||||
raw_nsecs += timekeeper.raw_time.tv_nsec;
|
||||
raw_nsecs = tk->raw_interval << shift;
|
||||
raw_nsecs += tk->raw_time.tv_nsec;
|
||||
if (raw_nsecs >= NSEC_PER_SEC) {
|
||||
u64 raw_secs = raw_nsecs;
|
||||
raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
|
||||
timekeeper.raw_time.tv_sec += raw_secs;
|
||||
tk->raw_time.tv_sec += raw_secs;
|
||||
}
|
||||
timekeeper.raw_time.tv_nsec = raw_nsecs;
|
||||
tk->raw_time.tv_nsec = raw_nsecs;
|
||||
|
||||
/* Accumulate error between NTP and clock interval */
|
||||
timekeeper.ntp_error += ntp_tick_length() << shift;
|
||||
timekeeper.ntp_error -=
|
||||
(timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
|
||||
(timekeeper.ntp_error_shift + shift);
|
||||
tk->ntp_error += ntp_tick_length() << shift;
|
||||
tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
|
||||
(tk->ntp_error_shift + shift);
|
||||
|
||||
return offset;
|
||||
}
|
||||
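For context on the loop that calls logarithmic_accumulation(): when many tick intervals are pending (for example after a long NO_HZ idle period), the backlog is consumed in power-of-two multiples of the base interval, and the chunk size shrinks as the remainder shrinks. The following is a rough standalone sketch of that idea only, with invented numbers and none of the ntp_tick_length()/maxshift capping the kernel applies.

#include <stdint.h>
#include <stdio.h>

/* Toy model: consume a large cycle backlog in power-of-two chunks of a
 * base interval, lowering the chunk size as the backlog shrinks. */

static uint64_t toy_accumulate(uint64_t offset, uint64_t interval, int shift)
{
    if (offset < (interval << shift))
        return offset;                  /* chunk too big; caller lowers shift */
    return offset - (interval << shift);
}

int main(void)
{
    uint64_t interval = 1000;           /* assumed cycles per tick */
    uint64_t offset = 1000000;          /* cycles waiting to be accumulated */
    int shift = 0;

    while ((interval << (shift + 1)) <= offset)
        shift++;                        /* start with the largest chunk that fits */

    while (offset >= interval) {
        offset = toy_accumulate(offset, interval, shift);
        if (offset < (interval << shift))
            shift--;                    /* halve the chunk as the backlog shrinks */
    }
    printf("remaining offset: %llu cycles\n", (unsigned long long)offset);
    return 0;
}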
@@ -995,6 +1087,7 @@ static void update_wall_time(void)
|
||||
cycle_t offset;
|
||||
int shift = 0, maxshift;
|
||||
unsigned long flags;
|
||||
s64 remainder;
|
||||
|
||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
||||
|
||||
@@ -1009,8 +1102,6 @@ static void update_wall_time(void)
|
||||
#else
|
||||
offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
|
||||
#endif
|
||||
timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec <<
|
||||
timekeeper.shift;
|
||||
|
||||
/*
|
||||
* With NO_HZ we may have to accumulate many cycle_intervals
|
||||
@@ -1026,62 +1117,36 @@ static void update_wall_time(void)
|
||||
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
|
||||
shift = min(shift, maxshift);
|
||||
while (offset >= timekeeper.cycle_interval) {
|
||||
offset = logarithmic_accumulation(offset, shift);
|
||||
offset = logarithmic_accumulation(&timekeeper, offset, shift);
|
||||
if(offset < timekeeper.cycle_interval<<shift)
|
||||
shift--;
|
||||
}
|
||||
|
||||
/* correct the clock when NTP error is too big */
|
||||
timekeeping_adjust(offset);
|
||||
|
||||
/*
|
||||
* Since in the loop above, we accumulate any amount of time
|
||||
* in xtime_nsec over a second into xtime.tv_sec, its possible for
|
||||
* xtime_nsec to be fairly small after the loop. Further, if we're
|
||||
* slightly speeding the clocksource up in timekeeping_adjust(),
|
||||
* its possible the required corrective factor to xtime_nsec could
|
||||
* cause it to underflow.
|
||||
*
|
||||
* Now, we cannot simply roll the accumulated second back, since
|
||||
* the NTP subsystem has been notified via second_overflow. So
|
||||
* instead we push xtime_nsec forward by the amount we underflowed,
|
||||
* and add that amount into the error.
|
||||
*
|
||||
* We'll correct this error next time through this function, when
|
||||
* xtime_nsec is not as small.
|
||||
*/
|
||||
if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
|
||||
s64 neg = -(s64)timekeeper.xtime_nsec;
|
||||
timekeeper.xtime_nsec = 0;
|
||||
timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
|
||||
}
|
||||
timekeeping_adjust(&timekeeper, offset);
|
||||
|
||||
|
||||
/*
|
||||
* Store full nanoseconds into xtime after rounding it up and
|
||||
* add the remainder to the error difference.
|
||||
*/
|
||||
timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >>
|
||||
timekeeper.shift) + 1;
|
||||
timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec <<
|
||||
timekeeper.shift;
|
||||
timekeeper.ntp_error += timekeeper.xtime_nsec <<
|
||||
timekeeper.ntp_error_shift;
|
||||
* Store only full nanoseconds into xtime_nsec after rounding
|
||||
* it up and add the remainder to the error difference.
|
||||
* XXX - This is necessary to avoid small 1ns inconsistencies caused
|
||||
* by truncating the remainder in vsyscalls. However, it causes
|
||||
* additional work to be done in timekeeping_adjust(). Once
|
||||
* the vsyscall implementations are converted to use xtime_nsec
|
||||
* (shifted nanoseconds), this can be killed.
|
||||
*/
|
||||
remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1);
|
||||
timekeeper.xtime_nsec -= remainder;
|
||||
timekeeper.xtime_nsec += 1 << timekeeper.shift;
|
||||
timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift;
|
||||
|
||||
/*
|
||||
* Finally, make sure that after the rounding
|
||||
* xtime.tv_nsec isn't larger than NSEC_PER_SEC
|
||||
* xtime_nsec isn't larger than NSEC_PER_SEC
|
||||
*/
|
||||
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
|
||||
int leap;
|
||||
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
|
||||
timekeeper.xtime.tv_sec++;
|
||||
leap = second_overflow(timekeeper.xtime.tv_sec);
|
||||
timekeeper.xtime.tv_sec += leap;
|
||||
timekeeper.wall_to_monotonic.tv_sec -= leap;
|
||||
}
|
||||
accumulate_nsecs_to_secs(&timekeeper);
|
||||
|
||||
timekeeping_update(false);
|
||||
timekeeping_update(&timekeeper, false);
|
||||
|
||||
out:
|
||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
||||
@@ -1126,21 +1191,20 @@ void get_monotonic_boottime(struct timespec *ts)
|
||||
{
|
||||
struct timespec tomono, sleep;
|
||||
unsigned int seq;
|
||||
s64 nsecs;
|
||||
|
||||
WARN_ON(timekeeping_suspended);
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
*ts = timekeeper.xtime;
|
||||
ts->tv_sec = timekeeper.xtime_sec;
|
||||
ts->tv_nsec = timekeeping_get_ns(&timekeeper);
|
||||
tomono = timekeeper.wall_to_monotonic;
|
||||
sleep = timekeeper.total_sleep_time;
|
||||
nsecs = timekeeping_get_ns();
|
||||
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
|
||||
ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
|
||||
ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_monotonic_boottime);
|
||||
|
||||
@@ -1173,13 +1237,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
|
||||
|
||||
unsigned long get_seconds(void)
|
||||
{
|
||||
return timekeeper.xtime.tv_sec;
|
||||
return timekeeper.xtime_sec;
|
||||
}
|
||||
EXPORT_SYMBOL(get_seconds);
|
||||
|
||||
struct timespec __current_kernel_time(void)
|
||||
{
|
||||
return timekeeper.xtime;
|
||||
return tk_xtime(&timekeeper);
|
||||
}
|
||||
|
||||
struct timespec current_kernel_time(void)
|
||||
@@ -1190,7 +1254,7 @@ struct timespec current_kernel_time(void)
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
|
||||
now = timekeeper.xtime;
|
||||
now = tk_xtime(&timekeeper);
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
return now;
|
||||
@@ -1205,7 +1269,7 @@ struct timespec get_monotonic_coarse(void)
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
|
||||
now = timekeeper.xtime;
|
||||
now = tk_xtime(&timekeeper);
|
||||
mono = timekeeper.wall_to_monotonic;
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
|
||||
@@ -1240,12 +1304,43 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&timekeeper.lock);
|
||||
*xtim = timekeeper.xtime;
|
||||
*xtim = tk_xtime(&timekeeper);
|
||||
*wtom = timekeeper.wall_to_monotonic;
|
||||
*sleep = timekeeper.total_sleep_time;
|
||||
} while (read_seqretry(&timekeeper.lock, seq));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
/**
* ktime_get_update_offsets - hrtimer helper
* @offs_real: pointer to storage for monotonic -> realtime offset
* @offs_boot: pointer to storage for monotonic -> boottime offset
*
* Returns current monotonic time and updates the offsets
* Called from hrtimer_interrupt() or retrigger_next_event()
*/
ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
{
ktime_t now;
unsigned int seq;
u64 secs, nsecs;

do {
seq = read_seqbegin(&timekeeper.lock);

secs = timekeeper.xtime_sec;
nsecs = timekeeping_get_ns(&timekeeper);

*offs_real = timekeeper.offs_real;
*offs_boot = timekeeper.offs_boot;
} while (read_seqretry(&timekeeper.lock, seq));

now = ktime_add_ns(ktime_set(secs, 0), nsecs);
now = ktime_sub(now, *offs_real);
return now;
}
#endif
|
||||
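ktime_get_update_offsets() above follows the same seqlock read convention as the rest of this file: sample the sequence, copy the fields, and retry if a writer intervened. Below is a toy single-threaded illustration of that retry loop only, using a hand-rolled counter rather than the kernel seqlock API and made-up field names.

#include <stdint.h>
#include <stdio.h>

/* Toy sequence counter: a real writer bumps it to an odd value on entry
 * and back to even on exit; a real reader would spin while it is odd.
 * Here we only show the copy-and-retry shape, with invented data. */

struct toy_clock {
    volatile unsigned seq;
    uint64_t secs;
    uint64_t nsecs;
};

static unsigned toy_read_begin(const struct toy_clock *c)
{
    return c->seq & ~1u;                /* simplification: no spin on odd values */
}

static int toy_read_retry(const struct toy_clock *c, unsigned start)
{
    return c->seq != start;
}

int main(void)
{
    struct toy_clock clk = { .seq = 2, .secs = 1234, .nsecs = 567 };
    uint64_t secs, nsecs;
    unsigned seq;

    do {
        seq = toy_read_begin(&clk);
        secs = clk.secs;                /* copy a consistent snapshot */
        nsecs = clk.nsecs;
    } while (toy_read_retry(&clk, seq));

    printf("%llu.%09llu\n", (unsigned long long)secs,
           (unsigned long long)nsecs);
    return 0;
}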
|
||||
/**
|
||||
* ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
|
||||
*/
|
||||
|
@@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
|
||||
{
|
||||
struct tick_sched *ts = tick_get_tick_sched(cpu);
|
||||
P(nohz_mode);
|
||||
P_ns(idle_tick);
|
||||
P_ns(last_tick);
|
||||
P(tick_stopped);
|
||||
P(idle_jiffies);
|
||||
P(idle_calls);
|
||||
@@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v)
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
int cpu;
|
||||
|
||||
SEQ_printf(m, "Timer List Version: v0.6\n");
|
||||
SEQ_printf(m, "Timer List Version: v0.7\n");
|
||||
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
|
||||
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
|
||||
|
||||
|
110
kernel/timer.c
@@ -77,6 +77,7 @@ struct tvec_base {
|
||||
struct timer_list *running_timer;
|
||||
unsigned long timer_jiffies;
|
||||
unsigned long next_timer;
|
||||
unsigned long active_timers;
|
||||
struct tvec_root tv1;
|
||||
struct tvec tv2;
|
||||
struct tvec tv3;
|
||||
@@ -330,7 +331,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_timer_slack);
|
||||
|
||||
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
||||
static void
|
||||
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
||||
{
|
||||
unsigned long expires = timer->expires;
|
||||
unsigned long idx = expires - base->timer_jiffies;
|
||||
@@ -372,6 +374,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
||||
list_add_tail(&timer->entry, vec);
|
||||
}
|
||||
|
||||
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
|
||||
{
|
||||
__internal_add_timer(base, timer);
|
||||
/*
|
||||
* Update base->active_timers and base->next_timer
|
||||
*/
|
||||
if (!tbase_get_deferrable(timer->base)) {
|
||||
if (time_before(timer->expires, base->next_timer))
|
||||
base->next_timer = timer->expires;
|
||||
base->active_timers++;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TIMER_STATS
|
||||
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
|
||||
{
|
||||
@@ -654,8 +669,7 @@ void init_timer_deferrable_key(struct timer_list *timer,
|
||||
}
|
||||
EXPORT_SYMBOL(init_timer_deferrable_key);
|
||||
|
||||
static inline void detach_timer(struct timer_list *timer,
|
||||
int clear_pending)
|
||||
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
|
||||
{
|
||||
struct list_head *entry = &timer->entry;
|
||||
|
||||
@@ -667,6 +681,29 @@ static inline void detach_timer(struct timer_list *timer,
|
||||
entry->prev = LIST_POISON2;
|
||||
}
|
||||
|
||||
static inline void
detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
{
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
timer->base->active_timers--;
}

static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
bool clear_pending)
{
if (!timer_pending(timer))
return 0;

detach_timer(timer, clear_pending);
if (!tbase_get_deferrable(timer->base)) {
timer->base->active_timers--;
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
return 1;
}
|
||||
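The new detach_if_pending() helper above folds the repeated "if pending, detach and fix up next_timer" sequences into one place. The following is a standalone sketch of that detach-if-pending idea on a plain circular list, with toy types and none of the tvec/deferrable accounting.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* A node is "pending" while it sits on a list; detaching reports
 * whether any work was done, like the timer helper in the diff. */

struct toy_node {
    struct toy_node *next, *prev;
};

static bool toy_pending(const struct toy_node *n)
{
    return n->next != NULL;
}

static int toy_detach_if_pending(struct toy_node *n)
{
    if (!toy_pending(n))
        return 0;                       /* nothing to do */

    n->prev->next = n->next;            /* unlink from the list */
    n->next->prev = n->prev;
    n->next = n->prev = NULL;           /* mark as no longer pending */
    return 1;
}

int main(void)
{
    struct toy_node head, a;

    head.next = head.prev = &head;      /* empty circular list */
    a.next = a.prev = NULL;

    /* insert a after head */
    a.next = head.next;
    a.prev = &head;
    head.next->prev = &a;
    head.next = &a;

    printf("first detach: %d\n", toy_detach_if_pending(&a));   /* 1 */
    printf("second detach: %d\n", toy_detach_if_pending(&a));  /* 0 */
    return 0;
}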
|
||||
/*
|
||||
* We are using hashed locking: holding per_cpu(tvec_bases).lock
|
||||
* means that all timers which are tied to this base via timer->base are
|
||||
@@ -712,16 +749,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
|
||||
|
||||
base = lock_timer_base(timer, &flags);
|
||||
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 0);
|
||||
if (timer->expires == base->next_timer &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
base->next_timer = base->timer_jiffies;
|
||||
ret = 1;
|
||||
} else {
|
||||
if (pending_only)
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = detach_if_pending(timer, base, false);
|
||||
if (!ret && pending_only)
|
||||
goto out_unlock;
|
||||
|
||||
debug_activate(timer, expires);
|
||||
|
||||
@@ -752,9 +782,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
|
||||
}
|
||||
|
||||
timer->expires = expires;
|
||||
if (time_before(timer->expires, base->next_timer) &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
base->next_timer = timer->expires;
|
||||
internal_add_timer(base, timer);
|
||||
|
||||
out_unlock:
|
||||
@@ -920,9 +947,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
timer_set_base(timer, base);
|
||||
debug_activate(timer, timer->expires);
|
||||
if (time_before(timer->expires, base->next_timer) &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
base->next_timer = timer->expires;
|
||||
internal_add_timer(base, timer);
|
||||
/*
|
||||
* Check whether the other CPU is idle and needs to be
|
||||
@@ -959,13 +983,7 @@ int del_timer(struct timer_list *timer)
|
||||
timer_stats_timer_clear_start_info(timer);
|
||||
if (timer_pending(timer)) {
|
||||
base = lock_timer_base(timer, &flags);
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 1);
|
||||
if (timer->expires == base->next_timer &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
base->next_timer = base->timer_jiffies;
|
||||
ret = 1;
|
||||
}
|
||||
ret = detach_if_pending(timer, base, true);
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
}
|
||||
|
||||
@@ -990,19 +1008,10 @@ int try_to_del_timer_sync(struct timer_list *timer)
|
||||
|
||||
base = lock_timer_base(timer, &flags);
|
||||
|
||||
if (base->running_timer == timer)
|
||||
goto out;
|
||||
|
||||
timer_stats_timer_clear_start_info(timer);
|
||||
ret = 0;
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 1);
|
||||
if (timer->expires == base->next_timer &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
base->next_timer = base->timer_jiffies;
|
||||
ret = 1;
|
||||
if (base->running_timer != timer) {
|
||||
timer_stats_timer_clear_start_info(timer);
|
||||
ret = detach_if_pending(timer, base, true);
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
|
||||
return ret;
|
||||
@@ -1089,7 +1098,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
|
||||
*/
|
||||
list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
|
||||
BUG_ON(tbase_get_base(timer->base) != base);
|
||||
internal_add_timer(base, timer);
|
||||
/* No accounting, while moving them */
|
||||
__internal_add_timer(base, timer);
|
||||
}
|
||||
|
||||
return index;
|
||||
@@ -1178,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base)
|
||||
timer_stats_account_timer(timer);
|
||||
|
||||
base->running_timer = timer;
|
||||
detach_timer(timer, 1);
|
||||
detach_expired_timer(timer, base);
|
||||
|
||||
spin_unlock_irq(&base->lock);
|
||||
call_timer_fn(timer, fn, data);
|
||||
@@ -1316,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
unsigned long get_next_timer_interrupt(unsigned long now)
{
struct tvec_base *base = __this_cpu_read(tvec_bases);
unsigned long expires;
unsigned long expires = now + NEXT_TIMER_MAX_DELTA;

/*
* Pretend that there is no timer pending if the cpu is offline.
* Possible pending timers will be migrated later to an active cpu.
*/
if (cpu_is_offline(smp_processor_id()))
return now + NEXT_TIMER_MAX_DELTA;
return expires;

spin_lock(&base->lock);
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;
if (base->active_timers) {
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
expires = base->next_timer;
}
spin_unlock(&base->lock);

if (time_before_eq(expires, now))
|
||||
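The get_next_timer_interrupt() change above only recomputes the next expiry when base->active_timers is non-zero, so a wheel full of purely deferrable timers no longer forces a scan. Below is a toy model of gating an expensive scan on such a counter; all names and numbers are invented for illustration.

#include <stdio.h>

#define NO_DEADLINE (~0UL)

struct toy_base {
    unsigned long active;               /* non-deferrable timers queued */
    unsigned long next_expiry;
};

static unsigned long toy_scan_for_next(struct toy_base *b)
{
    puts("scanning timer wheel...");    /* stands in for the expensive scan */
    return b->next_expiry;
}

static unsigned long toy_next_event(struct toy_base *b, unsigned long now)
{
    unsigned long expires = NO_DEADLINE;

    if (b->active)                      /* skip the scan entirely when idle */
        expires = toy_scan_for_next(b);
    return expires == NO_DEADLINE ? now + 1000 : expires;
}

int main(void)
{
    struct toy_base base = { .active = 0, .next_expiry = 150 };

    printf("idle: %lu\n", toy_next_event(&base, 100));   /* no scan */
    base.active = 1;
    printf("busy: %lu\n", toy_next_event(&base, 100));   /* scans, returns 150 */
    return 0;
}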
@@ -1704,6 +1717,7 @@ static int __cpuinit init_timers_cpu(int cpu)
|
||||
|
||||
base->timer_jiffies = jiffies;
|
||||
base->next_timer = base->timer_jiffies;
|
||||
base->active_timers = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1714,11 +1728,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
|
||||
|
||||
while (!list_empty(head)) {
|
||||
timer = list_first_entry(head, struct timer_list, entry);
|
||||
detach_timer(timer, 0);
|
||||
/* We ignore the accounting on the dying cpu */
|
||||
detach_timer(timer, false);
|
||||
timer_set_base(timer, new_base);
|
||||
if (time_before(timer->expires, new_base->next_timer) &&
|
||||
!tbase_get_deferrable(timer->base))
|
||||
new_base->next_timer = timer->expires;
|
||||
internal_add_timer(new_base, timer);
|
||||
}
|
||||
}
|
||||
|
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
|
||||
|
||||
static int __register_ftrace_function(struct ftrace_ops *ops)
|
||||
{
|
||||
if (ftrace_disabled)
|
||||
if (unlikely(ftrace_disabled))
|
||||
return -ENODEV;
|
||||
|
||||
if (FTRACE_WARN_ON(ops == &global_ops))
|
||||
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops)
|
||||
|
||||
mutex_lock(&ftrace_lock);
|
||||
|
||||
if (unlikely(ftrace_disabled))
|
||||
goto out_unlock;
|
||||
|
||||
ret = __register_ftrace_function(ops);
|
||||
if (!ret)
|
||||
ret = ftrace_startup(ops, 0);
|
||||
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ftrace_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_ftrace_function);
|
||||
|
@@ -1075,6 +1075,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
|
||||
rb_init_page(bpage->page);
|
||||
|
||||
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
|
||||
INIT_LIST_HEAD(&cpu_buffer->new_pages);
|
||||
|
||||
ret = rb_allocate_pages(cpu_buffer, nr_pages);
|
||||
if (ret < 0)
|
||||
@@ -1346,10 +1347,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
|
||||
* If something was added to this page, it was full
|
||||
* since it is not the tail page. So we deduct the
|
||||
* bytes consumed in ring buffer from here.
|
||||
* No need to update overruns, since this page is
|
||||
* deleted from ring buffer and its entries are
|
||||
* already accounted for.
|
||||
* Increment overrun to account for the lost events.
|
||||
*/
|
||||
local_add(page_entries, &cpu_buffer->overrun);
|
||||
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
|
||||
}
|
||||
|
||||
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
|
||||
if (cpu_buffer->commit_page == cpu_buffer->reader_page)
|
||||
goto out;
|
||||
|
||||
/* Don't bother swapping if the ring buffer is empty */
|
||||
if (rb_num_of_entries(cpu_buffer) == 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Reset the reader page to size zero.
|
||||
*/
|
||||
|
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type)
|
||||
current_trace = saved_tracer;
|
||||
if (ret) {
|
||||
printk(KERN_CONT "FAILED!\n");
|
||||
/* Add the warning after printing 'FAILED' */
|
||||
WARN_ON(1);
|
||||
goto out;
|
||||
}
|
||||
/* Only reset on passing, to avoid touching corrupted buffers */
|
||||
@@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
|
||||
|
||||
static void trace_iterator_increment(struct trace_iterator *iter)
|
||||
{
|
||||
struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
|
||||
|
||||
iter->idx++;
|
||||
if (iter->buffer_iter[iter->cpu])
|
||||
ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
|
||||
if (buf_iter)
|
||||
ring_buffer_read(buf_iter, NULL);
|
||||
}
|
||||
|
||||
static struct trace_entry *
|
||||
@@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
|
||||
unsigned long *lost_events)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
|
||||
struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
|
||||
|
||||
if (buf_iter)
|
||||
event = ring_buffer_iter_peek(buf_iter, ts);
|
||||
@@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
|
||||
|
||||
tr->data[cpu]->skipped_entries = 0;
|
||||
|
||||
if (!iter->buffer_iter[cpu])
|
||||
buf_iter = trace_buffer_iter(iter, cpu);
|
||||
if (!buf_iter)
|
||||
return;
|
||||
|
||||
buf_iter = iter->buffer_iter[cpu];
|
||||
ring_buffer_iter_reset(buf_iter);
|
||||
|
||||
/*
|
||||
@@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
|
||||
|
||||
int trace_empty(struct trace_iterator *iter)
|
||||
{
|
||||
struct ring_buffer_iter *buf_iter;
|
||||
int cpu;
|
||||
|
||||
/* If we are looking at one CPU buffer, only check that one */
|
||||
if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
|
||||
cpu = iter->cpu_file;
|
||||
if (iter->buffer_iter[cpu]) {
|
||||
if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
|
||||
buf_iter = trace_buffer_iter(iter, cpu);
|
||||
if (buf_iter) {
|
||||
if (!ring_buffer_iter_empty(buf_iter))
|
||||
return 0;
|
||||
} else {
|
||||
if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
|
||||
@@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter)
|
||||
}
|
||||
|
||||
for_each_tracing_cpu(cpu) {
|
||||
if (iter->buffer_iter[cpu]) {
|
||||
if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
|
||||
buf_iter = trace_buffer_iter(iter, cpu);
|
||||
if (buf_iter) {
|
||||
if (!ring_buffer_iter_empty(buf_iter))
|
||||
return 0;
|
||||
} else {
|
||||
if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
|
||||
@@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file)
|
||||
if (!iter)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
|
||||
GFP_KERNEL);
|
||||
if (!iter->buffer_iter)
|
||||
goto release;
|
||||
|
||||
/*
|
||||
* We make a copy of the current tracer to avoid concurrent
|
||||
* changes on it while we are reading.
|
||||
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file)
|
||||
fail:
|
||||
mutex_unlock(&trace_types_lock);
|
||||
kfree(iter->trace);
|
||||
kfree(iter->buffer_iter);
|
||||
release:
|
||||
seq_release_private(inode, file);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
@@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file)
|
||||
mutex_destroy(&iter->mutex);
|
||||
free_cpumask_var(iter->started);
|
||||
kfree(iter->trace);
|
||||
kfree(iter->buffer_iter);
|
||||
seq_release_private(inode, file);
|
||||
return 0;
|
||||
}
|
||||
|
@@ -317,6 +317,14 @@ struct tracer {
|
||||
|
||||
#define TRACE_PIPE_ALL_CPU -1
|
||||
|
||||
static inline struct ring_buffer_iter *
trace_buffer_iter(struct trace_iterator *iter, int cpu)
{
if (iter->buffer_iter && iter->buffer_iter[cpu])
return iter->buffer_iter[cpu];
return NULL;
}
|
||||
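trace_buffer_iter() above gives callers one NULL-safe way to reach a per-CPU ring buffer iterator now that iter->buffer_iter is allocated separately. A minimal standalone sketch of the same accessor pattern, with toy types standing in for the trace structures:

#include <stddef.h>
#include <stdio.h>

struct toy_iter {
    int **per_cpu;                      /* may be NULL before allocation */
};

/* One helper hides both "no array yet" and "no entry for this cpu". */
static int *toy_buffer_iter(struct toy_iter *it, int cpu)
{
    if (it->per_cpu && it->per_cpu[cpu])
        return it->per_cpu[cpu];
    return NULL;
}

int main(void)
{
    int slot0 = 42;
    int *cpus[2] = { &slot0, NULL };
    struct toy_iter unallocated = { .per_cpu = NULL };
    struct toy_iter ready = { .per_cpu = cpus };

    printf("%p\n", (void *)toy_buffer_iter(&unallocated, 0));  /* NULL */
    printf("%d\n", *toy_buffer_iter(&ready, 0));               /* 42 */
    return 0;
}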
|
||||
int tracer_init(struct tracer *t, struct trace_array *tr);
|
||||
int tracing_is_enabled(void);
|
||||
void trace_wake_up(void);
|
||||
|
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,
|
||||
next = &data->ret;
|
||||
} else {
|
||||
|
||||
ring_iter = iter->buffer_iter[iter->cpu];
|
||||
ring_iter = trace_buffer_iter(iter, iter->cpu);
|
||||
|
||||
/* First peek to compare current entry and the next one */
|
||||
if (ring_iter)
|
||||
|
@@ -1325,4 +1325,4 @@ __init static int init_events(void)
|
||||
|
||||
return 0;
|
||||
}
|
||||
device_initcall(init_events);
|
||||
early_initcall(init_events);
|
||||
|
1154
kernel/workqueue.c
File diff suppressed because it is too large